# 期末大作业  
首先导入头文件

In [1]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.init as init


数据准备

In [2]:
class ChemicalMoleculeDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): 数据集的根目录，包含各类药物文件夹
            transform (callable, optional): 可选的图像转换（如标准化、数据增强等）
        """
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        
        # 预定义药物类别
        self.subclasses = ['Analgesic', 'Antibacterial', 'Antidepressant', 'Antidiabetic',
                           'Antifungal', 'Antihistamine', 'Antihypertensive', 'Antioxidant',
                           'Antiprotozoal', 'Antipyretic', 'Antispasmodic', 'Antitumor',
                           'Antiviral', 'Diuretic', 'Hypnotic', 'Sedative', 'Anti-inflammatory']
        
        # 遍历每个类别文件夹
        for label, subdir in enumerate(self.subclasses):
            subdir_path = os.path.join(root_dir, subdir)
            if os.path.exists(subdir_path):
                for filename in os.listdir(subdir_path):
                    if filename.endswith('.jpg') or filename.endswith('.png'):
                        self.images.append(os.path.join(subdir_path, filename))
                        self.labels.append(label)  # 每个子文件夹对应一个标签（从0开始）

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        label = self.labels[idx]
        image = Image.open(img_name)
        image = image.convert('RGB')  # 将灰度图像转换为 RGB
        if self.transform:
            image = self.transform(image)
        
        return image, label


图像转化并创建数据集，将其分为8:2数据集和测试集

In [3]:
# 图像转换（包括数据增强和标准化）
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 调整图像大小为224x224，以适应VGG16输入尺寸
    transforms.RandomHorizontalFlip(),  # 随机水平翻转（数据增强）
    transforms.ToTensor(),  # 转换为tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 标准化
])

# 创建数据集
train_dataset = ChemicalMoleculeDataset(root_dir=r'E:\code\Jupyter\final_repo\pics', transform=transform)

# 将数据集分成训练集和验证集
train_size = int(0.8 * len(train_dataset))  # 80%作为训练集
val_size = len(train_dataset) - train_size  # 20%作为验证集
train_data, val_data = torch.utils.data.random_split(train_dataset, [train_size, val_size])

# 数据加载器
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

## 构建模型
我们将使用 VGG16 作为基础模型，并修改最后一层全连接层，以适应二分类任务（炎症治疗有效 vs 无炎症治疗效果）。此外，我们使用预训练的 VGG16 模型，并将除最后一层外的参数固定，以便更快地收敛。

In [4]:
def initialize_weights(model):
    for module in model.modules():
        if isinstance(module, nn.Conv2d):  # 卷积层初始化
            init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(module, nn.Linear):  # 全连接层初始化
            init.xavier_normal_(module.weight)
        elif isinstance(module, nn.BatchNorm2d):  # 批归一化层初始化
            init.constant_(module.weight, 1)
            init.constant_(module.bias, 0)
# 使用随机初始化的VGG16模型

model = models.vgg16(pretrained=False)

# 冻结前面几层的参数，只训练最后的全连接层
for param in model.parameters():
    param.requires_grad = False

# 
model.classifier[6] = nn.Linear(4096, 17)  # 输出2个类别：炎症治疗有效（1）和无炎症治疗效果（0）

# 初始化权重
initialize_weights(model)

# 将模型移到GPU（如果有可用的GPU）
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)




# 定义损失函数和优化器
我们使用 交叉熵损失 作为损失函数，因为这是二分类问题的标准选择。优化器使用 Adam，并只更新最后一层的参数。

In [5]:
# 使用交叉熵损失函数
criterion = nn.CrossEntropyLoss()

# 使用Adam优化器，只更新最后一层的参数
optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.001)


# 训练模型
接下来，我们定义训练函数。每个 epoch 包括训练和验证两个阶段。我们会在每个 epoch 后保存验证集上表现最好的模型。

In [6]:
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    best_val_acc = 0.0  # 保存验证集上的最佳准确率
    
    for epoch in range(num_epochs):
        model.train()  # 设置为训练模式
        running_loss = 0.0
        correct = 0
        total = 0
        
        # 训练阶段
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()  # 清零梯度
            
            # 前向传播
            outputs = model(inputs)
            
            # 计算损失
            loss = criterion(outputs, labels)
            loss.backward()  # 反向传播
            optimizer.step()  # 更新参数
            
            # 计算训练集精度
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        train_acc = correct / total * 100
        train_loss = running_loss / len(train_loader)
        
        # 验证阶段
        val_acc = evaluate(model, val_loader)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, Val Accuracy: {val_acc:.2f}%')
        
        # 保存最好的模型
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
    
    print(f"Training completed. Best validation accuracy: {best_val_acc:.2f}%")

# 验证函数
def evaluate(model, val_loader):
    model.eval()  # 设置为评估模式
    correct = 0
    total = 0
    with torch.no_grad():  # 不需要计算梯度
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total * 100

# 开始训练
train(model, train_loader, val_loader, criterion, optimizer, num_epochs=30)


Epoch [1/30], Train Loss: 2.6494, Train Accuracy: 12.03%, Val Accuracy: 7.93%
Epoch [2/30], Train Loss: 2.6465, Train Accuracy: 12.47%, Val Accuracy: 17.60%
Epoch [3/30], Train Loss: 2.6513, Train Accuracy: 12.28%, Val Accuracy: 10.89%
Epoch [4/30], Train Loss: 2.6399, Train Accuracy: 12.60%, Val Accuracy: 16.33%
Epoch [5/30], Train Loss: 2.6616, Train Accuracy: 11.90%, Val Accuracy: 12.42%
Epoch [6/30], Train Loss: 2.6577, Train Accuracy: 11.65%, Val Accuracy: 16.49%
Epoch [7/30], Train Loss: 2.6597, Train Accuracy: 12.50%, Val Accuracy: 11.15%
Epoch [8/30], Train Loss: 2.6520, Train Accuracy: 12.24%, Val Accuracy: 17.65%
Epoch [9/30], Train Loss: 2.6616, Train Accuracy: 12.37%, Val Accuracy: 18.29%
Epoch [10/30], Train Loss: 2.6556, Train Accuracy: 12.13%, Val Accuracy: 11.52%
Epoch [11/30], Train Loss: 2.6574, Train Accuracy: 11.76%, Val Accuracy: 13.16%
Epoch [12/30], Train Loss: 2.6526, Train Accuracy: 12.60%, Val Accuracy: 15.64%
Epoch [13/30], Train Loss: 2.6539, Train Accuracy:

# 评估和测试模型
训练完成后，我们可以加载最佳模型，并在验证集或测试集上评估其性能。

In [7]:
# 加载最佳模型
model.load_state_dict(torch.load('best_model.pth'))

# 在验证集上测试模型
val_acc = evaluate(model, val_loader)
print(f"Final Validation Accuracy: {val_acc:.2f}%")


  model.load_state_dict(torch.load('best_model.pth'))


Final Validation Accuracy: 21.04%
