### 场景分类

In [1]:
import torch 
from torch import nn

from torch.utils.data import Dataset, DataLoader
import pandas as pd
from torchvision import transforms
from torch.utils.data import random_split

import os
from PIL import Image



  from .autonotebook import tqdm as notebook_tqdm


##### 数据集处理

In [2]:
class ImageTextDataset(Dataset):
    """Map-style dataset pairing image files with labels listed in a CSV file.

    The CSV is loaded with ``pandas.read_csv``. Column 0 supplies the image
    file name (joined onto ``img_dir``) and column 1 supplies the label.
    """

    def __init__(self, img_dir, labels_file, transform=None):
        """Store the image directory and transform, and load the label table.

        Args:
            img_dir: Directory that the file names from the CSV are joined onto.
            labels_file: Path of the CSV file to read.
            transform: Optional callable applied to each loaded RGB image.
        """
        self.img_dir, self.transform = img_dir, transform
        self.labels = pd.read_csv(labels_file)

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair described by row ``idx``."""
        # Column 0 of the label table is the file name relative to img_dir.
        file_name = self.labels.iloc[idx, 0]
        # Open the file and force three RGB channels.
        rgb_image = Image.open(os.path.join(self.img_dir, file_name)).convert('RGB')
        target = self.labels.iloc[idx, 1]
        # Truthiness check: with a falsy transform the PIL image is returned as-is.
        return (self.transform(rgb_image) if self.transform else rgb_image), target
    
# Preprocessing applied to every image: resize to 64x64, convert to a tensor,
# then normalise each of the three channels with the given mean/std
# (presumably the usual ImageNet channel statistics — TODO confirm).
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Build the paths with os.path.join rather than hard-coded backslashes:
# "\s" and "\d" are invalid escape sequences in a plain string literal, and a
# backslash-separated relative path only resolves on Windows.
DATA_ROOT = os.path.join("scene classification", "scene classification")

full_dataset = ImageTextDataset(
    img_dir=os.path.join(DATA_ROOT, "data"),
    labels_file=os.path.join(DATA_ROOT, "data.csv"),
    transform=transform
)

# Seed the global torch RNG so the random split below is reproducible.
torch.manual_seed(42)

# Split sizes: 70% train, 15% validation, and the remainder (~15%) for test so
# that the three sizes always sum to the dataset length.
total_samples = len(full_dataset)
train_size = int(0.7 * total_samples)
val_size = int(0.15 * total_samples)
test_size = total_samples - train_size - val_size

# Randomly partition the dataset into the three subsets.
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size]
)

# Single batch size shared by all three loaders.
# NOTE(review): 11923 is large enough that a whole split may fit in one batch
# (i.e. one optimizer step per epoch) — confirm this is intended.
BATCH_SIZE = 11923

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"总样本数: {total_samples}")
print(f"训练集: {len(train_dataset)} | 验证集: {len(val_dataset)} | 测试集: {len(test_dataset)}")



FileNotFoundError: [Errno 2] No such file or directory: 'scene classification\\scene classification\\data.csv'

##### 模型构建

In [None]:
class MlpClassifier(nn.Module):
    """Fully connected classifier over flattened 3-channel images.

    Layer stack:
        Linear(3*H*W, 512) -> BatchNorm1d -> ReLU -> Dropout(0.5)
        Linear(512, 256)   -> BatchNorm1d -> ReLU -> Dropout(0.5)
        Linear(256, 128)   -> ReLU
        Linear(128, num_classes)

    Args:
        img_size: Indexable ``(height, width)`` of the input images; only
            ``img_size[0]`` and ``img_size[1]`` are read.
        num_classes: Number of output logits. Defaults to 6.
    """

    def __init__(self, img_size, num_classes=6):
        super().__init__()
        self.fc1 = nn.Linear(3*img_size[0]*img_size[1], 512)
        self.bn1 = nn.BatchNorm1d(512)  # batch normalisation, intended to speed up convergence
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch whose non-batch dimensions hold
                ``3 * img_size[0] * img_size[1]`` elements per sample.
        """
        # Flatten everything after the batch dimension. Unlike
        # ``x.view(x.size(0), -1)`` this also accepts non-contiguous inputs
        # (e.g. permuted tensors) instead of raising a RuntimeError.
        x = x.flatten(1)
        x = self.dropout(self.relu(self.bn1(self.fc1(x))))
        x = self.dropout(self.relu(self.bn2(self.fc2(x))))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x
    

class CnnClassifier(nn.Module):
    """Three-stage convolutional classifier for 3-channel images.

    Each stage is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2), with 32, 64 and 128 output channels respectively. The pooled
    feature map is flattened and passed through Linear(., 512) -> ReLU ->
    Dropout(0.5) -> Linear(512, num_classes).

    Args:
        img_size: Indexable ``(height, width)`` of the input images. Each side
            is halved three times by the pooling layers, so the first linear
            layer expects ``128 * (height // 8) * (width // 8)`` features.
        num_classes: Number of output logits. Defaults to 6.
    """

    def __init__(self, img_size, num_classes=6):
        super().__init__()
        # Convolutional layers (padding=1 keeps the spatial size unchanged).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        # Pooling layer, shared by all three stages; halves height and width.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Fully connected head; three poolings shrink each side by a factor of 8.
        self.fc1 = nn.Linear(128 * (img_size[0] // 8) * (img_size[1] // 8), 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Dropout and activation.
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, img_size[0], img_size[1])``.
        """
        # Convolution + pooling stages.
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.pool(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.pool(x)

        # Flatten everything after the batch dimension. ``flatten`` copies when
        # needed, so non-contiguous feature maps (e.g. channels_last inputs)
        # work where ``x.view(x.size(0), -1)`` would raise a RuntimeError.
        x = x.flatten(1)

        # Fully connected head.
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)

        return x
    
        

##### 训练

In [None]:

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# does not crash on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# To train the MLP baseline instead, swap in:
#   model = MlpClassifier(img_size=(64, 64), num_classes=6).to(device)
model = CnnClassifier(img_size=(64, 64), num_classes=6).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

NUM_EPOCHS = 100
EVAL_EVERY = 10  # validate and log on epochs 1, 11, 21, ...

for epoch in range(NUM_EPOCHS):
    model.train()
    # Sum of per-sample losses. Weighting each batch mean by its size keeps the
    # epoch average exact even when the last batch is smaller than the others.
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * labels.size(0)

    # Periodic validation pass.
    if epoch % EVAL_EVERY == 0:
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images_val, labels_val in val_loader:
                images_val, labels_val = images_val.to(device), labels_val.to(device)
                outputs_val = model(images_val)
                val_loss += criterion(outputs_val, labels_val).item() * labels_val.size(0)

        # Log the per-sample average losses for this epoch.
        print(f"Epoch {epoch+1}:")
        print(f"  训练Loss: {train_loss / len(train_loader.dataset):.4f}")
        print(f"  验证Loss: {val_loss / len(val_loader.dataset):.4f}")

    

Epoch 1:
  训练Loss: 1.8193
  验证Loss: 1.6025
Epoch 11:
  训练Loss: 1.2957
  验证Loss: 1.4059
Epoch 21:
  训练Loss: 1.1641
  验证Loss: 1.2132
Epoch 31:
  训练Loss: 1.0800
  验证Loss: 1.0949
Epoch 41:
  训练Loss: 0.9952
  验证Loss: 1.0338
Epoch 51:
  训练Loss: 0.9175
  验证Loss: 0.9974
Epoch 61:
  训练Loss: 0.8402
  验证Loss: 1.0098
Epoch 71:
  训练Loss: 0.7603
  验证Loss: 0.9977
Epoch 81:
  训练Loss: 0.7088
  验证Loss: 1.0007
Epoch 91:
  训练Loss: 0.6135
  验证Loss: 1.0434


##### 测试

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Evaluate on whichever device the trained model already lives on, rather than
# assuming a CUDA device is available.
device = next(model.parameters()).device

model.eval()
test_true = []
test_pred = []
test_loss = 0.0  # sum of per-sample losses over the test set

with torch.no_grad():
    for images_test, labels_test in test_loader:
        images_test, labels_test = images_test.to(device), labels_test.to(device)
        outputs_test = model(images_test)
        # Weight the batch-mean loss by the batch size so a smaller final batch
        # does not skew the overall average.
        test_loss += criterion(outputs_test, labels_test).item() * labels_test.size(0)

        # Collect ground-truth labels and arg-max predictions as plain Python ints.
        test_true.extend(labels_test.tolist())
        test_pred.extend(outputs_test.argmax(dim=1).tolist())

# Compute metrics.
avg_test_loss = test_loss / len(test_true)
accuracy = accuracy_score(test_true, test_pred)
f1 = f1_score(test_true, test_pred, average='weighted')

# Print results.
print(f"测试集最终结果:测试Loss: {avg_test_loss:.4f}\n测试Accuracy: {accuracy:.4f}\n测试F1 Score: {f1:.4f}")

测试集最终结果:测试Loss: 1.0669
测试Accuracy: 0.6197
测试F1 Score: 0.6140
