# 实验准备

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
from collections import defaultdict, OrderedDict

import nltk
from nltk.tokenize import word_tokenize
from PIL import Image

from transformers import BertModel

如果计算机安装有CUDA，则使用CUDA进行接下来的全部训练，否则使用CPU

In [2]:
# 如果计算机安装有CUDA，则使用CUDA进行接下来的全部训练，否则使用CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('using device:', device)

using device: cuda


划分训练集、验证集、测试集，并转换为Dataloader对象

In [3]:
class SentimentDataset(Dataset):
    def __init__(self, dataframe, image_path, transform=None):
        self.guids = dataframe['guid'].values
        self.tags = dataframe['tag'].values
        self.image_path = image_path
        self.transform = transform
        self.label_mapping = {'negative': 0, 'neutral': 1, 'positive': 2}  # 标签映射

    def __len__(self):
        return len(self.guids)

    def __getitem__(self, index):
        guid = self.guids[index]

        image_path = self.image_path + str(guid) + '.jpg'
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
            
        tag = self.tags[index]
        if pd.isna(tag):
            label = 3
        else:
            label = self.label_mapping[tag]

        return image, label
    
def collate_fn(batch):
    images = torch.stack([sample[0] for sample in batch])
    
    labels = torch.tensor([sample[1] for sample in batch], dtype=torch.long)
    
    return images, labels

train_set_path = '../dataset/train.txt'
test_set_path = '../dataset/test_without_label.txt'
image_path = '../dataset/data/'

# 读取训练数据
train_df = pd.read_csv(train_set_path, index_col=False)

# 将训练数据划分为训练集和验证集，固定划分（8:2）
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# 读取测试数据，即测试集
test_df = pd.read_csv(test_set_path, index_col=False)

num_train = train_df.shape[0]
num_val = val_df.shape[0]
num_test = test_df.shape[0]

print('X_train: ', num_train)
print('X_val: ', num_val)
print('X_test: ', num_test)
print()

image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = SentimentDataset(train_df, image_path, image_transform)
val_dataset = SentimentDataset(val_df, image_path, image_transform)
test_dataset = SentimentDataset(test_df, image_path, image_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)

X_train:  3200
X_val:  800
X_test:  511


查看Dataloader数据格式

In [4]:
for images, labels in train_loader:
    print(images[0])
    print(type(images[0]))
    print(labels[0])
    print(type(labels[0]))
    break

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])
<class 'torch.Tensor'>
tensor(0)
<class 'torch.Tensor'>


# 训练并评价模型

定义图像分类模型

In [5]:
class ImageAlexNet(nn.Module):
    def __init__(self, output_dim, dropout=0.0):
        super(ImageAlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, output_dim),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
    
    
# 定义深度可分离卷积层
class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, stride, dropout):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1,
                                        groups=in_channels, bias=False)
        self.pointwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.batch_norm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=dropout)  # 添加Dropout层

    def forward(self, x):
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        x = self.batch_norm(x)
        x = self.relu(x)
        return x

class ImageMobileNetV1(nn.Module):
    def __init__(self, output_dim, dropout=0.0):
        super(ImageMobileNetV1, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),

            DepthwiseSeparableConv(32, 64, stride=1, dropout=dropout),
            DepthwiseSeparableConv(64, 128, stride=2, dropout=dropout),
            DepthwiseSeparableConv(128, 128, stride=1, dropout=dropout),
            DepthwiseSeparableConv(128, 256, stride=2, dropout=dropout),
            DepthwiseSeparableConv(256, 256, stride=1, dropout=dropout),

            DepthwiseSeparableConv(256, 512, stride=2, dropout=dropout),
            DepthwiseSeparableConv(512, 512, stride=1, dropout=dropout),
            DepthwiseSeparableConv(512, 512, stride=1, dropout=dropout),
            DepthwiseSeparableConv(512, 512, stride=1, dropout=dropout),
            DepthwiseSeparableConv(512, 512, stride=1, dropout=dropout),

            DepthwiseSeparableConv(512, 1024, stride=2, dropout=dropout),
            DepthwiseSeparableConv(1024, 1024, stride=1, dropout=dropout),

            nn.AdaptiveAvgPool2d(1)
        )

        self.fc = nn.Linear(1024, output_dim)

    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

检查模型准确率

In [6]:
def check_accuracy(model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            _, preds = scores.max(1)
            num_correct += (preds == labels).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        return acc

训练模型

In [7]:
def train_model(model, optimizer, epochs):
    model.to(device)
    model.train()
    
    train_loss = []
    for epoch in range(epochs):
        total_loss = 0
        for images, labels in train_loader:
            # 将待更新参数的梯度置为零
            optimizer.zero_grad()
            
            images = images.to(device)
            labels = labels.to(device)

            scores = model(images)
            
            loss = F.cross_entropy(scores, labels)

            # 反向传播，计算梯度
            loss.backward()

            # 利用梯度更新参数
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / num_train
        print(f'Epoch: {epoch + 1}/{epochs}')
        print(f'Train Loss: {avg_loss}')
        train_loss.append(avg_loss)
        
    val_acc = check_accuracy(model)
    
    return model, train_loss, val_acc

记录训练损失

In [8]:
train_loss_all = dict()

使用AlexNet作为图像分类模型

In [9]:
best_model = None
best_lr = -1
best_val_acc = -1

num_classes = 3

model_type = 'AlexNet'
learning_rates = [1e-4, 5e-4, 1e-3]

for lr in learning_rates:
    model = ImageAlexNet(num_classes)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model, train_loss, val_acc = train_model(model, optimizer, epochs=30)
    key = model_type
    train_loss_all[key] = train_loss
    if val_acc > best_val_acc:
        # best_model = model
        best_val_acc = val_acc
        best_lr = lr
    print('model %s lr %e val accuracy: %f' % (model_type, lr, val_acc))
        
print('best validation accuracy achieved:')
print('model %s lr %e val accuracy: %f' % (model_type, best_lr, best_val_acc))
# model AlexNet lr 1.000000e-03 val accuracy: 0.603750

Epoch: 1/30
Train Loss: 0.01445817094296217
Epoch: 2/30
Train Loss: 0.014240342117846012
Epoch: 3/30
Train Loss: 0.014107038378715514
Epoch: 4/30
Train Loss: 0.014111473597586155
Epoch: 5/30
Train Loss: 0.014031991809606553
Epoch: 6/30
Train Loss: 0.014051473028957843
Epoch: 7/30
Train Loss: 0.013919213954359293
Epoch: 8/30
Train Loss: 0.013888083938509226
Epoch: 9/30
Train Loss: 0.013876511063426734
Epoch: 10/30
Train Loss: 0.013816738463938237
Epoch: 11/30
Train Loss: 0.013744545821100473
Epoch: 12/30
Train Loss: 0.013687987010926009
Epoch: 13/30
Train Loss: 0.013601127434521914
Epoch: 14/30
Train Loss: 0.013392129223793745
Epoch: 15/30
Train Loss: 0.013376844376325607
Epoch: 16/30
Train Loss: 0.013169625904411078
Epoch: 17/30
Train Loss: 0.01300957439467311
Epoch: 18/30
Train Loss: 0.012930670101195575
Epoch: 19/30
Train Loss: 0.012620076835155487
Epoch: 20/30
Train Loss: 0.012295433636754751
Epoch: 21/30
Train Loss: 0.011920452620834111
Epoch: 22/30
Train Loss: 0.011586934365332126

使用MobileNet作为图像分类模型

In [10]:
best_model = None
best_lr = -1
best_val_acc = -1

num_classes = 3

model_type = 'MobileNet'
learning_rates = [1e-4, 5e-4, 1e-3]

for lr in learning_rates:
    model = ImageMobileNetV1(num_classes)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model, train_loss, val_acc = train_model(model, optimizer, epochs=30)
    key = model_type
    train_loss_all[key] = train_loss
    if val_acc > best_val_acc:
        # best_model = model
        best_val_acc = val_acc
        best_lr = lr
    print('model %s lr %e val accuracy: %f' % (model_type, lr, val_acc))
        
print('best validation accuracy achieved:')
print('model %s lr %e val accuracy: %f' % (model_type, best_lr, best_val_acc))
# model MobileNet lr 5.000000e-04 val accuracy: 0.543750

Epoch: 1/30
Train Loss: 0.014614721219986677
Epoch: 2/30
Train Loss: 0.01261398421600461
Epoch: 3/30
Train Loss: 0.009338223421946169
Epoch: 4/30
Train Loss: 0.004201586246490478
Epoch: 5/30
Train Loss: 0.0019338873436208813
Epoch: 6/30
Train Loss: 0.0013678188167978078
Epoch: 7/30
Train Loss: 0.0010870640704524702
Epoch: 8/30
Train Loss: 0.0010486529997433536
Epoch: 9/30
Train Loss: 0.0007490850723115728
Epoch: 10/30
Train Loss: 0.0008922717958921567
Epoch: 11/30
Train Loss: 0.0006627585910609923
Epoch: 12/30
Train Loss: 0.0007108198782952969
Epoch: 13/30
Train Loss: 0.0004967323111486621
Epoch: 14/30
Train Loss: 0.0005953232641331852
Epoch: 15/30
Train Loss: 0.0003766826734499773
Epoch: 16/30
Train Loss: 0.00041462374247203115
Epoch: 17/30
Train Loss: 0.00030959014024119826
Epoch: 18/30
Train Loss: 0.0002859567566338228
Epoch: 19/30
Train Loss: 0.00022279444068772135
Epoch: 20/30
Train Loss: 0.00021087224235088797
Epoch: 21/30
Train Loss: 0.00022373715031790198
Epoch: 22/30
Train Los

预测测试集的标签并补全保存

In [None]:
def predict(model):
    all_preds = []
    
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            scores = model(images)
            _, preds = scores.max(1)
            all_preds.append(preds.cpu().numpy())
            
    tag = np.concatenate(all_preds)
    label_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}  # 标签映射
    tag_mapping = []
    for i in range(len(tag)):
        tag_mapping.append(label_mapping[tag[i]])
    
    test_df = pd.read_csv(test_set_path, index_col=False)
    test_df['tag'] = tag
    result_path = '../result/result_image_model.txt'
    test_df.to_csv(result_path, index=False)

In [None]:
predict(best_model)