In [2]:
import os
import time
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # 用于可视化进度条

# 设置路径
data_dir = "C:/Users/cpj/Desktop/ECNU/大三第一学期/当代人工智能/实验5/data"
train_file = "C:/Users/cpj/Desktop/ECNU/大三第一学期/当代人工智能/实验5/train.txt"
test_file = "C:/Users/cpj/Desktop/ECNU/大三第一学期/当代人工智能/实验5/test_without_label.txt"
output_dir = "C:/Users/cpj/Desktop/ECNU/大三第一学期/当代人工智能/实验5/output"

# 创建输出目录（如果不存在）
os.makedirs(output_dir, exist_ok=True)

# 加载训练集标签
train_df = pd.read_csv(train_file, header=None, names=["guid", "label"])

# 清理数据确保标签合法
valid_labels = ["positive", "neutral", "negative"]  # 只允许这三种标签
train_df = train_df[train_df["label"].isin(valid_labels)]  # 过滤出合法数据

# 验证清理后的标签
print(f"Training labels after cleanup: {train_df['label'].unique()}")

# 文本预处理（使用 TF-IDF）
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # 限制最大特征数量
train_texts = []

# 读取文本内容
for guid in train_df["guid"]:
    text_path = os.path.join(data_dir, f"{guid}.txt")
    try:
        with open(text_path, "r", encoding="utf-8") as file:
            train_texts.append(file.read())
    except UnicodeDecodeError:
        try:
            with open(text_path, "r", encoding="gbk") as file:
                train_texts.append(file.read())
        except Exception as e:
            print(f"Warning: Unable to read text file {text_path} due to error: {e}")
            train_texts.append("[TEXT_ERROR]")  # 占位符
    except FileNotFoundError:
        print(f"Warning: File not found - {text_path}")
        train_texts.append("[TEXT_MISSING]")  # 占位符

# 对文本进行 TF-IDF 转换
X_text = tfidf_vectorizer.fit_transform(train_texts).toarray()

# 图像预处理
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# 标签编码
label_encoder = LabelEncoder()
y_labels = label_encoder.fit_transform(train_df["label"])

# 打印编码后的标签及其范围
print(f"Classes found by LabelEncoder: {label_encoder.classes_}")
print(f"Encoded labels range: {y_labels.min()} to {y_labels.max()}")

# 加载图像
image_tensors = []
for guid in train_df["guid"]:
    image_path = os.path.join(data_dir, f"{guid}.jpg")
    try:
        image = Image.open(image_path).convert("RGB")
    except FileNotFoundError:
        print(f"Warning: Image file not found - {image_path}")
        image = Image.new("RGB", (224, 224))  # 如果图片不存在，生成空白图片
    image_tensor = image_transform(image)
    image_tensors.append(image_tensor)

# 切分数据集
X_train_text, X_val_text, X_train_images, X_val_images, y_train, y_val = train_test_split(
    X_text, image_tensors, y_labels, test_size=0.2, random_state=42
)

# 创建数据集类
class MultimodalDataset(Dataset):
    def __init__(self, text_features, image_tensors, labels):
        self.text_features = text_features
        self.image_tensors = image_tensors
        self.labels = labels

    def __len__(self):
        return len(self.text_features)

    def __getitem__(self, idx):
        return torch.tensor(self.text_features[idx], dtype=torch.float32), self.image_tensors[idx], torch.tensor(self.labels[idx], dtype=torch.long)

# 创建数据集和DataLoader
train_dataset = MultimodalDataset(X_train_text, X_train_images, y_train)
val_dataset = MultimodalDataset(X_val_text, X_val_images, y_val)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 模型设计
class MultimodalEmotionModel(nn.Module):
    def __init__(self, text_input_dim, num_classes):
        super(MultimodalEmotionModel, self).__init__()

        # 文本特征提取
        self.text_fc = nn.Linear(text_input_dim, 256)

        # 图像特征提取
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()  # 去掉ResNet的最后一层全连接层
        self.image_fc = nn.Linear(512, 256)  # ResNet18输出维度为512

        # 融合层
        self.fusion_fc = nn.Linear(512, num_classes)  # 动态设置输出类别数

    def forward(self, text_features, image_tensor):
        # 文本特征
        text_output = self.text_fc(text_features)

        # 图像特征
        image_features = self.resnet(image_tensor)
        image_features = self.image_fc(image_features)

        # 融合特征
        combined_features = torch.cat((text_output, image_features), dim=1)
        output = self.fusion_fc(combined_features)
        return output

# 确定输出类别数
num_classes = len(label_encoder.classes_)

# 实例化模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultimodalEmotionModel(text_input_dim=X_text.shape[1], num_classes=num_classes).to(device)

# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-5)

# 训练过程
def train(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for text_features, image_tensor, labels in tqdm(dataloader, desc="Training"):
        text_features = text_features.to(device)
        image_tensor = image_tensor.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        output = model(text_features, image_tensor)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    return avg_loss

# 评估过程
def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for text_features, image_tensor, labels in tqdm(dataloader, desc="Evaluating"):
            text_features = text_features.to(device)
            image_tensor = image_tensor.to(device)
            labels = labels.to(device)

            output = model(text_features, image_tensor)
            loss = criterion(output, labels)
            total_loss += loss.item()

            _, predicted = torch.max(output, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_loss = total_loss / len(dataloader)
    accuracy = correct / total
    return avg_loss, accuracy

# 训练和评估
start_time = time.time()
for epoch in range(5):
    print(f"Epoch {epoch+1}/{5}")

    train_loss = train(model, train_dataloader, optimizer, criterion, device)
    val_loss, val_accuracy = evaluate(model, val_dataloader, criterion, device)

    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Accuracy = {val_accuracy:.4f}")

# 总时间
end_time = time.time()
print(f"Total Training Time: {end_time - start_time:.2f} seconds")

# 保存模型
model_save_path = os.path.join(output_dir, "emotion_model.pth")
torch.save(model.state_dict(), model_save_path)
print(f"Model saved at {model_save_path}")

Training labels after cleanup: ['negative' 'neutral' 'positive']
Classes found by LabelEncoder: ['negative' 'neutral' 'positive']
Encoded labels range: 0 to 2




Epoch 1/5


Training: 100%|██████████| 100/100 [07:32<00:00,  4.52s/it]
Evaluating: 100%|██████████| 25/25 [00:39<00:00,  1.59s/it]


Epoch 1: Train Loss = 0.8999, Val Loss = 0.8422, Val Accuracy = 0.6025
Epoch 2/5


Training: 100%|██████████| 100/100 [07:23<00:00,  4.43s/it]
Evaluating: 100%|██████████| 25/25 [00:37<00:00,  1.48s/it]


Epoch 2: Train Loss = 0.7844, Val Loss = 0.8087, Val Accuracy = 0.6325
Epoch 3/5


Training: 100%|██████████| 100/100 [07:27<00:00,  4.48s/it]
Evaluating: 100%|██████████| 25/25 [00:35<00:00,  1.43s/it]


Epoch 3: Train Loss = 0.7183, Val Loss = 0.7914, Val Accuracy = 0.6312
Epoch 4/5


Training: 100%|██████████| 100/100 [07:31<00:00,  4.51s/it]
Evaluating: 100%|██████████| 25/25 [00:38<00:00,  1.55s/it]


Epoch 4: Train Loss = 0.6605, Val Loss = 0.7858, Val Accuracy = 0.6338
Epoch 5/5


Training: 100%|██████████| 100/100 [07:24<00:00,  4.44s/it]
Evaluating: 100%|██████████| 25/25 [00:35<00:00,  1.42s/it]

Epoch 5: Train Loss = 0.5868, Val Loss = 0.7892, Val Accuracy = 0.6388
Total Training Time: 2425.80 seconds
Model saved at C:/Users/cpj/Desktop/ECNU/大三第一学期/当代人工智能/实验5/output\emotion_model.pth



