In [1]:
import os
import time
import os.path as osp

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from torchvision import datasets, transforms, models
import torchvision

from PIL import Image, ImageFilter
import matplotlib.pyplot as plt

from sklearn.metrics import f1_score


In [2]:
# # random seed
# SEED = 1 
# NUM_CLASS = 10

# Training
BATCH_SIZE = 128
NUM_EPOCHS = 300
EVAL_INTERVAL=1
SAVE_DIR = './log'


# Optimizer
LEARNING_RATE = 1e-4
MOMENTUM = 0.9
STEP=5
GAMMA=0.5


In [3]:
import os
import re
from PIL import Image
from torch.utils.data import Dataset

class yfcc100ImageDataset(Dataset):
    def __init__(self, root_dir, train, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []
        self.classes = set()

        for folder_name in os.listdir(root_dir):
            train_folder_path = os.path.join(root_dir, folder_name, 'train')
            test_folder_path = os.path.join(root_dir, folder_name, 'test')

            # 检查是否同时存在 train 和 test 子文件夹
            if os.path.isdir(train_folder_path) and os.path.isdir(test_folder_path):
                if train:
                    images_folder_path = os.path.join(train_folder_path, 'images/')
                else:
                    images_folder_path = os.path.join(test_folder_path, 'images/')

                folder_name_modified = re.sub(r'\d+$', '', folder_name.replace('_', ' '))

                if os.path.isdir(images_folder_path):
                    for image_name in os.listdir(images_folder_path):
                        image_path = os.path.join(images_folder_path, image_name)

                        # 检查是否为文件且不是隐藏文件或目录
                        if os.path.isfile(image_path) and not image_name.startswith('.') and image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                            self.samples.append((image_path, folder_name_modified.strip()))
                            self.classes.add(self.samples[-1][1])

        self.classes = list(self.classes)

    def __len__(self): 
        return len(self.samples) 
    
    def __getitem__(self, idx): 
        image_path, caption = self.samples[idx]
        image = Image.open(image_path).convert('RGB') # 确保图片格式为RGB
        if self.transform: 
            image = self.transform(image) 
        return image, self.classes.index(caption)


In [4]:
transform = transforms.Compose([
    transforms.Resize(size=224),
    transforms.CenterCrop(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
train_set=yfcc100ImageDataset(root_dir='/data/lab/STA303-Exercise03/data/yfcc100/OANet/yfcc100m', train=True,transform=transform) 
test_set=yfcc100ImageDataset(root_dir='/data/lab/STA303-Exercise03/data/yfcc100/OANet/yfcc100m', train=False,transform=transform)
train_dataloader=DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_dataloader=DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)



In [5]:
class_descriptions = train_set.classes 
print(len(train_set.classes))
print(f"train_set.classes: {train_set.classes}")
print(len(test_set.classes))
print(f"test_set.classes: {test_set.classes}")

# 假设 train_set.classes 和 test_set.classes 已经定义并且是两个列表

# 将列表转换为集合
train_classes_set = set(train_set.classes)
test_classes_set = set(test_set.classes)

# 找出只在 train_set 中的元素
only_in_train = train_classes_set - test_classes_set

# 找出只在 test_set 中的元素
only_in_test = test_classes_set - train_classes_set

# 打印结果
print(f"Elements only in train_set: {only_in_train}")
print(f"Elements only in test_set: {only_in_test}")


53
train_set.classes: ['temple nara japan', 'natural history museum london', 'statue of liberty', 'sistine chapel ceiling', 'old town square prague clock', 'big ben', 'taj mahal', 'florence cathedral side', 'palace of westminster', 'mount rushmore', 'piazza dei miracoli', 'national gallery london', 'brandenburg gate', 'grand central terminal new york', 'trevi fountain', 'st vitus cathedral', 'pantheon interior', 'pieta michelangelo', 'grand place brussels', 'petra jordan', 'st pauls cathedral', 'old town square prague', 'blue mosque interior', 'united states capitol rotunda', 'taj mahal entrance', 'pantheon exterior', 'palace of versailles chapel', 'notre dame rosary window', 'colosseum exterior', 'vatican museum ceiling', 'florence cathedral dome interior', 'lincoln memorial', 'louvre', 'lincoln memorial statue', 'westminster abbey', 'london bridge', 'paris opera', 'palazzo pubblico', 'st peters basilica interior', 'temple kyoto japan', 'colosseum interior', 'st peters square', 'hagia

In [6]:
import torch.nn as nn

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(128 * 28 * 28, 512)
        self.dropout1 = nn.Dropout(0.5)
        self.relu4 = nn.ReLU()

        self.fc2 = nn.Linear(512, 53)

    def forward(self, x):
        x = self.pool(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool(self.relu3(self.bn3(self.conv3(x))))

        x = x.view(-1, 128 * 28 * 28)

        x = self.relu4(self.dropout1(self.fc1(x)))
        x = self.fc2(x)
        return x


In [7]:
device="cpu"
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = ConvNet()
model.to(device)

ConvNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=100352, out_features=512, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (relu4): ReLU()
  (fc2): Linear(in_features=512, out_features=53, bias=True)
)

In [8]:
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
criterion = nn.CrossEntropyLoss()

In [9]:
def train_batch(model, image, target):

    output = model(image)
    loss = criterion(output, target)

    return output, loss

In [10]:
def test_batch(model, image, target):

    output = model(image)
    loss = criterion(output, target)
        
    return output, loss

In [None]:
training_loss = []
training_acc = []
testing_loss = []
testing_acc = []
start_time = time.time()

# 日志文件
log_file = 'training_log.txt'

for epoch in range(NUM_EPOCHS):
    model.train()
    torch.cuda.empty_cache()

    running_cls_loss = 0.0
    running_cls_corrects = 0

    for batch_idx, (image, target) in enumerate(train_dataloader):
        #print(f"for epoch{epoch},batch_idx{batch_idx}, label in train is: {target}")
        image = image.to(device)
        target = target.to(device)
        outputs, loss = train_batch(model, image, target)
        _, preds = torch.max(outputs, 1)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        running_cls_loss += loss.item()
        running_cls_corrects += torch.sum(preds == target.data)

    epoch_loss = running_cls_loss / len(train_set)
    epoch_acc = running_cls_corrects.double() / len(train_set)

    training_loss.append(epoch_loss)
    training_acc.append(epoch_acc.cpu().detach().numpy())

    # 在每个epoch结束时计算F1分数
    all_preds = []
    all_targets = []
    model.eval()
    with torch.no_grad():
        for image, target in train_dataloader:
            image = image.to(device)
            target = target.to(device)
            outputs = model(image)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    f1 = f1_score(all_targets, all_preds, average='weighted')
    print(f'Epoch: {epoch+1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} F1 Score: {f1:.4f}')

    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch + 1) == NUM_EPOCHS:
        val_loss = 0.0
        val_corrects = 0

        for batch_idx, (image, target) in enumerate(test_dataloader):
            #print(f"for epoch{epoch},batch_idx{batch_idx}, label in test is: {target}")
            image = image.to(device)
            target = target.to(device)
            outputs, loss = test_batch(model, image, target)
            _, preds = torch.max(outputs, 1)

            val_loss += loss.item()
            val_corrects += torch.sum(preds == target.data)

        val_loss = val_loss / len(test_set)
        val_acc = val_corrects.double() / len(test_set)
        testing_loss.append(val_loss)
        testing_acc.append(val_acc.cpu().detach().numpy())

        print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')

    # 日志记录
    with open(log_file, 'a') as f:
        f.write(f'Epoch {epoch+1}/{NUM_EPOCHS}\n')
        f.write(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} F1 Score: {f1:.4f}\n')
        f.write(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}\n')
        f.write('\n')

    # 模型保存
    if (epoch + 1) == NUM_EPOCHS:
        state = {
            'state_dict': model.state_dict(),
            'acc': epoch_acc,
            'epoch': epoch + 1
        }

        if not os.path.exists(SAVE_DIR):
            os.makedirs(SAVE_DIR)

        torch.save(state, osp.join(SAVE_DIR, f'checkpoint_{epoch+1}.pth'))

end_time = time.time()
duration = end_time - start_time
print(f"训练模型用时：{duration}秒")

Epoch: 1/300 Train Loss: 0.0239 Acc: 0.2463 F1 Score: 0.3500
Test Loss: 0.0195 Acc: 0.3810
Epoch: 2/300 Train Loss: 0.0182 Acc: 0.4168 F1 Score: 0.4927
Test Loss: 0.0163 Acc: 0.4978
Epoch: 3/300 Train Loss: 0.0155 Acc: 0.5043 F1 Score: 0.5798
Test Loss: 0.0141 Acc: 0.5757
Epoch: 4/300 Train Loss: 0.0137 Acc: 0.5618 F1 Score: 0.6364
Test Loss: 0.0127 Acc: 0.6170
Epoch: 5/300 Train Loss: 0.0124 Acc: 0.6073 F1 Score: 0.6629
Test Loss: 0.0118 Acc: 0.6350
Epoch: 6/300 Train Loss: 0.0112 Acc: 0.6471 F1 Score: 0.6997
Test Loss: 0.0107 Acc: 0.6701
Epoch: 7/300 Train Loss: 0.0103 Acc: 0.6714 F1 Score: 0.7295
Test Loss: 0.0100 Acc: 0.6998
Epoch: 8/300 Train Loss: 0.0096 Acc: 0.6963 F1 Score: 0.7496
Test Loss: 0.0095 Acc: 0.7133
Epoch: 9/300 Train Loss: 0.0089 Acc: 0.7151 F1 Score: 0.7749
Test Loss: 0.0090 Acc: 0.7284
Epoch: 10/300 Train Loss: 0.0083 Acc: 0.7356 F1 Score: 0.7930
Test Loss: 0.0085 Acc: 0.7365
Epoch: 11/300 Train Loss: 0.0078 Acc: 0.7526 F1 Score: 0.8075
Test Loss: 0.0083 Acc: 0.74

In [None]:
"""
training_loss = []
training_acc = []
testing_loss = []
testing_acc = []
start_time = time.time()  # 记录开始时间
all_preds = []
all_targets = []
# 日志文件
log_file = 'training_log.txt'
for epoch in range(NUM_EPOCHS):
    model.train()
    torch.cuda.empty_cache()

    ##########################
    ### Training
    ##########################

    running_cls_loss = 0.0
    running_cls_corrects = 0

    for batch_idx, (image, target) in enumerate(train_dataloader):

        image = image.to(device)
        target = target.to(device)
        #######################
        # 为使用L1loss function：
       # target_one_hot = one_hot_encoding(target, 10)  # 转换成 one-hot 编码


        # train model
       # outputs, loss = train_batch(model, image, target_one_hot)  # 使用 one-hot 编码后的目标
        
        outputs, loss = train_batch(model, image, target)
        #########################
        _, preds = torch.max(outputs, 1)

        
        loss_data = loss.data.item()
        if np.isnan(loss_data):
            raise ValueError('loss is nan while training')
        running_cls_loss += loss.item()
        running_cls_corrects += torch.sum(preds == target.data)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    epoch_loss = running_cls_loss / len(train_set)
    epoch_acc = running_cls_corrects.double() / len(train_set)

    print(f'Epoch: {epoch+1}/{NUM_EPOCHS} Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    
    #########
    all_preds.extend(preds.cpu().numpy())
    all_targets.extend(target.cpu().numpy())
    f1 = f1_score(all_targets, all_preds, average='weighted')
    print(f"f1 score in {epoch}th epoch is {f1}")

############
    training_loss.append(epoch_loss)
    training_acc.append(epoch_acc.cpu().detach().numpy())


    ##########################
    ### Testing
    ##########################
    # # eval model during training or in the last epoch
    
    if (epoch + 1) % EVAL_INTERVAL == 0 or (epoch +1) == NUM_EPOCHS:
        print('Begin test......')
        model.eval()
    
        val_loss = 0.0
        val_corrects = 0

        for batch_idx, (image, target) in enumerate(test_dataloader):

            image = image.to(device)
            target = target.to(device)

            # test model
            ##################
          #  target_one_hot = one_hot_encoding(target, 10)  # 转换成 one-hot 编码
            outputs, loss = test_batch(model, image, target)
          #  outputs, loss = test_batch(model, image, target_one_hot)  # 使用 one-hot 编码后的目标
            #######################
            _, preds = torch.max(outputs, 1)
            
            val_loss += loss.item()
            val_corrects += torch.sum(preds == target.data)

        val_loss = val_loss / len(test_set)
        val_acc = val_corrects.double() / len(test_set)
        print(f'Test Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
        testing_loss.append(val_loss)
        testing_acc.append(val_acc.cpu().detach().numpy())
    with open(log_file, 'a') as f:
        f.write(f'Epoch {epoch+1}/{NUM_EPOCHS}\n')
        f.write(f'Train Accuracy: {epoch_acc:.4f}%\n')
        f.write(f'Test Accuracy: {val_acc:.4f}%\n')
        f.write('\n')

        # save the model in last epoch
        if (epoch +1) == NUM_EPOCHS:
            
            state = {
            'state_dict': model.state_dict(),
            'acc': epoch_acc,
            'epoch': (epoch+1),
            }

            # check the dir
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)

            # save the state
            torch.save(state, osp.join(SAVE_DIR, 'checkpoint_%s.pth' % (str(epoch+1))))
            
end_time = time.time()  # 记录结束时间
duration = end_time - start_time  # 计算训练时间
print(f"训练模型用时：{duration}秒")
