In [None]:
# 导入必要的库
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from facenet_pytorch import InceptionResnetV1  # 使用facenet-pytorch库中的预训练模型
import pandas as pd
import os
import glob
from PIL import Image
import numpy as np
import datetime

# 设置计算设备（优先使用GPU）
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 加载预训练模型
facenet_model = InceptionResnetV1(pretrained=None).eval()

# 加载本地权重
state_dict = torch.load('vggface2.pth')

# 应用权重
facenet_model.load_state_dict(state_dict)
facenet_model.eval()

# -------------------- 1. 数据预处理 --------------------
# 定义训练数据增强和预处理流程
train_transform = transforms.Compose([
    transforms.RandomRotation(degrees=(0, 20)),
    transforms.RandomGrayscale(0.1),
    transforms.Resize((160, 160)),  # FaceNet标准输入尺寸为160x160
    transforms.RandomHorizontalFlip(),  # 随机水平翻转（数据增强）
    transforms.ColorJitter(brightness=.5, hue=.3),
    transforms.ToTensor(),  # 转换为Tensor格式（范围[0,1]）
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # 标准化到[-1, 1]
])

# 加载训练数据集（需要调整数据集路径）
data_dir = './data/105_classes_pins_dataset'
dataset = datasets.ImageFolder(data_dir, transform=train_transform)

# 数据集划分（80%训练，20%验证）
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# -------------------- 2. 加载预训练模型 --------------------
model = facenet_model

# -------------------- 3. 定义基础训练类 --------------------
class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        """训练步骤"""
        images, labels = batch
        out = self(images)  # 前向传播
        loss = F.cross_entropy(out, labels)  # 计算交叉熵损失
        return loss

    def validation_step(self, batch):
        """验证步骤"""
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)  # 计算准确率
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """验证周期结束聚合结果"""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # 平均损失
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()     # 平均准确率
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """周期结束日志打印"""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))

# -------------------- 4. 定义分类模型 --------------------
num_classes = 105  # 根据实际类别数修改

class FaceNetClassifier(ImageClassificationBase):
    """FaceNet分类器（在预训练模型基础上添加全连接层）"""
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone
        
        # 冻结预训练层（可选）
        for name, param in self.backbone.named_parameters():
            if 'block8' in name or 'block7' in name:  # 解冻后两层
                param.requires_grad = True
            else:
                param.requires_grad = False
        
        # 添加分类层（FaceNet输出维度512）
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )  

    def forward(self, x):
        """前向传播"""
        embeddings = self.backbone(x)  # 提取特征嵌入
        return self.classifier(embeddings)  # 分类预测

# 实例化模型并移至设备
facenet_model = FaceNetClassifier(model).to(device)

# -------------------- 5. 训练配置 --------------------
from torch.optim import Adam
optimizer = Adam(facenet_model.parameters(), lr=0.001)  # 只优化分类器参数

def accuracy(outputs, labels):
    """计算准确率"""
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

# 训练函数
def fit(epochs, model, train_loader, val_loader, optimizer):
    history = []
    for epoch in range(epochs):
        # 训练阶段
        model.train()
        train_losses = []
        for batch in train_loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            loss = model.training_step((images, labels))
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # 验证阶段
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

# 验证函数
def evaluate(model, val_loader):
    model.eval()
    outputs = []
    for batch in val_loader:
        images, labels = batch[0].to(device), batch[1].to(device)
        outputs.append(model.validation_step((images, labels)))
    return model.validation_epoch_end(outputs)

# 开始训练
history = fit(30, facenet_model, train_loader, val_loader, optimizer)

# -------------------- 6. 测试与提交 --------------------



Epoch [0], train_loss: 3.0091, val_loss: 2.9705, val_acc: 0.3097
Epoch [1], train_loss: 1.6237, val_loss: 2.6915, val_acc: 0.3540
Epoch [2], train_loss: 1.1929, val_loss: 1.5127, val_acc: 0.6250
Epoch [3], train_loss: 0.9904, val_loss: 1.4449, val_acc: 0.6437
Epoch [4], train_loss: 0.8950, val_loss: 0.9802, val_acc: 0.7435
Epoch [5], train_loss: 0.8320, val_loss: 1.0078, val_acc: 0.7351
Epoch [6], train_loss: 0.7413, val_loss: 0.7835, val_acc: 0.7915
Epoch [7], train_loss: 0.7108, val_loss: 0.9123, val_acc: 0.7551
Epoch [8], train_loss: 0.6700, val_loss: 0.9814, val_acc: 0.7528
Epoch [9], train_loss: 0.6008, val_loss: 0.7232, val_acc: 0.8083
Epoch [10], train_loss: 0.6036, val_loss: 0.8955, val_acc: 0.7537
Epoch [11], train_loss: 0.5447, val_loss: 0.7276, val_acc: 0.8102
Epoch [12], train_loss: 0.5216, val_loss: 0.6415, val_acc: 0.8214
Epoch [13], train_loss: 0.5336, val_loss: 0.5883, val_acc: 0.8354
Epoch [14], train_loss: 0.5103, val_loss: 0.7287, val_acc: 0.7985
Epoch [15], train_lo

In [19]:
class TestDataset(torch.utils.data.Dataset):
    """测试数据集类"""
    def __init__(self, file_list, transform=None):
        self.file_list = file_list  # 测试文件路径列表
        self.transform = transform  # 预处理流程

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        img_path = self.file_list[idx]
        img = Image.open(img_path).convert('RGB')  # 强制转换为RGB格式
        if self.transform:
            img = self.transform(img)
        file_id = os.path.basename(img_path).split('.')[0]  # 从文件名提取ID
        return img, file_id

# 测试数据预处理（与训练一致，去掉数据增强）
test_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# 加载测试数据
test_dir = './data/test2'
test_list = glob.glob(os.path.join(test_dir, '*.jpg'))
test_dataset = TestDataset(test_list, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)  # 保持顺序

def predict(model, test_loader, device):
    """预测函数"""
    model.eval()
    predictions = []
    file_ids = []
    with torch.no_grad():
        for data, fileid in test_loader:
            data = data.to(device)
            outputs = model(data)
            _, preds = torch.max(outputs, dim=1)
            predictions.extend(preds.cpu().numpy())  # 转回CPU处理
            file_ids.extend(fileid)
    return file_ids, predictions

# 生成预测结果
file_ids, predictions = predict(facenet_model, test_loader, device)

# 获取类别名称
class_names = dataset.classes

# 将预测索引转换为类别名称
predicted_labels = [class_names[pred] for pred in predictions]


In [20]:
# 创建DataFrame
submission_df = pd.DataFrame({'id': file_ids, 'label': predicted_labels})

# 保存为CSV文件
submission_df.to_csv('submission.csv', index=False)

In [22]:
import pandas as pd

name_to_submission_label_map = {
  "pins_Adriana Lima": 0,
  "pins_Alex Lawther": 1,
  "pins_Alexandra Daddario": 2,
  "pins_Alvaro Morte": 3,
  "pins_alycia dabnem carey": 4,
  "pins_Amanda Crew": 5,
  "pins_amber heard": 6,
  "pins_Andy Samberg": 7,
  "pins_Anne Hathaway": 8,
  "pins_Anthony Mackie": 9,
  "pins_Avril Lavigne": 10,
  "pins_barack obama": 11,
  "pins_barbara palvin": 12,
  "pins_Ben Affleck": 13,
  "pins_Bill Gates": 14,
  "pins_Bobby Morley": 15,
  "pins_Brenton Thwaites": 16,
  "pins_Brian J. Smith": 17,
  "pins_Brie Larson": 18,
  "pins_camila mendes": 19,
  "pins_Chris Evans": 20,
  "pins_Chris Hemsworth": 21,
  "pins_Chris Pratt": 22,
  "pins_Christian Bale": 23,
  "pins_Cristiano Ronaldo": 24,
  "pins_Danielle Panabaker": 25,
  "pins_Dominic Purcell": 26,
  "pins_Dwayne Johnson": 27,
  "pins_Eliza Taylor": 28,
  "pins_Elizabeth Lail": 29,
  "pins_elizabeth olsen": 30,
  "pins_ellen page": 31,
  "pins_elon musk": 32,
  "pins_Emilia Clarke": 33,
  "pins_Emma Stone": 34,
  "pins_Emma Watson": 35,
  "pins_gal gadot": 36,
  "pins_grant gustin": 37,
  "pins_Gwyneth Paltrow": 38,
  "pins_Henry Cavil": 39,
  "pins_Hugh Jackman": 40,
  "pins_Inbar Lavi": 41,
  "pins_Irina Shayk": 42,
  "pins_Jake Mcdorman": 43,
  "pins_Jason Momoa": 44,
  "pins_jeff bezos": 45,
  "pins_Jennifer Lawrence": 46,
  "pins_Jeremy Renner": 47,
  "pins_Jessica Barden": 48,
  "pins_Jimmy Fallon": 49,
  "pins_Johnny Depp": 50,
  "pins_Josh Radnor": 51,
  "pins_Katharine Mcphee": 52,
  "pins_Katherine Langford": 53,
  "pins_Keanu Reeves": 54,
  "pins_kiernen shipka": 55,
  "pins_Krysten Ritter": 56,
  "pins_Leonardo DiCaprio": 57,
  "pins_Lili Reinhart": 58,
  "pins_Lindsey Morgan": 59,
  "pins_Lionel Messi": 60,
  "pins_Logan Lerman": 61,
  "pins_Madelaine Petsch": 62,
  "pins_Maisie Williams": 63,
  "pins_margot robbie": 64,
  "pins_Maria Pedraza": 65,
  "pins_Marie Avgeropoulos": 66,
  "pins_Mark Ruffalo": 67,
  "pins_Mark Zuckerberg": 68,
  "pins_Megan Fox": 69,
  "pins_melissa fumero": 70,
  "pins_Miley Cyrus": 71,
  "pins_Millie Bobby Brown": 72,
  "pins_Morena Baccarin": 73,
  "pins_Morgan Freeman": 74,
  "pins_Nadia Hilker": 75,
  "pins_Natalie Dormer": 76,
  "pins_Natalie Portman": 77,
  "pins_Neil Patrick Harris": 78,
  "pins_Pedro Alonso": 79,
  "pins_Penn Badgley": 80,
  "pins_Rami Malek": 81,
  "pins_Rebecca Ferguson": 82,
  "pins_Richard Harmon": 83,
  "pins_Rihanna": 84,
  "pins_Robert De Niro": 85,
  "pins_Robert Downey Jr": 86,
  "pins_Sarah Wayne Callies": 87,
  "pins_scarlett johansson": 88,
  "pins_Selena Gomez": 89,
  "pins_Shakira Isabel Mebarak": 90,
  "pins_Sophie Turner": 91,
  "pins_Stephen Amell": 92,
  "pins_Taylor Swift": 93,
  "pins_Tom Cruise": 94,
  "pins_tom ellis": 95,
  "pins_Tom Hardy": 96,
  "pins_Tom Hiddleston": 97,
  "pins_Tom Holland": 98,
  "pins_Tuppence Middleton": 99,
  "pins_Ursula Corbero": 100,
  "pins_Wentworth Miller": 101,
  "pins_Zac Efron": 102,
  "pins_Zendaya": 103,
  "pins_Zoe Saldana": 104
}

def get_submission_label(class_name):
    # 直接使用 class_name 在映射中查找提交标签
    return name_to_submission_label_map.get(class_name, class_name)

# 读取 CSV 文件
df = pd.read_csv('submission.csv')

# 应用标签映射
df['label'] = df['label'].apply(get_submission_label)

# 按 'id' 列排序
try:
    df['id'] = df['id'].astype(int)  # 尝试将 'id' 转换为整数
    df = df.sort_values(by='id')     # 按数值排序
except ValueError:
    df = df.sort_values(by='id')     # 如果转换失败，按字符串排序

# 保存处理后的数据到新的 CSV 文件
df.to_csv('22211360121' + 'submission_{}.csv'.format(datetime.datetime.now().strftime('%Y%m%d_%H%M%S')), index=False)

