In [2]:
# 第四问

In [None]:
# 针对问题四，附件四给出了部分甲骨文图像及其对应的简体中文。我们建立inception_v3分类模型，并利用训练数据对模型进行微调。
# 随后利用在甲骨文文字识别任务上微调后的inception_v3模型，对测试集数据进行文字识别。
# 最后将识别结果保存，并写入论文。

In [None]:
# inception model 训练代码：
import json
import os

import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.metrics import f1_score
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

# 检查CUDA是否可用，并设置设备
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# 构建自定义数据集
class CustomDataset(Dataset):
    """Dataset that pairs image files with their label vectors by index.

    Args:
        file_paths: sequence of image file paths.
        labels: sequence of label vectors; entry ``i`` belongs to ``file_paths[i]``.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, file_paths, labels, transform=None):
        self.file_paths, self.labels, self.transform = file_paths, labels, transform

    def __len__(self):
        # The dataset size is driven by the number of image paths.
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``; the label is a float32 tensor."""
        rgb = Image.open(self.file_paths[idx]).convert('RGB')
        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        sample = self.transform(rgb) if self.transform else rgb
        return sample, target


# Hyperparameters
batch_size = 32
num_epochs = 50
learning_rate = 0.001
num_classes = 76  # number of character classes (= length of every label vector)

# Image preprocessing: resize to 299x299, convert to a tensor and normalise
# each channel with the fixed mean/std values below.
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_file_dir = "q4_data/train/image"
train_file_paths = os.listdir(train_file_dir)
# Prefix every file name with its directory
train_file_paths = [os.path.join(train_file_dir, file_name) for file_name in train_file_paths]
# Load the label vectors from the JSON file
with open("q4_train_inception.json", "r") as json_file:
    train_labels = json.load(json_file)
# Labels are matched to images purely by position, so a length mismatch means
# the JSON file does not belong to the current directory contents.
# NOTE(review): this also assumes os.listdir() returns the files in the same
# order as when the JSON file was generated -- verify if the directory changed.
if len(train_labels) != len(train_file_paths):
    raise ValueError(
        "label/file count mismatch: {} labels for {} images in {}".format(
            len(train_labels), len(train_file_paths), train_file_dir))
# Data loading
train_dataset = CustomDataset(train_file_paths, train_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Load the pretrained InceptionV3 model
model = models.inception_v3(pretrained=True)
model.aux_logits = False  # disable the auxiliary output
# # Freeze the model parameters
# for param in model.parameters():
#     param.requires_grad = False

# Replace the final fully connected layer with a head that emits one
# sigmoid probability per class
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.ReLU(inplace=True),
    nn.Linear(512, num_classes),
    nn.Sigmoid()  # multi-label style head: independent probability per class
)

model = model.to(device)
# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Learning-rate scheduler: halve the learning rate every 5 epochs
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

# Make sure the checkpoint directory exists up front; otherwise torch.save
# would fail only after the first full epoch has already been trained.
os.makedirs('ckpt', exist_ok=True)

# Train the model
total_step = len(train_loader)
for epoch in tqdm(range(num_epochs)):
    model.train()
    for i, (images, labels) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        # Compute the loss
        loss = criterion(outputs, labels)
        # Backward pass and optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 1000 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

    # Advance the learning-rate schedule once per epoch
    scheduler.step()

    # Evaluation loop (note: metrics are computed on the training set itself)
    model.eval()
    with torch.no_grad():
        # Accumulators for predictions, labels and exact-match counts
        all_preds = []
        all_labels = []
        correct_predictions = 0
        total_predictions = 0
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            outputs = model(images)
            predicted = outputs > 0.5
            # A sample counts as correct only if every class decision matches
            correct_predictions += (predicted == labels.byte()).all(1).sum().item()
            total_predictions += labels.size(0)
            # Collect predictions and ground-truth labels for the F1 score
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        accuracy = correct_predictions / total_predictions
        f1 = f1_score(all_labels, all_preds, average='micro')
        print('Epoch [{}/{}], Accuracy: {:.4f}, F1 Score: {:.4f}'.format(epoch + 1, num_epochs, accuracy, f1))

    # Save a checkpoint for this epoch (file name records epoch index and metrics)
    torch.save(model.state_dict(), f'ckpt/inceptionv3_ft_{epoch}_f1_{f1:.4f}_acc_{accuracy:.4f}.pth')



In [None]:
上述代码需要加以解释：赛事方提供的附件四中，训练集共有40617张甲骨文图像。
为了满足inception模型的输入要求，我们对数据进行预处理，将每一个文字作为一个类别，
一共有76类。我们建立了一个python列表，其中存储了40617张甲骨文图像的labels（每张图像对应一个长度为76的one-hot向量）。
数据预处理代码如下（需在运行上述训练代码之前执行，以生成训练代码所读取的 q4_train_inception.json）：

In [None]:
import json
import os
import re
import shutil

from tqdm import tqdm


def find_number_before_dash(string):
    """Return the first run of digits that is immediately followed by '-'.

    Args:
        string: text to scan (here: an image file name).

    Returns:
        The digit run converted to ``int``, or ``None`` when no digits
        directly precede a dash anywhere in ``string``.
    """
    hit = re.search(r'\d+(?=-)', string)
    return int(hit.group()) if hit else None


path = "q4_data/train/image"
# Keep the raw os.listdir() order: the training code pairs labels with files
# by position using the same call on the same directory.
txt_list = os.listdir(path)
num_classes = 76
# One zero-initialised one-hot row per file actually present in the directory
# (for the expected dataset this is 40617 rows of length 76).
nested_list = [[0] * num_classes for _ in txt_list]
for tem, tl in enumerate(tqdm(txt_list)):
    # The class id is the number that precedes the first '-' in the file name
    result = find_number_before_dash(tl)
    # Reject names without a usable class id instead of failing with an opaque
    # TypeError (None - 1) or silently writing to index -1 when the id is 0.
    if result is None or not 1 <= result <= num_classes:
        raise ValueError(
            "cannot derive a class id in 1..{} from file name {!r}".format(num_classes, tl))
    # Class ids are 1-based in the file names, list indices are 0-based
    nested_list[tem][result - 1] = 1

# Save the nested list as a JSON file
with open("q4_train_inception.json", "w") as json_file:
    json.dump(nested_list, json_file)


In [None]:
利用预处理后的数据，在inception模型上训练11个epoch后，模型在训练集上的准确率达到99.4%，F1值达到99.6%。
接下来，我们利用训练好的inception模型对附件四的甲骨文原始图像进行文字自动识别。

In [2]:
# 在利用inception识别文字之前，我们先对附件四图像预处理

In [None]:
# 使用第二问训练好的yolo，我们将附件四的五十张图像进行甲骨文的自动识别与分割
# 将自动识别、分割后的结果保存下来进行文字识别
# 文字识别代码如下：
import json
import os

import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.metrics import f1_score
from tqdm import tqdm

# 检查CUDA是否可用，并设置设备
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# 构建自定义数据集
class CustomDataset(Dataset):
    """Label-free dataset that yields images from a list of file paths.

    Args:
        file_paths: sequence of image file paths.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, file_paths, transform=None):
        self.file_paths, self.transform = file_paths, transform

    def __len__(self):
        # One sample per file path.
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return the (optionally transformed) RGB image at position ``idx``."""
        rgb = Image.open(self.file_paths[idx]).convert('RGB')
        return self.transform(rgb) if self.transform else rgb


# Image preprocessing: resize to 299x299, convert to a tensor and normalise
# each channel with the fixed mean/std values below.
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_file_dir = "test_image"  # put the images to be recognised in this folder
# Sort the names so that the processing/output order is deterministic
test_file_path = sorted(os.listdir(test_file_dir))
# Prefix every file name with its directory
test_file_paths = [os.path.join(test_file_dir, file_name) for file_name in test_file_path]
# Data loading: batch_size=1 and shuffle=False keep batch i aligned with test_file_paths[i]
test_dataset = CustomDataset(test_file_paths, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Build the InceptionV3 architecture without downloading pretrained weights
model = models.inception_v3(pretrained=False)
model.aux_logits = False  # disable the auxiliary output

# Replace the final fully connected layer with a head that emits one
# sigmoid probability per class
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.ReLU(inplace=True),
    nn.Linear(512, 76),
    nn.Sigmoid()  # multi-label style head: independent probability per class
)
# Load the fine-tuned weights
model_path = 'ckpt/inceptionv3_ft_10_f1_0.9962_acc_0.9940.pth'  # replace with the path to your weights file
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.eval()
# Keep every image's result; printing only after the loop used to report just
# the last image (and raised NameError when the folder was empty).
predictions = []
with torch.no_grad():
    for idx, images in enumerate(tqdm(test_loader)):
        images = images.to(device)
        outputs = model(images)
        predicted = outputs > 0.5  # threshold 0.5 decides which classes are active
        predictions.append((test_file_paths[idx], predicted.cpu()))

# Report the boolean class mask for each image next to its file path
for file_path, file_pred in predictions:
    print(file_path)
    print(file_pred)
