In [1]:
# Root directory of the competition data. Every path below is derived from it,
# so pointing the notebook at another location means editing this one line.
competition_root = '/src/openhouse2024_competition'

# Folder that contains the test images to run inference on.
test_data_root = competition_root + '/test/images'

# Destination of the submission CSV.
output_csv_path = competition_root + '/submit.csv'

# Path of the saved (trained) model weights.
model_path = competition_root + '/model_weight.pth'

/src/openhouse2024_competition/test

In [2]:
from PIL import Image
import os
import pandas as pd

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [3]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise the CPU. The choice is printed for reference.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [4]:
# Inference does not need the images rearranged into per-class folders, so the
# data is read through a custom torch.utils.data.Dataset subclass (defined below).
def sort_key(fname):
    """Return the integer formed by the digits in the base name of ``fname``.

    Only the last path component is inspected, so digits that appear in
    parent directories (e.g. ``openhouse2024_competition``) do not leak into
    the key and distort the ordering of zero-padded or mixed-width numbers.

    Args:
        fname: File name or path of an image, e.g. ``"12.png"``.

    Returns:
        int: All digits of the base name concatenated and parsed as an integer.

    Raises:
        ValueError: If the base name contains no digit at all.
    """
    base = os.path.basename(fname)
    digits = ''.join(filter(str.isdigit, base))
    if not digits:
        # int('') would raise an opaque error; say which file is the problem.
        raise ValueError("no digits found in file name: {!r}".format(fname))
    return int(digits)

class CustomImageDataset(Dataset):
    """Unlabelled image dataset that serves the files of one flat directory.

    Images are ordered by the number embedded in their file name (via
    ``sort_key``), so the position of each item follows the image numbering.

    Args:
        image_dir: Directory that contains the image files.
        transform: Optional callable applied to each loaded RGB image before
            it is returned.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        # Keep regular, non-hidden files only: sub-directories and dot-files
        # (e.g. ``.ipynb_checkpoints``, ``.DS_Store``) are not images and would
        # otherwise break the numeric sort or the image loading.
        fnames = [
            fname for fname in os.listdir(image_dir)
            if not fname.startswith('.')
            and os.path.isfile(os.path.join(image_dir, fname))
        ]
        # Sort on the bare file name, not the joined path, so digits in the
        # directory part of the path cannot end up in the sort key.
        fnames.sort(key=sort_key)
        self.image_paths = [os.path.join(image_dir, fname) for fname in fnames]

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it, transformed if requested."""
        img_path = self.image_paths[idx]
        # The context manager releases the file handle once the pixel data has
        # been converted; ``convert`` returns a separate image object.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image

In [5]:
# Preprocessing applied to every test image.
transform_test = transforms.Compose([
    transforms.Resize(size=(256, 256)),  # resize to 256x256
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # per-channel normalisation
])

# Build the test dataset and its loader; shuffle stays off so predictions keep
# the dataset order.
test_set = CustomImageDataset(test_data_root, transform=transform_test)
test_loader = DataLoader(dataset=test_set, batch_size=1000, shuffle=False)

In [6]:
# モデルのロード
import torch
import torch.nn as nn
import os

class ResidualBlock(nn.Module):
    """Two 3x3 convolution + batch-norm layers wrapped by a skip connection.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input to produce the skip
            branch; when ``None`` the input itself is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Settings shared by both 3x3 convolutions.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + skip(x))``."""
        # Skip branch: projected input when a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))
        features += shortcut
        return self.relu(features)

class ResNet(nn.Module):
    """ResNet-style image classifier assembled from residual blocks.

    The network is a 7x7 stride-2 stem convolution on 3-channel input with
    batch norm, ReLU and a 3x3 stride-2 max-pool, followed by four stages of
    ``block`` modules (64, 128, 256 and 512 output channels; stages 2-4 start
    with stride 2), global average pooling and a linear classifier.

    Args:
        block: Class used to build each residual block. It is called as
            ``block(in_channels, out_channels, stride, downsample)`` for the
            first block of a stage and ``block(in_channels, out_channels)``
            for the remaining ones.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Number of output logits.
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count, updated by _make_layer as stages are built.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # The classifier takes the 512 channels produced by the last stage.
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage made of ``blocks`` consecutive ``block`` modules.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + batch-norm projection for its skip
        branch. ``self.in_channels`` is updated to ``out_channels``.
        """
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            # Project the skip branch so it matches the block output.
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits, one row of ``num_classes`` values per input."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.fc(x)

        return x

    @classmethod
    def save_checkpoint(cls, epoch, model, optimizer, history, path='checkpoint.pth'):
        """Save a training checkpoint to ``path``.

        Declared as a classmethod (like ``load_checkpoint``) so it can be
        called on the class as well as on an instance.

        Args:
            epoch: Epoch counter to store.
            model: Module whose ``state_dict()`` is stored.
            optimizer: Optimizer whose ``state_dict()`` is stored.
            history: Training history object stored as-is.
            path: Destination file.
        """
        state = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'history': history
        }
        torch.save(state, path)

    @classmethod
    def load_checkpoint(cls, path='checkpoint.pth', map_location=None):
        """Load a checkpoint written by ``save_checkpoint``.

        Args:
            path: Checkpoint file to read.
            map_location: Forwarded to ``torch.load``; pass e.g. ``'cpu'`` to
                load a checkpoint saved on a GPU on a machine without one.

        Returns:
            tuple: ``(epoch, model_state_dict, optimizer_state_dict, history)``,
            or ``(0, None, None, None)`` when ``path`` is not an existing file.
        """
        if not os.path.isfile(path):
            print("No checkpoint found.")
            return 0, None, None, None

        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(path, map_location=map_location)
        return (
            checkpoint['epoch'],
            checkpoint['model_state_dict'],
            checkpoint['optimizer_state_dict'],
            checkpoint['history'],
        )

# Build the network used for inference: two residual blocks in each of the
# four stages and 10 output classes.
model = ResNet(ResidualBlock, [2, 2, 2, 2], num_classes=10)


In [7]:
# Inference
net = model.to(device)

# Load the trained weights.
# - map_location=device lets weights saved on a GPU load on a CPU-only machine
#   (device falls back to "cpu" when CUDA is unavailable).
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state_dict needs, instead of running arbitrary pickled code.
net.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
net.eval()  # switch BatchNorm layers to inference behaviour

# Run inference; gradients are not needed.
all_preds = []
with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs.to(device)
        outputs = net(inputs)
        # Index of the largest logit is the predicted class id.
        predicted = outputs.argmax(dim=1)
        # Store plain Python ints so they can be used directly as dict keys.
        all_preds.extend(predicted.cpu().tolist())

  net.load_state_dict(torch.load(model_path))


このままだと、all_preds に格納された予測ラベルは 0~9 の数値ラベルなので元に戻す。

*ひらがなで学習した人*

In [8]:
# Read the image-info table; header=None means the file has no header row, so
# the columns are labelled 0, 1, ...
img_data = pd.read_csv('/src/openhouse2024_competition/test/images_info.csv', header = None)
# NOTE(review): this is not the last expression of the cell, so the preview is
# not displayed.
img_data.head()

# Get the class names from column 1.
# Changed here: sorted() was added so the names come out in sorted order.
classes = sorted(img_data[1].unique())
classes

['あ', 'い', 'お', 'に', 'ぬ', 'ね', 'は', 'め', 'れ', 'ろ']

In [9]:
# Numeric class id -> hiragana label.
# class_idx is the mapping shown by print(test_dataset.class_to_idx) in the tutorial.
class_idx = {
    'あ': 0, 'い': 1, 'お': 2, 'に': 3, 'ぬ': 4,
    'ね': 5, 'は': 6, 'め': 7, 'れ': 8, 'ろ': 9,
}
# Invert the mapping, then translate every prediction in order.
inv_class_idx = dict((index, label) for label, index in class_idx.items())
hiragana_pred = list(map(inv_class_idx.__getitem__, all_preds))

In [10]:
# Save the predictions as the submission CSV: one label per row, written
# without a header row or an index column.
output_df = pd.DataFrame(data={'Predicted_Label': hiragana_pred})
print(output_df)

output_df.to_csv(output_csv_path, header=False, index=False)

    Predicted_Label
0                 ね
1                 ろ
2                 ろ
3                 あ
4                 い
..              ...
995               お
996               め
997               ろ
998               ね
999               に

[1000 rows x 1 columns]
