In [1]:
import csv
import json
import os
import random

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image, ImageDraw, ImageFont
from sklearn.utils import shuffle
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torchvision import transforms
from torchvision.transforms import ToTensor
from tqdm import tqdm


In [9]:
# Image size in pixels (height, width)
h, w = 64, 64

# Characters to render (hiragana); None stands for a blank, all-white image
text_options = list("あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわゐゑをん") + [None]

font_paths = [
    "C:/Windows/Fonts/msmincho.ttc",  # Mincho (serif) 1
    "C:/Windows/Fonts/yumin.ttf",  # Mincho (serif) 2
    "C:/Windows/Fonts/BIZ-UDMinchoM.ttc",    # Mincho (serif) 3
    "C:/Windows/Fonts/HGRMB.ttc",   # Mincho (serif) 4
    "C:/Windows/Fonts/HGRME.ttc",   # Mincho (serif) 5
    "C:/Windows/Fonts/msgothic.ttc", # Gothic (sans-serif) 1
    "C:/Windows/Fonts/UDDigiKyokashoN-R.ttc",    # Kyokasho (textbook style) 1
    "C:/Windows/Fonts/UDDigiKyokashoN-B.ttc",    # Kyokasho (textbook style) 2
    "C:/Windows/Fonts/HGRGY.ttc",       # Gyosho (semi-cursive) 1
    "C:/Windows/Fonts/HGRSKP.ttf",      # Kaisho (regular script) 1
]

# Generation parameters
samples_per_font = 500       # images rendered per (character, font) pair
font_size_range = (30, 70)   # inclusive bounds handed to random.randint
max_offset = 20              # largest shift of the glyph centre per axis, in pixels

# Seed the `random` module so the font sizes and offsets drawn below are the
# same on every "Restart & Run All".
random.seed(42)

# Loaded fonts keyed by (path, size). Only a few hundred distinct pairs exist,
# so caching avoids re-opening the font file for every single image.
font_cache = {}

# Outputs: one uint8 array per image and the matching label (character or None)
imgs = []
labels = []

for text in text_options:
    for font_path in font_paths:
        for i in range(samples_per_font):
            # Pick a random font size within font_size_range
            font_size = random.randint(*font_size_range)
            cache_key = (font_path, font_size)
            font = font_cache.get(cache_key)
            if font is None:
                font = font_cache[cache_key] = ImageFont.truetype(font_path, font_size)

            # Start from a white greyscale canvas
            img = Image.new("L", (w, h), "white")
            draw = ImageDraw.Draw(img)

            # The blank class (text is None) keeps the canvas untouched
            if text is not None:
                offset_x = random.randint(-max_offset, max_offset)
                offset_y = random.randint(-max_offset, max_offset)
                position = (w // 2 + offset_x, h // 2 + offset_y)

                # anchor="mm" places the middle of the glyph at `position`
                draw.text(position, text, fill="black", font=font, anchor="mm")

            # Binarise: pixels that are exactly 255 stay 255, every other
            # value (glyph body and anti-aliased edges) becomes 0.
            binary = np.where(np.array(img) == 255, 255, 0).astype(np.uint8)

            # `binary` is already a fresh ndarray, so store it without copying
            imgs.append(binary)
            labels.append(text)


In [None]:
# Shuffle images and labels in unison; the fixed random_state makes the
# resulting order reproducible across kernel restarts.
imgs, labels = shuffle(imgs, labels, random_state=42)

In [None]:
class ImageDataset(torch.utils.data.Dataset):
    """Dataset of glyph images with random-rotation augmentation.

    Each item is ``(image, class_index)``. The image is divided by 255 and
    passed through ``self.transform``; the class index comes from
    ``self.label_map``.

    Args:
        imgs: Sequence of image arrays. Assumed to be 2-D uint8 arrays with a
            white (255) background, as built by the generation cell -- TODO
            confirm if images come from another source.
        labels: Sequence of labels aligned with ``imgs``; ``None`` marks the
            blank class.
    """

    def __init__(self, imgs, labels):
        self.imgs = imgs
        self.labels = labels

        # Non-None labels get indices in sorted order; None is always last.
        self.label_map = self._build_label_map(labels)

        # After normalisation white is 1.0. Rotation exposes the corners of the
        # canvas, so fill them with white instead of the default 0 (black),
        # which would otherwise add black wedges to every rotated sample.
        background = 1.0

        # Augmentation: keep the image as is or turn it upside down (picked at
        # random), then rotate by a random angle between -30 and +30 degrees.
        self.transform = transforms.Compose([
            transforms.ToTensor(),  # ndarray -> tensor
            transforms.RandomChoice([
                transforms.RandomRotation(degrees=[0, 0], fill=background),  # unchanged
                transforms.RandomRotation(degrees=[180, 180], fill=background),  # upside down
            ]),
            transforms.RandomRotation(degrees=[-30, 30], fill=background),
        ])

    @staticmethod
    def _build_label_map(labels):
        """Map each distinct non-None label to its sorted rank and None to the last index."""
        names = sorted({label for label in labels if label is not None})
        mapping = {name: index for index, name in enumerate(names)}
        mapping[None] = len(mapping)
        return mapping

    def __len__(self):
        """Return the number of images."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return the transformed image and the class index of sample ``idx``."""
        # Scale pixel values by 1/255 as float32, then apply the transform
        img = self.imgs[idx].astype(np.float32) / 255.0
        img = self.transform(img)

        # Translate the stored label into its class index
        label = self.labels[idx]
        label_idx = self.label_map[label]

        return img, label_idx

In [None]:
# Wrap the generated images and labels in a dataset; the number of classes is
# the size of its label map (which includes the blank/None class).
dataset = ImageDataset(imgs=imgs, labels=labels)
n_classes = len(dataset.label_map)

In [None]:
# Split the dataset 80 % / 20 % into train and test subsets
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size

# A seeded generator keeps train/test membership identical across runs, so a
# model evaluated after a kernel restart is not scored on samples it trained on.
split_generator = torch.Generator().manual_seed(42)
# NOTE(review): both subsets index into the same `dataset`, whose __getitem__
# always applies the random rotation, so test samples are augmented as well.
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

In [None]:
# Build the data loaders: training batches are reshuffled, test batches are not
batch_size = 32
loader_kwargs = {"batch_size": batch_size}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier that returns log-probabilities.

    Three stages of 3x3 convolution (stride 1, padding 1) -> batch norm ->
    ReLU -> 2x2 max pooling with 64, 32 and 16 channels, followed by a single
    linear layer. The linear layer takes 1024 features, which matches
    16 channels x 8 x 8 for a 64x64 input after three 2x poolings.

    Args:
        in_channels: Number of channels of the input images.
        out_channels: Number of output classes.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        conv_args = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv2d(in_channels, 64, **conv_args)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 32, **conv_args)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 16, **conv_args)
        self.bn3 = nn.BatchNorm2d(16)
        self.fc3 = nn.Linear(16 * 8 * 8, out_channels)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, bn in stages:
            x = F.max_pool2d(F.relu(bn(conv(x))), kernel_size=2)

        # Flatten everything except the batch dimension for the linear layer
        features = torch.flatten(x, start_dim=1)
        logits = self.fc3(features)
        return F.log_softmax(logits, dim=1)

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Model (single-channel input, one output per class), loss and optimiser.
# NLLLoss is used because the model already returns log-probabilities.
model = CNN(in_channels=1, out_channels=n_classes)
model = model.to(device)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
# Training
epochs = 30
losses = []       # loss of every training batch
accuracies = []   # accuracy of every training batch

model.train()
for epoch in range(epochs):
    pbar = tqdm(train_loader)
    for X, y_true in pbar:
        X = X.to(device)
        y_true = y_true.to(device)

        # One optimisation step
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y_true)
        loss.backward()
        optimizer.step()

        # Fraction of the batch whose most likely class equals the target
        acc = y_pred.argmax(dim=1).eq(y_true).float().mean()

        losses.append(loss.item())
        accuracies.append(acc.item())
        pbar.set_description(f"Epoch {epoch+1} Loss: {loss.item():.4f} Acc: {acc.item():.4f}")

In [None]:
# Evaluate the model on the test loader
model.eval()
n_correct, n_total = 0, 0

with torch.no_grad():
    for X, y_true in tqdm(test_loader):
        X = X.to(device)
        y_true = y_true.to(device)
        y_pred = model(X)

        # Count predictions whose most likely class equals the target
        matches = y_pred.argmax(dim=1).eq(y_true)
        n_correct += int(matches.sum())
        n_total += len(y_true)

accuracy = n_correct / n_total
print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
# Save the model and optimiser state together with the label -> class-index
# mapping, so predictions can be decoded back to characters without having to
# rebuild the dataset. The "model" and "optim" keys are unchanged.
ckpt = {
    "model": model.state_dict(),
    "optim": optimizer.state_dict(),
    "label_map": dataset.label_map,
}
torch.save(ckpt, "ckpt.pth")