In [1]:
import os
from pathlib import Path

project_root = Path().resolve()

In [2]:
import pandas as pd

path_to_labels = Path().resolve()/ "data" / "ETL8G"/ "ETL8G_01_unpack"/"meta.csv"

labels_df = pd.read_csv(path_to_labels)

labels = labels_df["char"]
labels = labels.tolist()


In [3]:
all_labels = labels.copy()
labels_copy = all_labels.copy()

for i in range(31):
    labels_copy_2 = labels_copy.copy()
    all_labels.extend(labels_copy_2)

all_labels.extend(labels_copy[:956])

In [4]:
# go through each folder
path_label_list = []
etl_dir = Path().resolve() / "data" / "ETL8G"
for folder in os.listdir(etl_dir):
    if "unpack" in folder:
        folder_path = os.path.join(etl_dir, folder)

        # go through each png in folder
        for fname in os.listdir(folder_path):
            if fname.endswith(".png"):
                fpath = os.path.join(folder_path, fname)
                # take filename without extension
                idx = int(os.path.splitext(fname)[0])
                # compute label
                label = idx % 956
                path_label_list.append((fpath, label))

print(path_label_list[956])


('C:\\Users\\alicj\\PycharmProjects\\KanjiRecognitionModel\\data\\ETL8G\\ETL8G_01_unpack\\00956.png', 0)


In [5]:
# label_index_list = []
# for path, label in path_label_list:
#     label_index_list.append(label)
# print(label_index_list[95])

In [None]:
from torch.utils.data import Dataset
from PIL import Image
import torch
import torchvision.transforms as T

class ImageDataset(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        path, label = self.data[idx]
        img = Image.open(path).convert("L")
        if self.transform:
            img = self.transform(img)
        else:
            img = T.ToTensor()(img)
        return img, torch.tensor(label, dtype=torch.long)



In [48]:
transform = T.Compose([
    T.Resize((64, 64)),
    T.ToTensor(),
    T.Normalize(mean=[0.5], std=[0.5]),
])

imageDataset = ImageDataset(path_label_list, transform)
img, lab = imageDataset[0]
# print(img.size)
print(lab)
print(type(img), img.shape, lab)

tensor(0)
<class 'torch.Tensor'> torch.Size([1, 64, 64]) tensor(0)


In [8]:
from sklearn.model_selection import train_test_split

# first split train vs temp (val+test)
train_data, temp_data = train_test_split(path_label_list[:70000], test_size=0.3, random_state=42, stratify=[d[1] for d in path_label_list[:70000]]) # used stratify to preserve the same class proportions

# then split temp into val and test
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42, stratify=[d[1] for d in temp_data])


In [9]:
train_dataset = ImageDataset(train_data, transform)
val_dataset   = ImageDataset(val_data, transform)
test_dataset  = ImageDataset(test_data, transform)


In [10]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_dataset, batch_size=32, num_workers=0)
test_loader  = DataLoader(test_dataset, batch_size=32, num_workers=0)


In [11]:
import torch.nn as nn
import torch.nn.functional as F
class ConvBlock(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

    def forward(self, x):
        return self.block(x)

In [12]:
class CNN_Improved(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        # 64x64 -> 32x32
        self.block1 = ConvBlock(1, 32)
        # 32x32 -> 16x16
        self.block2 = ConvBlock(32, 64)
        # 16x16 -> 8x8
        self.block3 = ConvBlock(64, 128)

        self.gap  = nn.AdaptiveAvgPool2d(1)   # 8x8 -> 1x1
        self.drop = nn.Dropout(0.3)
        self.fc   = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)          # [B, 128, 8, 8]
        x = self.gap(x)             # [B, 128, 1, 1]
        x = torch.flatten(x, 1)     # [B, 128]
        x = self.drop(x)
        x = self.fc(x)
        return x

In [14]:
model = CNN_Improved(956)
model.load_state_dict(torch.load("best_model3.pt", map_location="cpu"))
model.eval()

CNN_Improved(
  (block1): ConvBlock(
    (block): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (block2): ConvBlock(
    (block): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
   

In [19]:
correct, total =0, 0
test_loss = 0.0
loss_fn = torch.nn.CrossEntropyLoss()
preds =[]
with torch.no_grad():
    for x,y in test_loader:
        logits = model(x)
        test_loss = loss_fn(logits, y).item() * x.size(0)
        pred = logits.argmax(1)
        preds.append(pred)
        correct+= (pred==y).sum().item()
        total += x.size(0)
print(f"Test acc: {100*correct/total:.2f}% | Test loss: {test_loss/total:.4f}")

Test acc: 95.10% | Test loss: 0.0001


In [63]:
import numpy as np

all_labels = []
all_preds = []

model.eval()
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to("cpu")
        labels = labels.to("cpu")

        outputs = model(imgs)
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

all_labels = np.array(all_labels)
all_preds = np.array(all_preds)

print(len(all_labels), len(all_preds))  # powinno być to samo


10500 10500


In [None]:
# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# import matplotlib.pyplot as plt
#
#
# # preds  → np.array z predykcjami modelu
# # targets → np.array z prawdziwymi etykietami
# # class_names → lista nazw klas, np. ["日","本","人",...]
#
# cm = confusion_matrix(all_labels, all_preds, normalize="true")
#
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
#
# plt.figure(figsize=(10, 10))
# disp.plot(cmap="Blues", xticks_rotation=45, colorbar=True)
#
# plt.title("Confusion Matrix")
# plt.tight_layout()
# plt.show()
