In [37]:
import numpy as np
import os
from torchvision import transforms
from PIL import Image
from datasets import Dataset
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from sklearn.metrics import classification_report, accuracy_score
import torch.nn.functional as F

## Data preprocessing

In [38]:
class PokemonDataset:
    """In-memory grayscale Pokemon image dataset built from one sub-folder per class.

    Every sub-directory of ``data_dir`` is treated as one class. Each image
    file is loaded as RGB, resized to ``IMAGE_SIZE``, converted to grayscale
    and stored as a tensor, together with randomly augmented copies of it.

    Attributes:
        data_dir: Root folder that was scanned.
        pokemons_names: Class names (sub-folder names, sorted); a sample's
            label is the index of its class in this list.
        data: One image tensor per sample.
        labels: Integer class index per sample, aligned with ``data``.
        groups: Source-image id per sample, aligned with ``data``. A source
            image and all of its augmented copies share the same id.
    """

    # Target (height, width) of every stored image. LeNet's hard-coded
    # 16 * 29 * 29 flatten size corresponds to 128x128 inputs.
    IMAGE_SIZE = (128, 128)

    def __init__(self, data_dir: str, n_pokemons: int = 10):
        """Scan ``data_dir`` and load every image eagerly.

        Args:
            data_dir: Folder containing one sub-folder of images per Pokemon.
            n_pokemons: Maximum number of classes to load (first ones in
                sorted order). ``None`` loads every sub-folder.
        """
        self.data_dir = data_dir
        self.pokemons_names = self._list_pokemon_dirs(data_dir, n_pokemons)
        self.data = []
        self.labels = []
        self.groups = []
        self.prepare_dataset()

    @staticmethod
    def _list_pokemon_dirs(data_dir: str, limit=None):
        """Return the sorted sub-directory names of ``data_dir``, at most ``limit`` of them."""
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # label <-> name mapping (and the selected subset) stable across runs.
        # Plain files are skipped because they cannot hold class images.
        names = sorted(
            entry
            for entry in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, entry))
        )
        return names[:limit]

    def load_image(self, filepath: str):
        """Open ``filepath`` and return it as an RGB PIL image."""
        # convert() returns an independent in-memory image, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(filepath) as img:
            return img.convert("RGB")

    def _variants_for_pokemon(self, pokemon_name: str):
        """Load one class folder.

        Returns:
            ``(originals, augmented)`` where ``originals[i]`` is the resized
            grayscale source image and ``augmented[i]`` is the list of its
            augmented copies.
        """
        folder = os.path.join(self.data_dir, pokemon_name)
        to_canonical = transforms.Compose([
            transforms.Resize(self.IMAGE_SIZE),
            transforms.Grayscale(),
        ])
        originals = []
        augmented = []
        for filename in sorted(os.listdir(folder)):
            image = self.load_image(os.path.join(folder, filename))
            augmented.append(self.augment_image(image=image))
            originals.append(to_canonical(image))
        return originals, augmented

    def get_images_for_one_pokemon(self, pokemon_name: str):
        """Return all images of one class: the originals first, then every augmented copy."""
        originals, augmented = self._variants_for_pokemon(pokemon_name)
        return originals + [img for variants in augmented for img in variants]

    def augment_image(self, image, n_augmented_images: int = 10):
        """Return ``n_augmented_images`` randomly augmented grayscale copies of ``image``.

        The flip probability and the maximum rotation angle are drawn once per
        call, so all copies of one source image share them; the variation
        between copies comes from the random transforms themselves.
        """
        transform = transforms.Compose([
            transforms.Resize(self.IMAGE_SIZE),
            transforms.RandomHorizontalFlip(p=np.random.random()),
            transforms.RandomRotation(degrees=np.random.uniform(low=1, high=45)),
            transforms.GaussianBlur(kernel_size=5),
            transforms.Grayscale()
        ])
        return [transform(image) for _ in range(n_augmented_images)]

    def prepare_dataset(self):
        """(Re)build ``data``, ``labels`` and ``groups`` from the class folders."""
        # Start from scratch so that calling this method again does not
        # append a second copy of every sample.
        self.data = []
        self.labels = []
        self.groups = []

        to_tensor = ToTensor()
        next_source_id = 0
        for idx, name in enumerate(self.pokemons_names):
            originals, augmented = self._variants_for_pokemon(name)
            # Same sample order as get_images_for_one_pokemon: originals, then
            # augmented copies. Each sample carries the id of its source image.
            samples = [
                (image, next_source_id + k) for k, image in enumerate(originals)
            ]
            samples += [
                (image, next_source_id + k)
                for k, variants in enumerate(augmented)
                for image in variants
            ]
            next_source_id += len(originals)

            for image, source_id in samples:
                self.data.append(to_tensor(image))  # Convert images to tensors
                self.labels.append(idx)
                self.groups.append(source_id)

    def to_huggingface_dataset(self):
        """Return every sample as a Hugging Face ``Dataset`` with ``image`` and ``label`` columns."""
        dataset = Dataset.from_dict({"image": self.data, "label": self.labels})
        return dataset

    def train_test_split(self, test_size=0.2, random_state=42):
        """Split the samples into train and test ``Dataset`` objects.

        The split is made over *source images*, not individual samples: a
        source image and all of its augmented copies always end up on the same
        side. Splitting sample-wise would put near-duplicates of training
        images into the test set and overstate the test metrics.

        Args:
            test_size: Share (or absolute number) of source images held out
                for testing, passed to scikit-learn's ``train_test_split``.
            random_state: Seed for the split.

        Returns:
            ``(train_dataset, test_dataset)``, each with ``image`` and
            ``label`` columns.
        """
        source_ids = sorted(set(self.groups))
        # Inside a method this name resolves to the scikit-learn function
        # imported at module level, not to this method.
        _, held_out_ids = train_test_split(
            source_ids, test_size=test_size, random_state=random_state
        )
        held_out = set(held_out_ids)

        train_idx = [i for i, group in enumerate(self.groups) if group not in held_out]
        test_idx = [i for i, group in enumerate(self.groups) if group in held_out]

        train_dataset = Dataset.from_dict({
            "image": [self.data[i] for i in train_idx],
            "label": [self.labels[i] for i in train_idx],
        })
        test_dataset = Dataset.from_dict({
            "image": [self.data[i] for i in test_idx],
            "label": [self.labels[i] for i in test_idx],
        })
        return train_dataset, test_dataset

In [39]:
# Build the dataset from the per-Pokemon image folders (loads every image eagerly)
DATA_DIR = "../data/pokemons/"
dataset = PokemonDataset(data_dir=DATA_DIR)

In [None]:
# Quick sanity check of the loaded data: class count and (floored) samples per class
n_classes_loaded = len(dataset.pokemons_names)
n_samples_loaded = len(dataset.labels)
print(f"Unique Pokemons: {n_classes_loaded}")
print(f"Images per Pokemon on average: {n_samples_loaded // n_classes_loaded}")

In [41]:
# Hold out a fixed share of the data for evaluation; the rest is used for training
TEST_FRACTION = 0.2
train_dataset, test_dataset = dataset.train_test_split(test_size=TEST_FRACTION)

In [42]:

# Collate function: turns a list of dataset rows into batched tensors for the DataLoader
def collate_batch(batch):
    """Stack a list of ``{"image": ..., "label": ...}`` rows into two tensors.

    Args:
        batch: Non-empty sequence of rows. ``image`` may be a nested list, a
            NumPy array or a tensor (all rows must have the same shape);
            ``label`` must be convertible with ``int()``.

    Returns:
        ``(imgs, targets)``: a float32 tensor with a new leading batch
        dimension, and an int64 tensor of class indices.
    """
    # Convert row by row and stack: torch.tensor() on a list of multi-element
    # tensors raises, whereas as_tensor + stack accepts lists and tensors alike.
    imgs = torch.stack(
        [torch.as_tensor(item["image"], dtype=torch.float32) for item in batch]
    )
    targets = torch.tensor([int(item["label"]) for item in batch], dtype=torch.long)
    return imgs, targets

# Batch iterators over the two splits: training batches are reshuffled on every
# pass, test batches keep the dataset order.
BATCH_SIZE = 32
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_batch,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_batch,
)

In [None]:
# Number of distinct class indices actually present in the loaded samples
len(np.unique(dataset.labels))

In [34]:
class LeNet(nn.Module):
    """LeNet-style CNN for single-channel 128x128 images.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, then two
    fully connected layers. Spatial size per stage for a 128x128 input:
    128 -> 124 -> 62 -> 58 -> 29, hence the ``16 * 29 * 29`` flatten size.
    """

    def __init__(self, num_classes=None):
        """Build the layers.

        Args:
            num_classes: Number of output logits. When omitted, falls back to
                the number of classes in the notebook-level ``dataset`` so
                existing ``LeNet()`` calls keep working.
        """
        super().__init__()
        if num_classes is None:
            # Legacy default: relies on the `dataset` global having been
            # created before the model is instantiated.
            num_classes = len(dataset.pokemons_names)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 29 * 29, 120)
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Map a ``(batch, 1, 128, 128)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything but the batch dimension. Unlike view(-1, N), a
        # wrongly sized input fails in fc1 instead of being silently folded
        # into the batch dimension.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Seed torch's global RNG so that weight initialisation and batch shuffling are
# repeatable on a fresh "Restart & Run All" (same value as the split's default
# random_state).
SEED = 42
torch.manual_seed(SEED)

# Initialize model, loss function, and optimizer
model = LeNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()  # gradients accumulate by default; clear them per batch
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Reported value is the mean of the per-batch losses for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Evaluation: collect predicted and true class indices over the whole test set
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():  # inference only, no gradient bookkeeping needed
    for images, labels in test_loader:
        outputs = model(images)
        preds = outputs.argmax(dim=1)  # index of the largest logit per sample
        all_preds.extend(preds.tolist())
        all_labels.extend(labels.tolist())

# Calculate metrics
accuracy = accuracy_score(all_labels, all_preds)
# Pass the full list of class ids explicitly: without `labels`, the report
# raises a ValueError whenever a class is missing from the test split, because
# the number of observed classes no longer matches `target_names`.
class_ids = list(range(len(dataset.pokemons_names)))
report = classification_report(
    all_labels,
    all_preds,
    labels=class_ids,
    target_names=dataset.pokemons_names,
)
print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:')
print(report)