# Genshin Image Classifier
a convolutional neural network (CNN) model to classify images of characters from the game Genshin Impact. The model is trained on a dataset of 20 classes, each representing characters from the game. The dataset is obtained from web scraping images from pixiv.net using it tag search feature.

## Import dependencies

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

## Check available GPU devices and empty cache

In [2]:
torch.cuda.empty_cache()

In [3]:
!nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU (UUID: GPU-fcd7eac7-b588-89f2-ce92-bd0a6029e522)


## Select GPU as device if available
select the device for training, if GPU is available, select GPU, otherwise select CPU

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # Select GPU if available, else CPU

In [5]:
print(device)

cuda:0


## Mount the Drive unit (Only for Google Colab)

In [6]:
from google.colab import drive # Mount Google Drive to access dataset
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


## Define custom torch dataset class
the dataset class is used to load the data from the image folders, into a pytorch dataset object. This class is used to load the data into the model for training and validation.

In [6]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import Compose
import os
from PIL import Image

class GenshinDataSet(Dataset):
    def __init__(self, directory: str, transforms: Compose = None) -> None:
        self.directory = directory  # path to the dataset directory
        self.characters = os.listdir(directory)  # List of characters as folder names
        self.transforms = transforms  # Image transformations
        self.images = []  # List of image paths
        self.labels = []  # List of labels (numerical)

        for character in self.characters:  # Loop through the list of characters to get the images and labels
            category_path = os.path.join(directory, character)  # Path to the character folder
            label = len(self.characters) - 1 - self.characters.index(character)  # Assign numerical label based on character index

            for image_file in os.listdir(category_path):  # Loop through the images in the character folder
                image_path = os.path.join(category_path, image_file)
                self.images.append(image_path)  # Append image path           
                self.labels.append(label)  # Append numerical label

    def __getitem__(self, index) -> tuple[any, torch.Tensor]:  # Get the image and label at the specified index
        with Image.open(self.images[index]) as img:
            image = img.copy()
        label = self.labels[index]  # Get the label

        if image.mode == 'L':  # Check for grayscale mode ('L')
                    image = image.convert('RGB')  # Convert to RGB mode
        
        if self.transforms is not None:
            image = self.transforms(image)

        return image, torch.tensor(label)  # Return the image and label as a tuple (image, torch.Tensor)
    
    def __len__(self) -> int:
        return len(self.images)  # Return the number of images in the dataset

    def getLabelCount(self) -> int:
        return len(self.characters)  # Return the number of characters in the dataset

## Create dataset with transforms
Create the pytorch dataset class with transforms for the training and validation datasets.

In [7]:
mean = torch.tensor([0.0189, 0.0177, 0.0192]) # Mean values for normalization, get from normalizationParameters.py
std = torch.tensor([0.0098, 0.0097, 0.0094]) # Standard deviation values for normalization, get from normalizationParameters.py
batch_size = 64 # Batch size (number of images to process at once)

In [8]:
# Local training
all_transforms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(mean=mean, std=std)
                                     ])

train_dataset = GenshinDataSet(directory = r'C:\\Users\\Katana GF66 11UC\\Documents\\GenshinImageClassifier\\processed_images', transforms = all_transforms) # Load the training dataset

num_classes = train_dataset.getLabelCount() # Get the number of classes in the dataset

test_dataset = GenshinDataSet(directory = r'C:\\Users\\Katana GF66 11UC\\Documents\\GenshinImageClassifier\\processed_images_test', transforms = all_transforms) # Load the testing dataset

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True) # Instantiate loader objects to facilitate processing

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True) # Instantiate loader objects to facilitate processing

In [25]:
# Google Colab training
all_transforms = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(mean=mean, std=std)
                                     ])

train_dataset = GenshinDataSet(directory = '/content/gdrive/MyDrive/GenshinImageClassifier/128classifier/processed_images/', transforms = all_transforms) # Load the training dataset

num_classes = train_dataset.getLabelCount() # Get the number of classes in the dataset

test_dataset = GenshinDataSet(directory = '/content/gdrive/MyDrive/GenshinImageClassifier/128classifier/processed_images_test/', transforms = all_transforms) # Load the testing dataset

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True) # Instantiate loader objects to facilitate processing

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True) # Instantiate loader objects to facilitate processing

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Users\Katana GF66 11UC\.conda\envs\genshinimageclassifierCUDA\Lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Katana GF66 11UC\AppData\Local\Temp\ipykernel_23856\169898249.py", line 6, in <module>
    train_dataset = GenshinDataSet(directory = '/content/gdrive/MyDrive/GenshinImageClassifier/128classifier/processed_images/', transforms = all_transforms) # Load the training dataset
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Katana GF66 11UC\AppData\Local\Temp\ipykernel_23856\2820496547.py", line 10, in __init__
    self.characters = os.listdir(directory)  # List of characters as folder names
                      ^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [WinError 3] The system cannot find the path specified: '/conte

## Create the ConvNet Class
define the ConvNet class that represents the convolutional neural network model.

In [14]:
import torch
import torch.nn as nn

#input is a 128x128 image with 3 channels (RGB)
class ConvNet(nn.Module):
  def __init__(self,category_count):
    super(ConvNet,self).__init__()
    self.convolution_layer1 = nn.Conv2d(in_channels=3,out_channels=32,kernel_size=5) # Convolution layer 1: 3 input channels (RGB), 64 output channels (64 filters), 5x5 kernel size, output size = (128-5)/1 + 1 = 124
    self.convolution_layer2 = nn.Conv2d(in_channels=32,out_channels=32,kernel_size=5) # Convolution layer 2: 64 input channels (64 kernels from previous layer), 64 output channels (64 filters), 5x5 kernel size, output size = (124-5)/1 + 1 = 120
    self.max_pool = nn.MaxPool2d(kernel_size=2,stride=2) # Max pooling layer: 2x2 kernel size, stride 2 (reduces image size by 2, 120x120 to 60x60)
    self.dropout = nn.Dropout2d(p=0.5) # Dropout layer: 50% dropout rate

    self.convolution_layer3 = nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3) # Convolution layer 3: 64 input channels (64 kernels from previous layer), 128 output channels (128 filters), 3x3 kernel size, output size = (60-3)/1 + 1 = 58
    self.convolution_layer4 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3) # Convolution layer 4: 128 input channels (128 kernels from previous layer), 128 output channels (128 filters), 3x3 kernel size, output size = (58-3)/1 + 1 = 56
    self.max_pool2 = nn.MaxPool2d(kernel_size=2,stride=2) # Max pooling layer 2: 2x2 kernel size, stride 2 (reduces image size by 2, 64x64 to 32x32), output size = 56/2 = 28
    
    self.convolution_layer5 = nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3) # Convolution layer 5: 128 input channels (128 kernels from previous layer), 256 output channels (256 filters), 3x3 kernel size, output size = (28-3)/1 + 1 = 26
    self.convolution_layer6 = nn.Conv2d(in_channels=128,out_channels=128,kernel_size=3) # Convolution layer 6: 256 input channels (256 kernels from previous layer), 256 output channels (256 filters), 3x3 kernel size, output size = (26-3)/1 + 1 = 24
    self.max_pool3 = nn.MaxPool2d(kernel_size=2,stride=2) # Max pooling layer 3: 2x2 kernel size, stride 2 (reduces image size by 2, 32x32 to 16x16), output size = 24/2 = 12

    self.fully_connected1 = nn.Linear(128*12*12,4096) # Fully connected layer 1: 128*12*12 input features (128 filters from last convolution layer, 12x12 image size), 128 output features
    self.relu = nn.ReLU() # ReLU activation function
    self.fully_connected2 = nn.Linear(4096,1024) # Fully connected layer 2: 4096 inputs 1024 outputs
    self.relu2 = nn.ReLU() # ReLU activation function
    self.fully_connected3 = nn.Linear(1024,128) # Fully connected layer 3: 1024 inputs 512 outputs
    self.relu3 = nn.ReLU() # ReLU activation function
    self.fully_connected4 = nn.Linear(128,category_count) # Output layer: linear layer
    self.softmax = nn.Softmax(dim=1) # Softmax activation function

  def forward(self,x):
    output = self.convolution_layer1(x) # Convolution layer 1
    #tanh activation function
    output = self.convolution_layer2(output) # Convolution layer 2
    output = self.max_pool(output) # Max pooling layer
    output = self.dropout(output) # Dropout layer

    output = self.convolution_layer3(output) # Convolution layer 3
    output = self.convolution_layer4(output) # Convolution layer 4
    output = self.max_pool2(output) # Max pooling layer 2

    output = self.convolution_layer5(output) # Convolution layer 5
    output = self.convolution_layer6(output) # Convolution layer 6
    output = self.max_pool3(output) # Max pooling layer 3

    output = output.reshape(output.size(0),-1) # Flatten the output for the fully connected layer

    output = self.fully_connected1(output) # Fully connected layer 1
    output = self.relu(output)  # ReLU activation function
    output = self.fully_connected2(output) # Fully connected layer 2
    output = self.relu2(output) # ReLU activation function
    output = self.fully_connected3(output) # Fully connected layer 3
    output = self.relu3(output) # ReLU activation function
    output = self.fully_connected4(output) # Output layer
    output = self.softmax(output) # Softmax activation function
    return output

## Hyperparameters configuration
set epochs, learning_rate, num_classes, device, loss function, optimizer and total steps

In [15]:
size = 128 # Image resolution for the model input
num_epochs = 200 # Number of epochs for training

model = ConvNet(num_classes) # Create the model with the number of classes required

lossFunction = nn.CrossEntropyLoss() # Set loss function as CrossEntropyLoss

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay = 0.005, momentum = 0.9) # Set thr optimizer as a Stochastic Gradient Descent with the learning rate, weight decay and momentum

total_step = len(train_loader) # Set the total step as the length of the train loader

# Entrenamiento

In [17]:
if torch.cuda.is_available():
      model.cuda()

for epoch in range(num_epochs):
	#Load in the data in batches using the train_loader object
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = lossFunction(outputs, labels)
        print(f"loss: {loss}")

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

loss: 1.7937062978744507
loss: 1.7931976318359375
loss: 1.788885235786438
loss: 1.7879923582077026
loss: 1.7867385149002075
loss: 1.7869040966033936
loss: 1.7930384874343872
loss: 1.7932608127593994
loss: 1.7937023639678955
loss: 1.7832533121109009
loss: 1.7923166751861572
loss: 1.79351806640625
loss: 1.7863588333129883
loss: 1.788608431816101
loss: 1.7960543632507324
loss: 1.7923638820648193
loss: 1.7838382720947266
loss: 1.7904646396636963
loss: 1.7876746654510498
loss: 1.7822284698486328
loss: 1.7916364669799805
loss: 1.7836687564849854
loss: 1.7935378551483154
loss: 1.7924178838729858
loss: 1.785955548286438
loss: 1.7915524244308472
loss: 1.7981659173965454
loss: 1.7702300548553467
loss: 1.78925621509552
loss: 1.7894781827926636
loss: 1.8059054613113403
loss: 1.7978020906448364
loss: 1.7983461618423462
loss: 1.7886232137680054
loss: 1.7750850915908813
loss: 1.7830655574798584
loss: 1.7913049459457397
loss: 1.7863885164260864
loss: 1.778971791267395
loss: 1.7948716878890991
loss: 1.

KeyboardInterrupt: 

## Prueba

### Test on training data

In [18]:
with torch.no_grad():
    if torch.cuda.is_available():
      model.cuda()
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the {train_dataset.__len__()} train images: {100 * correct / total} %')



Accuracy of the network on the 9590 train images: 94.76538060479666 %


### Test on validation data

In [21]:
with torch.no_grad():
    if torch.cuda.is_available():
      model.cuda()
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the {test_dataset.__len__()} test images: {100 * correct / total} %')



Accuracy of the network on the 5589 test images: 76.43585614600107 %


## Save to file

In [20]:
# prompt: save the model as a file

torch.save(model.state_dict(), r'C:\\Users\\Katana GF66 11UC\\Documents\\GenshinImageClassifier\\models\\model_128_6Categories_160epoch.pt')


## Share to Hugging Face

In [None]:
# hugging face PretrainedModel custom class



# Open from file

In [None]:
# prompt: open and make inference from  model.pt

# Import the necessary libraries
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Load the model
model = ConvNet(num_classes)
model.load_state_dict(torch.load('model.pt'))

# Define the data transform
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the data
data_dir = 'path/to/data'
dataset = ImageFolder(data_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Make inference
with torch.no_grad():
    for images, labels in dataloader:
        # Move the images to the device
        images = images.to(device)

        # Get the model's predictions
        outputs = model(images)

        # Get the predicted class labels
        _, predicted = torch.max(outputs, 1)

        # Print the predicted class labels
        print(predicted)
