# Import dependencies

In [51]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Check if GPU is available

In [None]:
!nvidia-smi -L

GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU (UUID: GPU-fcd7eac7-b588-89f2-ce92-bd0a6029e522)


# Select GPU as device if available

In [52]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # Select GPU if available, else CPU

In [53]:
print(device)

cpu


# Mount the Drive unit

In [54]:
from google.colab import drive # Mount Google Drive to access dataset
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


# Define custom torch dataset class

In [61]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import Compose
import os
from PIL import Image

class GenshinDataSet(Dataset):
    def __init__(self, directory: str, transforms: Compose = None) -> None:
        self.directory = directory  # path to the dataset directory
        self.characters = os.listdir(directory)  # List of characters as folder names
        self.transforms = transforms  # Image transformations
        self.images = []  # List of image paths
        self.labels = []  # List of labels (numerical)

        for character in self.characters:  # Loop through the list of characters to get the images and labels
            category_path = os.path.join(directory, character)  # Path to the character folder
            label = len(self.characters) - 1 - self.characters.index(character)  # Assign numerical label based on character index

            for image_file in os.listdir(category_path):  # Loop through the images in the character folder
                image_path = os.path.join(category_path, image_file)
                self.images.append(Image.open(image_path))
                self.labels.append(label)  # Append numerical label

    def __getitem__(self, index) -> tuple[any, torch.Tensor]:  # Get the image and label at the specified index
        image = self.images[index]  # Get the image
        label = self.labels[index]  # Get the label

        if image.mode == 'L':  # Check for grayscale mode ('L')
                    image = image.convert('RGB')  # Convert to RGB mode

        if self.transforms is not None:
            image = self.transforms(image)

        return image, torch.tensor(label)  # Return the image and label as a tuple (image, torch.Tensor)

    def __len__(self) -> int:
        return len(self.images)  # Return the number of images in the dataset

    def getLabelCount(self) -> int:
        return len(self.characters)  # Return the number of characters in the dataset

# Create dataset with transforms

In [68]:
mean = torch.tensor([0.0195, 0.0181, 0.0187]) # Mean values for normalization, get from normalizationParameters.py
std = torch.tensor([0.0088, 0.0088, 0.0087]) # Standard deviation values for normalization, get from normalizationParameters.py
size = 32 # Image resolution for the model input

batch_size = 64
learning_rate = 0.001
num_epochs = 20

In [63]:
all_transforms = transforms.Compose([transforms.Resize((size,size)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=mean, std=std)
                                     ])

train_dataset = GenshinDataSet(directory = '/content/gdrive/MyDrive/GenshinImageClassifier/processed_images/', transforms = all_transforms) # Load the training dataset

num_classes = train_dataset.getLabelCount()

test_dataset = GenshinDataSet(directory = '/content/gdrive/MyDrive/GenshinImageClassifier/processed_images_test/', transforms = all_transforms) # Load the testing dataset

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True) # Instantiate loader objects to facilitate processing

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True) # Instantiate loader objects to facilitate processing

# ConvNet

In [58]:
class ConvNet(nn.Module):
  def __init__(self,category_count):
    super(ConvNet,self).__init__()
    self.convolution_layer1 = nn.Conv2d(in_channels=3,out_channels=32,kernel_size=3) # Convolution layer 1: 3 input channels (RGB), 32 output channels (32 filters), 3x3 kernel size
    self.convolution_layer2 = nn.Conv2d(in_channels=32,out_channels=32,kernel_size=3) # Convolution layer 2: 32 input channels (32 kernels from previous layer), 32 output channels (32 filters), 3x3 kernel size
    self.max_pool = nn.MaxPool2d(kernel_size=2,stride=2) # Max pooling layer: 2x2 kernel size, stride 2 (reduces image size by 2)

    self.convolution_layer3 = nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3) # Convolution layer 3: 32 input channels (32 kernels from previous layer), 64 output channels (64 filters), 3x3 kernel size
    self.convolution_layer4 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3)
    self.max_pool2 = nn.MaxPool2d(kernel_size=2,stride=2) # Max pooling layer: 2x2 kernel size, stride 2 (reduces image size by 2)

    self.fully_connected1 = nn.Linear(1600,128) # Fully connected layer 1: 64*5*5 input features (64 filters, 5x5 image size), 128 output features
    self.relu = nn.ReLU() # ReLU activation function
    self.fully_connected2 = nn.Linear(128,category_count) # Output layer: linear layer

  def forward(self,x):
    output = self.convolution_layer1(x)
    output = self.convolution_layer2(output)
    output = self.max_pool(output)

    output = self.convolution_layer3(output)
    output = self.convolution_layer4(output)
    output = self.max_pool2(output)

    output = output.reshape(output.size(0),-1)

    output = self.fully_connected1(output)
    output = self.relu(output)
    output = self.fully_connected2(output)
    return output

# Configurando hiperparametro

In [59]:
model = ConvNet(num_classes) # Create the model with the number of classes required

lossFunction = nn.CrossEntropyLoss() # Set loss function as CrossEntropyLoss

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9) # Set thr optimizer as a Stochastic Gradient Descent with the learning rate, weight decay and momentum

total_step = len(train_loader) # Set the total step as the length of the train loader

# Entrenamiento

In [69]:
if torch.cuda.is_available():
      model.cuda()

for epoch in range(num_epochs):
	#Load in the data in batches using the train_loader object
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = lossFunction(outputs, labels)
        print(f"loss: {loss}")

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5B1CF70> to RGB mode
loss: 2.240577459335327
loss: 2.498189926147461
loss: 2.3150103092193604
loss: 2.2642295360565186
loss: 2.2696075439453125
loss: 2.192246437072754
loss: 2.262152910232544
loss: 2.4263665676116943
loss: 2.1956984996795654
loss: 2.3902409076690674
loss: 2.336926221847534
loss: 2.7829058170318604
loss: 2.322904348373413
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5E741F0> to RGB mode
loss: 2.4508137702941895
loss: 2.0709245204925537
loss: 2.90267014503479
loss: 2.3368968963623047
loss: 2.216075897216797
loss: 2.3687398433685303
loss: 2.4543094635009766
loss: 2.378943681716919
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE6957C70> to RGB mode
loss: 2.3427956104278564
loss: 2.542393922805786
loss: 2.4076032638549805
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5F1D420> to RGB mode
loss: 2

# Prueba

In [70]:
with torch.no_grad():
    if torch.cuda.is_available():
      model.cuda()
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the {train_dataset.__len__()} train images: {100 * correct / total} %')



Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5F1D420> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5E2B2E0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5E741F0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE6C666E0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE6957490> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE6957C70> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5B1CF70> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE62757B0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5EC4190> to RGB mode
Accuracy of the network on the 5974 train images: 95.39671911617008 %


In [71]:
with torch.no_grad():
    if torch.cuda.is_available():
      model.cuda()
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the {train_dataset.__len__()} train images: {100 * correct / total} %')



Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE58199F0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5972170> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE56CA3B0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5818A00> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5623B80> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE58194B0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5623040> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE8200C40> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5819BD0> to RGB mode
Converting <PIL.PngImagePlugin.PngImageFile image mode=L size=256x256 at 0x7B5AE5623CA0> to

# Save to file

In [None]:
# prompt: save the model as a file

torch.save(model.state_dict(), 'model.pt')


# Open from file

In [None]:
# prompt: open and make inference from  model.pt

# Import the necessary libraries
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Load the model
model = ConvNet(num_classes)
model.load_state_dict(torch.load('model.pt'))

# Define the data transform
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the data
data_dir = 'path/to/data'
dataset = ImageFolder(data_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Make inference
with torch.no_grad():
    for images, labels in dataloader:
        # Move the images to the device
        images = images.to(device)

        # Get the model's predictions
        outputs = model(images)

        # Get the predicted class labels
        _, predicted = torch.max(outputs, 1)

        # Print the predicted class labels
        print(predicted)
