In [None]:
!pip install torchsummary

In [15]:
import numpy as np
import pandas as pd
import cv2 as cv
import torch
import os
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary
from torchvision import models
from PIL import Image
import torchvision.models as models

# Preprocess

In [24]:
import torchvision.transforms.v2 as transforms

In [25]:
def read_image(folders, target_size=(190, 250)):
    """Load every image stored as ``<folder>/<class_name>/<file>``.

    Args:
        folders: Iterable of dataset root directories. Each sub-directory of a
            root is treated as one class and its name is used as the label.
            Entries of a root that are not directories are ignored.
        target_size: Currently unused; kept so existing callers that pass it
            keep working (the resize step it belonged to is disabled).

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the images converted
        from BGR to RGB, and the class-folder name of each image, in the same
        order.
    """
    import warnings

    extensions = ('.png', '.jpg', '.jpeg')
    image_array = []
    label = []

    for folder in folders:
        # Sorted so the load order does not depend on the file system.
        for class_name in sorted(os.listdir(folder)):
            class_folder = os.path.join(folder, class_name)
            if not os.path.isdir(class_folder):
                # Stray files next to the class folders are not classes.
                continue

            for file_name in sorted(os.listdir(class_folder)):
                # Case-insensitive so that e.g. "IMG_1.JPG" is picked up too.
                if not file_name.lower().endswith(extensions):
                    continue

                image_path = os.path.join(class_folder, file_name)
                image = cv.imread(image_path)
                if image is None:
                    # cv.imread signals an unreadable file by returning None.
                    warnings.warn(f"Skipping unreadable image: {image_path}")
                    continue

                image_array.append(cv.cvtColor(image, cv.COLOR_BGR2RGB))
                label.append(class_name)

    return np.array(image_array), np.array(label)


In [26]:
# Read the "train" and "val" folders into one pool of images and labels.
train, label = read_image(
    [
        '/kaggle/input/dataset-for-yolo-190x250/Dataset_(190x250)/train',
        '/kaggle/input/dataset-for-yolo-190x250/Dataset_(190x250)/val',
    ]
)

In [27]:
# Hold out 20% of the pooled images for validation. Stratifying on the labels
# keeps the class proportions the same in both parts; the fixed random_state
# makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    train,
    label,
    test_size=0.2,
    random_state=42,
    stratify=label,
)

In [28]:
class NumpyArrayDataset(Dataset):
    """Dataset over in-memory image arrays and their labels.

    Each item is returned as ``(image, label)``. The stored array is wrapped
    in a PIL image first and, when a transform was supplied, passed through it.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels = images, labels
        # Optional callable applied to the PIL image in __getitem__.
        self.transform = transform

    def __len__(self):
        """Number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        raw, target = self.images[idx], self.labels[idx]
        sample = Image.fromarray(raw)
        if not self.transform:
            return sample, target
        return self.transform(sample), target


In [30]:
# Training pipeline: grayscale (kept as 3 channels), random augmentation,
# then tensor conversion and per-channel normalisation.
train_transform = transforms.Compose(
    [
        transforms.Grayscale(3),
        # --- augmentation ---
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        # NOTE(review): torchvision documents `size` as (height, width);
        # confirm (190, 250) matches the orientation of the dataset images.
        transforms.RandomResizedCrop((190, 250), scale=(0.8, 1.0)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        # --- tensor conversion ---
        transforms.ToTensor(),
        transforms.ConvertImageDtype(dtype=torch.float32),
        # (x - 0.5) / 0.5 on each of the three channels
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [12]:
# train_transform = transforms.Compose([
#     transforms.Grayscale(num_output_channels=3),
#     transforms.ToTensor(),
#     transforms.ConvertImageDtype(torch.float32),
#     transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
# ])

In [31]:
# Validation pipeline: same colour handling and normalisation as training,
# but without any random augmentation.
val_transform = transforms.Compose(
    [
        transforms.Grayscale(3),  # grayscale, replicated over 3 channels
        transforms.ToTensor(),  # PIL image -> tensor
        transforms.ConvertImageDtype(dtype=torch.float32),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),  # (x - 0.5) / 0.5
    ]
)

In [32]:
# Wrap each split in a dataset; only the training split gets augmentation.
train_dataset = NumpyArrayDataset(images=x_train, labels=y_train, transform=train_transform)
val_dataset = NumpyArrayDataset(images=x_val, labels=y_val, transform=val_transform)

In [33]:
# Training loader: batches of 4, reshuffled every epoch.
trainloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# Validation loader: batches of 4 in a fixed order. Shuffling does not help
# evaluation and only makes the per-epoch validation numbers harder to
# reproduce, so it is switched off here.
valloader = DataLoader(val_dataset, batch_size=4, shuffle=False)


## YOLO Preprocess

In [None]:
!pip install ultralytics

In [None]:
!pip install -U ipywidgets

In [None]:
from ultralytics import YOLO

In [None]:
# Load a pretrained YOLOv8 model.
# The commented lines are alternative checkpoints that were tried; only the
# last, uncommented line takes effect.
# model = YOLO('https://github.com/akanametov/yolov8-face/releases/download/v0.0.0/yolov8l-face.pt')
# model = YOLO('https://github.com/akanametov/yolov8-face/releases/download/v0.0.0/yolov8n-face.pt')
# model = YOLO('yolov8l-cls.pt')
# NOTE: the name `model` is reused by the cells under the training heading below.
model = YOLO('yolov8x-cls.pt')

In [None]:
# Display the `model` attribute of the YOLO wrapper as the cell output.
model.model

In [None]:
# Fine-tune the YOLO classifier on the dataset folder.
# NOTE(review): `imgsz` is given as a pair here — confirm how the installed
# ultralytics version treats a non-integer image size during training.
# NOTE(review): device='0' presumably selects the first GPU; the cell has no
# CPU fallback — verify before running on a machine without one.
model.train(
    data='/kaggle/input/dataset-for-yolo-190x250/Dataset_(190x250)',
    epochs=15,
    imgsz=(190, 250),
    device='0',
    verbose=True,
    lr0=0.001,
    batch=4,
    optimizer='Adam'
)

In [18]:
# Write the YOLO model to the Kaggle working directory.
model.save("/kaggle/working/trained_yolov8x-cls_2.pt")

# Training

In [None]:
!pip install facenet_pytorch

In [6]:
# from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights
from facenet_pytorch import InceptionResnetV1
# from torchvision import models
# from torchvision.models import googlenet, GoogLeNet_Weights

In [None]:
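# NOTE(review): every candidate backbone below is commented out, so on a fresh
# "Restart & Run All" the name `model` still refers to the YOLO object created
# earlier in this notebook. Un-comment exactly one line (and make sure its
# import is available) before running the cells that follow.
# model = models.vgg19_bn(pretrained='casia-webface')
# model = InceptionResnetV1(pretrained='casia-webface')
# model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
# model = googlenet(weights=GoogLeNet_Weights.IMAGENET1K_V1)
# model = models.densenet161(pretrained=True)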

In [None]:
# Use CUDA when it is available, otherwise stay on the CPU, and move the
# model there. The call is left as the last expression so the cell still
# displays its return value.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

In [None]:
# Print a layer-by-layer summary for a single 3 x 190 x 250 input.
summary(model, input_size=(3, 190, 250))

In [51]:
# num_features = model.classifier[3].in_features
# model.classifier[3] = nn.Linear(1280, 5)
# Swap in a 5-way output layer. The model was already moved to `device` in an
# earlier cell, so the freshly created layer (which starts on the CPU) is
# moved as well; otherwise the forward pass mixes devices when a GPU is used.
# NOTE(review): the 4096 input features are hard-coded — confirm they match
# the `fc` layer of the backbone that is actually in use.
model.fc = nn.Linear(4096, 5).to(device)

In [None]:
# Display the model as the cell output to inspect it after the head replacement.
model

In [None]:
from sklearn.preprocessing import LabelEncoder
# Maps the five class-folder names to integer ids for the loss function.
# NOTE(review): the id given to each name is decided by LabelEncoder, not by
# the order of this list — check `label_encoder.classes_` when reading
# numeric predictions.
label_encoder = LabelEncoder()
label_encoder.fit(['Oblong', 'Round', 'Oval', 'Heart', 'Square'])

In [54]:
# Adam over all model parameters with a small weight decay, and
# cross-entropy as the classification loss.
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

In [55]:
# Number of passes over the training loader made by the training loop.
num_epochs = 15

In [56]:
# Per-epoch loss history, filled by the training loop and plotted afterwards.
train_losses, val_losses = [], []

In [None]:
# Train for `num_epochs` epochs; after each epoch evaluate on the validation
# loader. Both losses are reported as the mean over samples, so a smaller
# final batch does not skew the epoch average.
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    train_seen = 0

    for images, labels in trainloader:
        images = images.to(device)
        # Labels arrive as class-name strings; encode them to integer ids.
        # CrossEntropyLoss needs int64 targets, so the dtype is set explicitly
        # instead of relying on NumPy's platform-dependent default integer.
        labels = torch.as_tensor(
            label_encoder.transform(labels), dtype=torch.long, device=device
        )

        optimizer.zero_grad()  # Clear gradients from the previous step
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `criterion` returns the batch mean; weight it by the batch size.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        train_seen += batch_count

    epoch_train_loss = running_loss / max(train_seen, 1)
    train_losses.append(epoch_train_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_train_loss:.4f}")

    # ---- Validation phase (no weight updates) ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in valloader:
            images = images.to(device)
            labels = torch.as_tensor(
                label_encoder.transform(labels), dtype=torch.long, device=device
            )

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_count = labels.size(0)
            val_loss += loss.item() * batch_count

            # Predicted class = index of the largest output per sample
            _, predicted = torch.max(outputs, 1)
            total += batch_count
            correct += (predicted == labels).sum().item()

    # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
    epoch_val_loss = val_loss / max(total, 1)
    val_losses.append(epoch_val_loss)
    print(f"Validation Loss: {epoch_val_loss:.4f}, Accuracy: {100 * correct / max(total, 1):.2f}%")

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss, one point per epoch.
fig, ax = plt.subplots(figsize=(10, 5))
for history, series_name in ((train_losses, 'Training Loss'), (val_losses, 'Validation Loss')):
    ax.plot(history, label=series_name)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
ax.set_title('Training and Validation Loss')
plt.show()

In [None]:
# Save only the weights (state_dict) of the trained model.
# NOTE(review): the filename mentions MobileNetV3 — confirm it matches the
# architecture that was actually trained above.
model_path = 'model_MobileNetV3_Greyscal_Augment.pt'
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

## Test

In [36]:
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [37]:
# Read the held-out "test" folder.
test, label_test = read_image(
    [
        '/kaggle/input/dataset-for-yolo-190x250/Dataset_(190x250)/test',
    ]
)

In [39]:
# Test pipeline: grayscale (3 channels), tensor conversion, normalisation —
# no random augmentation.
transform_test = transforms.Compose(
    [
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.ConvertImageDtype(dtype=torch.float32),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),  # (x - 0.5) / 0.5
    ]
)

In [40]:
# Wrap the test images and labels with the test-time transform.
test_dataset = NumpyArrayDataset(images=test, labels=label_test, transform=transform_test)

In [41]:
# Batches of 4 in a fixed order: evaluation gains nothing from shuffling and
# a stable order keeps the collected predictions reproducible.
loader_test = DataLoader(test_dataset, batch_size=4, shuffle=False)

In [42]:
from sklearn.metrics import classification_report, confusion_matrix
import torch.nn.functional as F  # For softmax
import seaborn as sns

In [None]:
# Evaluate the trained model on the test loader and collect, per sample, the
# predicted class id, the true class id and the softmax probabilities.
all_preds = []
all_labels = []
all_confidences = []  # Softmax probabilities over the classes, one row per sample

model.eval()  # Evaluation mode
val_loss = 0.0  # Accumulates the test loss (name kept from the original cell)
correct = 0
total = 0

with torch.no_grad():  # No gradients needed for evaluation
    for images, labels in loader_test:
        images = images.to(device)

        # Encode the class-name strings to integer ids. CrossEntropyLoss needs
        # int64 targets, so the dtype is set explicitly.
        encoded_labels = torch.as_tensor(
            label_encoder.transform(labels), dtype=torch.long, device=device
        )

        outputs = model(images)
        loss = criterion(outputs, encoded_labels)

        # `criterion` returns the batch mean; weight it by the batch size so
        # the final figure is a mean over samples.
        batch_count = encoded_labels.size(0)
        val_loss += loss.item() * batch_count

        # Softmax turns the outputs into per-class confidence scores
        confidences = F.softmax(outputs, dim=1)
        _, predicted = torch.max(confidences, 1)

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(encoded_labels.cpu().numpy())
        all_confidences.extend(confidences.cpu().numpy())

        total += batch_count
        correct += (predicted == encoded_labels).sum().item()

# max(..., 1) avoids a ZeroDivisionError when the loader is empty.
val_loss /= max(total, 1)
accuracy = 100 * correct / max(total, 1)
print(f"Test Loss: {val_loss:.4f}, Test Accuracy: {accuracy:.2f}%")

# Pin the report and the matrix to the full list of classes so that every
# class gets a row/column even if it never occurs in this run.
class_ids = list(range(len(label_encoder.classes_)))
class_names = list(label_encoder.classes_)

print("Classification Report:")
print(classification_report(all_labels, all_preds, labels=class_ids, target_names=class_names))

conf_matrix = confusion_matrix(all_labels, all_preds, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
# Heatmap of the confusion matrix, with the class names (in the encoder's id
# order) on both axes instead of bare integer ids.
class_names = list(label_encoder.classes_)
fig, ax = plt.subplots()
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()