In [None]:
import os
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
from torchinfo import summary
from torchvision import models
from torchvision.models import MobileNet_V3_Small_Weights
import pyfirmata as fir
import time
import torch.nn.functional as F
from collections import deque
from torchvision import datasets, transforms, models
import cv2

### Dataset Preparation

In [None]:
# Move into the folder that holds the dataset and checkpoints.
# The relative chdir is guarded so that re-running this cell does not try to
# enter 'Downloads/Downloads' and fail.
if os.path.basename(os.getcwd()) != 'Downloads':
    os.chdir('Downloads')  # Change the directory to where you stored your dataset
os.getcwd()  # Check the current directory

In [None]:
# Root folder of the image dataset
dataset_path = 'C:\\Users\\ALDY\\Downloads\\Dataset Edging'

# Both split directories are derived from the dataset root
train_dir, test_dir = (
    os.path.join(dataset_path, split_name) for split_name in ('train', 'test')
)

In [None]:
# Preprocessing shared by both splits: resize to 128x128, then convert to a tensor
transform = transforms.Compose([transforms.Resize((128, 128)), transforms.ToTensor()])

# One ImageFolder dataset per split, both using the same preprocessing
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Batches of 16; only the training split is shuffled
batch_size = 16
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Class names as discovered by ImageFolder on the training split
class_labels = train_dataset.classes
print("Classes:", class_labels)

### Model Build

In [None]:
class MultiClassClassificationModel(nn.Module):
    """MobileNetV3-Small whose last classifier layer is resized for this task.

    Args:
        num_classes: Number of logits produced by the replaced final linear
            layer.
    """

    def __init__(self, num_classes=4):
        super().__init__()
        # Start from torchvision's default weights for MobileNetV3-Small.
        backbone = models.mobilenet_v3_small(weights=MobileNet_V3_Small_Weights.DEFAULT)

        # Swap the final linear layer for one that emits `num_classes` logits
        # while keeping its input width.
        head_inputs = backbone.classifier[3].in_features
        backbone.classifier[3] = nn.Linear(head_inputs, num_classes)

        # The attribute name is kept so state_dict keys stay "mobilenet.*".
        self.mobilenet = backbone

    def forward(self, x):
        """Return the backbone's output for the input batch `x`."""
        return self.mobilenet(x)

# Build the classifier with a 4-way head
# (pass num_classes=5 for the model trained with 5 classes)
model = MultiClassClassificationModel(num_classes=4)

# Use the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

In [None]:
# Layer-by-layer summary for an input of shape (batch=16, 3, 128, 128)
summary(model=model, input_size=(16, 3, 128, 128))

### Model Training

In [None]:
# Cross-entropy loss on the model outputs, optimised with Adam (lr = 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train for a fixed number of epochs and report the mean batch loss per epoch
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        # The model was moved to `device`; the batches must live there too,
        # otherwise the forward pass fails with a device mismatch on CUDA.
        images = images.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/max(len(train_loader), 1)}")

### Confusion Matrix

In [None]:
# Evaluate the model on the test split and collect predictions
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        # Send the batch to the device the model lives on before the forward pass
        outputs = model(images.to(device))
        preds = outputs.argmax(dim=1)  # index of the highest logit per sample
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate confusion matrix.
# Passing every class index keeps the matrix size equal to len(class_labels)
# even when some class never appears in the test labels or predictions.
cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_labels))))

# Plot confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Save only the learned weights (state_dict). The cells below rebuild the
# architecture and restore it with load_state_dict(), which expects a
# state_dict rather than a pickled model object.
# NOTE(review): '/content/' is a Colab-style path while the other cells use
# Windows paths -- confirm the intended save location.
torch.save(model.state_dict(), '/content/model.pth')

In [None]:
# Rebuild the architecture, then restore the trained weights into it
model = MultiClassClassificationModel(num_classes=4)  # Adjust the number of classes if needed

# Read the checkpoint onto the CPU first so it also loads on machines without CUDA
model.load_state_dict(torch.load('C:\\Users\\ALDY\\Downloads\\mobilenet.pth', map_location=torch.device('cpu')))

# The batches below are sent to CUDA when it is available, so the model must be
# moved to that same device; otherwise the forward pass raises a device mismatch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

all_preds = []
all_labels = []

# Uses the DataLoader named 'test_loader' created in the dataset cell
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        preds = outputs.argmax(dim=1)  # index of the highest logit per sample
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate confusion matrix
class_labels = ['Abraar', 'Aldy', 'Dimas', 'Haikal']  # Replace with your class names
# Passing every class index keeps the matrix size equal to len(class_labels)
# even when some class never appears in the test labels or predictions.
cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_labels))))

# Plot confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

### Live Feed Testing

In [None]:
# Load the trained model (weights are read onto the CPU; inference below runs on the CPU)
model = MultiClassClassificationModel(num_classes=4)
model.load_state_dict(torch.load('best_model.pth', map_location=torch.device('cpu')))  # Change path depending on your saved model name
model.eval()

# Preprocess webcam frames the same way the training images were preprocessed
# in this notebook (resize to 128x128, ToTensor, no normalisation). Feeding a
# different size or normalised pixels would not match what the network saw
# during training.
# NOTE(review): if 'best_model.pth' was trained with a different pipeline,
# mirror that pipeline here instead.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Map class indices to labels.
# ImageFolder assigns indices to class folders in sorted (alphabetical) order,
# so 'Abraar' is index 0 and 'Aldy' is index 1 -- the same order used for the
# confusion-matrix labels earlier in the notebook.
class_names = ['Abraar', 'Aldy', 'Dimas', 'Haikal']  # Change depending on your class labels

def predict_frame(frame):
    """Classify a single image frame with the loaded model.

    Args:
        frame: Image accepted by the module-level `transform` pipeline; the
            caller in this notebook passes an RGB frame converted from OpenCV.

    Returns:
        A `(label, probability)` tuple: the entry of `class_names` with the
        highest softmax probability, and that probability as a Python float.
    """
    # Preprocess and add a leading batch dimension of size 1
    batch = transform(frame).unsqueeze(0)

    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        probs = F.softmax(model(batch), dim=1)

    confidence, class_idx = probs.max(dim=1)
    return class_names[class_idx.item()], confidence.item()

# Open the webcam
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when the loop raises or the kernel is interrupted.
try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside a notebook exit() shuts the
        # kernel down rather than just stopping this cell.
        raise RuntimeError("Error: Could not open camera.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            break

        # Convert frame to RGB for prediction (OpenCV uses BGR by default)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        predicted_label, score = predict_frame(frame_rgb)

        # Display the prediction and score on the (still BGR) frame
        cv2.putText(frame, f'Prediction: {predicted_label} ({score:.2f})', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the frame
        cv2.imshow('Real-Time Prediction', frame)

        # Exit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()