<a href="https://colab.research.google.com/github/JLW493/LegoClassifierDockerRepo/blob/main/MEM679LegoLearner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image  # PIL is the Python Imaging Library, used for opening image files
import kagglehub
import random
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import shutil
import torch
import torch.nn as nn
from torchvision.models import efficientnet_b0
from torch.optim import Adam

# Download the B200C LEGO classification dataset through kagglehub.
# To use a local copy instead, assign its folder to `path`, e.g.:
# path = 'C:/Users/email/Downloads/Lego Classification DataSet'
path = kagglehub.dataset_download("ronanpickell/b200c-lego-classification-dataset")

# List every sub-folder of the dataset (relative to `path`), parent folders included.
# os.walk yields directories in filesystem order, so `dirs` is sorted in place:
# this fixes both the traversal order and the resulting list, which keeps any
# class selection sliced from it identical from one machine/run to the next.
available_folders = []
for root, dirs, files in os.walk(path):
    dirs.sort()
    available_folders.extend(
        os.path.relpath(os.path.join(root, dir_name), path) for dir_name in dirs
    )

# Fraction of each class's images that is held out for validation
VAL_SPLIT = 0.2

# Number of images to use per class (train + validation together)
MAX_IMAGES_PER_CLASS = 100

# File extensions (lower-case) that count as images
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Path to original dataset (use your dataset's path here)
original_dataset_dir = path

# The split is written to `lego_dataset` inside the current working directory
output_dir = os.path.join(os.getcwd(), 'lego_dataset')
train_dir = os.path.join(output_dir, 'train')
val_dir = os.path.join(output_dir, 'validation')

# Rebuild both split folders from scratch. Files left over from an earlier run
# (other classes, or a different shuffle) would otherwise be picked up as extra
# classes or leak images between the training and validation sets.
for split_dir in (train_dir, val_dir):
    shutil.rmtree(split_dir, ignore_errors=True)
    os.makedirs(split_dir, exist_ok=True)

# Set a fixed random seed for reproducibility
random.seed(42)

# Selects 20 classes. Index 0 is skipped, as before — presumably it is the
# parent folder that merely contains the class folders (TODO confirm).
selected_classes = available_folders[1:21]
print(f"Selected classes: {selected_classes}")

# Absolute, normalised paths of the selected folders, for fast membership tests
selected_class_dirs = {
    os.path.normpath(os.path.join(original_dataset_dir, rel_path))
    for rel_path in selected_classes
}

# Traverse the original dataset directory and split the selected classes
for root, dirs, files in os.walk(original_dataset_dir):
    dirs.sort()  # deterministic traversal order

    # Only the selected classes take part in the split
    if os.path.normpath(root) not in selected_class_dirs:
        continue

    # Sort before shuffling: the directory listing order is arbitrary, so the
    # seeded shuffle is only reproducible when it starts from a fixed order.
    image_paths = sorted(
        os.path.join(root, file)
        for file in files
        if file.lower().endswith(IMAGE_EXTENSIONS)
    )
    if not image_paths:
        continue  # selected folder holds no images; nothing to copy

    class_name = os.path.basename(root)  # Use folder name as class label

    # Create class-specific subdirectories in train and validation directories
    class_train_dir = os.path.join(train_dir, class_name)
    class_val_dir = os.path.join(val_dir, class_name)
    os.makedirs(class_train_dir, exist_ok=True)
    os.makedirs(class_val_dir, exist_ok=True)

    # Random subset of at most MAX_IMAGES_PER_CLASS images
    random.shuffle(image_paths)
    image_paths = image_paths[:MAX_IMAGES_PER_CLASS]

    # The first (1 - VAL_SPLIT) share goes to training, the rest to validation
    split_idx = int(len(image_paths) * (1 - VAL_SPLIT))

    for image in image_paths[:split_idx]:
        shutil.copy(image, class_train_dir)

    for image in image_paths[split_idx:]:
        shutil.copy(image, class_val_dir)

print("Dataset split completed!")
print(f"Training data stored in: {train_dir}")
print(f"Validation data stored in: {val_dir}")

# Image side length fed to the model, and mini-batch size for both loaders
IMG_SIZE = 64
BATCH_SIZE = 10


def _make_transforms(augment):
    """Build the preprocessing pipeline; `augment` adds the training-only colour jitter."""
    steps = [transforms.Resize((IMG_SIZE, IMG_SIZE))]  # Resize to match model input
    if augment:
        # Brightness & contrast adjustment
        steps.append(transforms.ColorJitter(brightness=0.2, contrast=0.2))
    steps.append(transforms.ToTensor())  # Convert image to tensor
    # (x - 0.5) / 0.5 per channel: maps [0, 1] onto [-1, 1]
    steps.append(transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]))
    return transforms.Compose(steps)


train_transforms = _make_transforms(augment=True)
val_transforms = _make_transforms(augment=False)

# Each sub-folder of the split directories is read as one class
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(val_dir, transform=val_transforms)

# Only the training data is reshuffled every epoch
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Class names as discovered in the training folder
class_names = train_dataset.classes
num_classes = len(class_names)
print(f"Classes: {class_names}")

# Load EfficientNet-B0 with its ImageNet weights. `weights=` replaces the
# deprecated `pretrained=True` flag and selects the same checkpoint.
model = efficientnet_b0(weights="IMAGENET1K_V1")

# Replace the final classification layer so it outputs one score per LEGO class
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

# Fine-tune the whole network: every parameter stays trainable, nothing is frozen.
# (To train only the new head, set requires_grad = False on `model.features`.)
for param in model.parameters():
    param.requires_grad = True

# Move the model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(f"Model loaded on {device}")

# Define loss function and optimizer; the optimizer only receives trainable parameters
criterion = nn.CrossEntropyLoss()
optimizer = Adam((p for p in model.parameters() if p.requires_grad), lr=0.001)

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over `loader` and return (mean loss per sample, accuracy).

    With an optimizer the model is put in training mode and updated after every
    batch; without one the model is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)
    total_loss, total_correct = 0.0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight the batch loss by the batch size so a smaller final batch
            # does not skew the epoch average (assumes a mean-reduced criterion).
            total_loss += loss.item() * images.size(0)
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()

    num_samples = len(loader.dataset)
    return total_loss / num_samples, total_correct / num_samples


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20):
    """Train `model` and evaluate it after every epoch.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on any module-level `device` variable.

    Args:
        model: Network to train (EfficientNet-B0 in this notebook).
        train_loader: DataLoader over the training set.
        val_loader: DataLoader over the validation set.
        criterion: Loss function called as criterion(outputs, labels).
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over the training set.

    Returns:
        train_losses: Mean training loss per sample, one value per epoch.
        val_losses: Mean validation loss per sample, one value per epoch.
        train_accuracies: Training accuracy (0-1), one value per epoch.
        val_accuracies: Validation accuracy (0-1), one value per epoch.
    """
    # Follow the model's own device; a parameter-free model falls back to CPU
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, run_device, optimizer
        )
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, run_device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_accuracy)
        val_accuracies.append(val_accuracy)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

    return train_losses, val_losses, train_accuracies, val_accuracies

# Run training; the four returned series hold one value per epoch
history = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=20)
train_losses, val_losses, train_accuracies, val_accuracies = history

# Plot loss (left) and accuracy (right), training against validation
plt.figure(figsize=(12, 5))
panels = (
    ("Loss", train_losses, val_losses),
    ("Accuracy", train_accuracies, val_accuracies),
)
for position, (metric, train_series, val_series) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_series, label=f"Train {metric}")
    plt.plot(val_series, label=f"Validation {metric}")
    plt.title(f"{metric} per Epoch")
    plt.xlabel("Epoch")
    plt.ylabel(metric)
    plt.legend()

plt.tight_layout()
plt.show()

# Persist only the learned weights (state_dict), not the whole model object
model_path = "lego_model.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

# Select one image from each class and run predictions
def predict_images_from_classes(model, class_names, val_dir):
    """Show the model's prediction for one random validation image per class.

    For every entry of `class_names`, a random image is picked from the folder
    `val_dir/<class name>`, classified, and displayed with its true and
    predicted label. Classes whose folder is missing or holds no images are
    reported and skipped.

    Args:
        model: Trained PyTorch classifier (EfficientNet-B0 in this notebook).
        class_names: Sequence of class names; the model's output index i is
            looked up as class_names[i], so the order must match the one used
            during training.
        val_dir: Validation set directory containing one sub-folder per class.

    Raises:
        ValueError: If `model` is not a `torch.nn.Module`.
    """
    if not isinstance(model, torch.nn.Module):
        raise ValueError("The `model` parameter must be a PyTorch model.")

    model.eval()

    # Send inputs to the device the model lives on instead of relying on a
    # module-level `device`; a parameter-free model falls back to CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Same preprocessing as the validation pipeline (no augmentation)
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    for class_name in class_names:
        class_folder = os.path.join(val_dir, class_name)

        # A missing folder is treated like an empty one rather than crashing
        image_files = []
        if os.path.isdir(class_folder):
            image_files = [
                os.path.join(class_folder, img) for img in os.listdir(class_folder)
                if img.lower().endswith(('.jpg', '.jpeg', '.png'))
            ]

        if not image_files:
            print(f"No images found in class folder: {class_folder}")
            continue

        image_path = random.choice(image_files)

        # Load image; convert() returns an independent copy, so the file
        # handle can be released as soon as the `with` block ends
        with Image.open(image_path) as raw_image:
            img = raw_image.convert("RGB")

        # Add a batch dimension of 1 for the single image
        input_tensor = transform(img).unsqueeze(0).to(model_device)

        # Perform prediction
        with torch.no_grad():
            predicted_class_idx = model(input_tensor).argmax(dim=1).item()
        predicted_class = class_names[predicted_class_idx]

        # Display the image and prediction
        plt.imshow(img)
        plt.title(f"True: {class_name}, Predicted: {predicted_class}")
        plt.axis("off")
        plt.show()

# Run predictions on one randomly chosen validation image per class and display
# each image with its true and predicted label
predict_images_from_classes(model, class_names, val_dir)

Selected classes: ['64/60601', '64/4162', '64/14769', '64/3958', '64/60592', '64/4070', '64/3008', '64/43093', '64/99207', '64/2540', '64/2436', '64/41677', '64/3002', '64/32123', '64/32523', '64/15573', '64/99206', '64/18674', '64/24866', '64/87079']
Dataset split completed!
Training data stored in: /content/lego_dataset/train
Validation data stored in: /content/lego_dataset/validation
Classes: ['10247', '11090', '11211', '11212', '11214', '11458', '11476', '11477', '14704', '14719', '14769', '15068', '15070', '15100', '15379', '15392', '15535', '15573', '15712', '18651', '18654', '18674', '18677', '20482', '22388', '22885', '2357', '2412b', '2420', '24201', '24246', '2429', '2430', '2431', '2432', '2436', '2445', '2450', '2454', '2456', '24866', '25269', '2540', '26047', '2654', '26601', '26603', '26604', '2780', '27925', '28192', '2877', '3001', '3002', '3003', '3004', '3005', '3008', '3009', '3010', '30136', '3020', '3021', '3022', '3023', '3024', '3031', '3032', '3034', '3035', '3



Model loaded on cuda
Epoch 1/20
Train Loss: 4.0663, Train Accuracy: 0.1001
Val Loss: 2.9172, Val Accuracy: 0.2385
Epoch 2/20
Train Loss: 2.8934, Train Accuracy: 0.2491
Val Loss: 2.2232, Val Accuracy: 0.3812
Epoch 3/20
Train Loss: 2.3237, Train Accuracy: 0.3570
Val Loss: 1.9489, Val Accuracy: 0.4472
Epoch 4/20
Train Loss: 1.9928, Train Accuracy: 0.4298
Val Loss: 1.6504, Val Accuracy: 0.5205
Epoch 5/20
Train Loss: 1.7385, Train Accuracy: 0.4855
Val Loss: 1.6353, Val Accuracy: 0.5298
Epoch 6/20
Train Loss: 1.5736, Train Accuracy: 0.5291
Val Loss: 1.5023, Val Accuracy: 0.5587
Epoch 7/20
Train Loss: 1.4082, Train Accuracy: 0.5687
Val Loss: 1.5617, Val Accuracy: 0.5527
Epoch 8/20
Train Loss: 1.2631, Train Accuracy: 0.6078
Val Loss: 1.5064, Val Accuracy: 0.5817
Epoch 9/20
Train Loss: 1.1678, Train Accuracy: 0.6394
Val Loss: 1.4075, Val Accuracy: 0.5968
Epoch 10/20
Train Loss: 1.0682, Train Accuracy: 0.6605
Val Loss: 1.4649, Val Accuracy: 0.5955
Epoch 11/20
Train Loss: 0.9838, Train Accuracy: 

In [None]:
# Installs the packages imported by the training cell; run this cell first on a
# fresh environment.
!pip install torch torchvision matplotlib kagglehub

