# Model Training

In [9]:
# Libraries

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
import os
import random

In [10]:
# Hyperparameters
num_epochs = 10
batch_size = 32
learning_rate = 0.001


transform = transforms.Compose([
    transforms.Resize((64, 64)), 
    transforms.ToTensor(),      
    transforms.Normalize((0.5,), (0.5,))
])


In [11]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1) 
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)  # image size
        self.fc2 = nn.Linear(128, 3)  # 3 output classes 
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 64 * 16 * 16) 
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

        

def train_model(model, train_loader):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for images, labels in train_loader:
            images, labels = images.to('cpu'), labels.to('cpu')

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")


def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to('cpu'), labels.to('cpu')
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Accuracy: {100 * correct / total:.2f}%")


def visualize_predictions(model, test_loader):
    model.eval()
    images, labels = next(iter(test_loader))
    images, labels = images.to('cpu'), labels.to('cpu')
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)

    plt.figure(figsize=(12, 8))
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        plt.imshow(images[i].permute(1, 2, 0).cpu().numpy() * 0.5 + 0.5) 
        plt.title(f"True: {labels[i].item()}, Pred: {predicted[i].item()}")
        plt.axis('off')
    plt.show()

    
    


In [12]:
# Datasets: All Original Images

train_dataset = datasets.ImageFolder(root='train', transform=transform)  
test_dataset = datasets.ImageFolder(root='test', transform=transform)   

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [13]:
# Model for All Original Images

model = CNN()     
model.apply(init_weights)
    
train_model(model, train_loader)
evaluate_model(model, test_loader)
#visualize_predictions(model, test_loader)

Epoch [1/10], Loss: 1.1797
Epoch [2/10], Loss: 0.5302
Epoch [3/10], Loss: 0.3015
Epoch [4/10], Loss: 0.2006
Epoch [5/10], Loss: 0.1793
Epoch [6/10], Loss: 0.1256
Epoch [7/10], Loss: 0.1150
Epoch [8/10], Loss: 0.0542
Epoch [9/10], Loss: 0.0822
Epoch [10/10], Loss: 0.0421
Accuracy: 94.55%


In [14]:
# Datasets: All Synthetic Images

train_dataset2 = datasets.ImageFolder(root='syn_img', transform=transform)   
train_loader2 = DataLoader(dataset=train_dataset2, batch_size=batch_size, shuffle=True)


In [15]:
# Model for All Synthetic Images

model2 = CNN()     
model2.apply(init_weights)
    
train_model(model2, train_loader2)
evaluate_model(model2, test_loader)
#visualize_predictions(model2, test_loader)

Epoch [1/10], Loss: 1.0142
Epoch [2/10], Loss: 0.4824
Epoch [3/10], Loss: 0.2014
Epoch [4/10], Loss: 0.1336
Epoch [5/10], Loss: 0.0784
Epoch [6/10], Loss: 0.0488
Epoch [7/10], Loss: 0.0471
Epoch [8/10], Loss: 0.0309
Epoch [9/10], Loss: 0.0562
Epoch [10/10], Loss: 0.0476
Accuracy: 90.91%


In [16]:
# Datasets: Synthetic Images 50% Original 50%

syn_img_path = 'syn_img'
original_img_path = 'train'

synthetic_dataset = datasets.ImageFolder(root=syn_img_path, transform=transform)
original_dataset = datasets.ImageFolder(root=original_img_path, transform=transform)

def get_class_indices(dataset):
    """Return a dictionary with class indices grouped by category."""
    class_indices = {class_name: [] for class_name in dataset.classes}
    for idx, (_, label) in enumerate(dataset.samples):
        class_name = dataset.classes[label]
        class_indices[class_name].append(idx)
    return class_indices

synthetic_indices = get_class_indices(synthetic_dataset)
original_indices = get_class_indices(original_dataset)

def sample_indices(synthetic_indices, original_indices, syn_ratio=0.5, orig_ratio=0.5):
    combined_indices = []
    for category in synthetic_indices.keys():
        syn_count = int(len(synthetic_indices[category]) * syn_ratio)
        orig_count = int(len(original_indices[category]) * orig_ratio)

        syn_sample = random.sample(synthetic_indices[category], min(syn_count, len(synthetic_indices[category])))
        orig_sample = random.sample(original_indices[category], min(orig_count, len(original_indices[category])))

        combined_indices.extend(syn_sample + orig_sample)

    return combined_indices

combined_indices = sample_indices(synthetic_indices, original_indices)


combined_dataset = Subset(synthetic_dataset + original_dataset, combined_indices)
train_loader3 = DataLoader(combined_dataset, batch_size=32, shuffle=True)



In [17]:
# Model for Synthetic Images 50% Original 50%

model3 = CNN()     
model3.apply(init_weights)
    
train_model(model3, train_loader3)
evaluate_model(model3, test_loader)
#visualize_predictions(model3, test_loader)

Epoch [1/10], Loss: 1.4253
Epoch [2/10], Loss: 1.0214
Epoch [3/10], Loss: 0.6914
Epoch [4/10], Loss: 0.4524
Epoch [5/10], Loss: 0.2271
Epoch [6/10], Loss: 0.1558
Epoch [7/10], Loss: 0.1032
Epoch [8/10], Loss: 0.0776
Epoch [9/10], Loss: 0.0441
Epoch [10/10], Loss: 0.0420
Accuracy: 70.91%


In [18]:
# 70 % Synthetic 30% Original

combined_indices = sample_indices(synthetic_indices, original_indices, syn_ratio=0.7, orig_ratio=0.3)

combined_dataset = Subset(synthetic_dataset + original_dataset, combined_indices)
train_loader4 = DataLoader(combined_dataset, batch_size=32, shuffle=True)

In [19]:
# Model for 70 % Synthetic 30% Original

model4 = CNN()     
model4.apply(init_weights)
    
train_model(model4, train_loader4)
evaluate_model(model4, test_loader)
#visualize_predictions(model3, test_loader)

Epoch [1/10], Loss: 1.1633
Epoch [2/10], Loss: 0.5146
Epoch [3/10], Loss: 0.2474
Epoch [4/10], Loss: 0.1089
Epoch [5/10], Loss: 0.0844
Epoch [6/10], Loss: 0.0564
Epoch [7/10], Loss: 0.0365
Epoch [8/10], Loss: 0.0308
Epoch [9/10], Loss: 0.0347
Epoch [10/10], Loss: 0.0206
Accuracy: 69.09%
