In [None]:
import os

import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms

from CNN import CNNModel
from MNISTDataset import MNISTDataset

In [None]:
# --- Configuration ------------------------------------------------------------
# Directory holding the four MNIST idx files. The original machine-specific
# location remains the default; set the MNIST_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    "MNIST_DATA_DIR",
    r"C:\Users\shahd\OneDrive\Desktop\neuralNetwork2\Neural-network\archive",
)

# Seed torch's global RNG and the split generator so that the train/validation
# split is the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# --- Data ---------------------------------------------------------------------
full_train_dataset = MNISTDataset(
    os.path.join(DATA_DIR, "train-images.idx3-ubyte"),
    os.path.join(DATA_DIR, "train-labels.idx1-ubyte"),
)
test_dataset = MNISTDataset(
    os.path.join(DATA_DIR, "t10k-images.idx3-ubyte"),
    os.path.join(DATA_DIR, "t10k-labels.idx1-ubyte"),
)
# Hold out 10,000 of the training samples for validation.
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [50000, 10000],
    generator=torch.Generator().manual_seed(SEED),
)

# --- Hyper-parameter grids ----------------------------------------------------
# `default_neurons` is only recorded in combined_best_config by the sweep cells;
# CNNModel() is constructed without arguments in this notebook.
default_neurons = [128, 64]
learning_rates = [0.0001, 0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128, 256]
neurons = [[64, 32], [128, 64], [256, 128, 64], [512, 256, 128]]

# --- Loaders ------------------------------------------------------------------
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1000, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# --- Baseline model / loss / optimizer ----------------------------------------
epochs = 10
mymodel = CNNModel()
criterion = nn.CrossEntropyLoss()
optimize = torch.optim.SGD(mymodel.parameters(), lr=0.01)

# --- Best-configuration trackers (filled in by the sweep cells) ---------------
best_lr_config = {"learning_rate": None, "val_accuracy": 0}
best_bs_config = {"batch_size": None, "val_accuracy": 0}

# Combined best configuration; "neurons" is present from the start so the dict
# has the same keys before and after the sweeps update it.
combined_best_config = {"learning_rate": None, "batch_size": None, "neurons": None, "val_accuracy": 0}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mymodel.to(device)





In [None]:
# Learning-rate sweep: train a fresh CNN for every candidate learning rate while
# the batch size stays fixed.
# NOTE: `train_model` and `plot_loss_accuracy` are defined in later cells of this
# notebook; those cells must have been executed before this one.

# Build a dedicated loader so the batch size recorded below is the one actually
# used, regardless of what the global `train_loader` currently points to.
LR_SWEEP_BATCH_SIZE = 64
lr_sweep_loader = DataLoader(train_dataset, batch_size=LR_SWEEP_BATCH_SIZE, shuffle=True)

for lr in learning_rates:
    print(f"Training with learning rate: {lr}")
    mymodel = CNNModel()  # fresh weights for every run
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(mymodel.parameters(), lr=lr)
    train_losses, val_losses, train_accuracies, val_accuracies = train_model(
        mymodel, lr_sweep_loader, val_loader, criterion, optimizer, epochs, device
    )
    plot_loss_accuracy(train_losses, val_losses, train_accuracies, val_accuracies)

    # A run is scored by its best validation accuracy over all epochs.
    max_val_accuracy = max(val_accuracies)

    # Update the best learning rate configuration
    if max_val_accuracy > best_lr_config["val_accuracy"]:
        best_lr_config.update({"learning_rate": lr, "val_accuracy": max_val_accuracy})
    if max_val_accuracy > combined_best_config["val_accuracy"]:
        combined_best_config.update({
            "learning_rate": lr,
            "batch_size": LR_SWEEP_BATCH_SIZE,
            "neurons": default_neurons,
            "val_accuracy": max_val_accuracy,
        })

In [None]:
# Batch-size sweep: train a fresh CNN for every candidate batch size while the
# learning rate stays fixed.
# NOTE: `train_model` and `plot_loss_accuracy` are defined in later cells of this
# notebook; those cells must have been executed before this one.

# Single source of truth for the learning rate used by the optimizer and
# recorded in combined_best_config.
BS_SWEEP_LEARNING_RATE = 0.01

for bs in batch_sizes:
    print(f"Training with batch size: {bs}")
    mymodel = CNNModel()  # fresh weights for every run
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(mymodel.parameters(), lr=BS_SWEEP_LEARNING_RATE)
    # Use a sweep-local loader so the global `train_loader` is not left pointing
    # at the last batch size tried.
    bs_sweep_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    train_losses, val_losses, train_accuracies, val_accuracies = train_model(
        mymodel, bs_sweep_loader, val_loader, criterion, optimizer, epochs, device
    )
    plot_loss_accuracy(train_losses, val_losses, train_accuracies, val_accuracies)

    # A run is scored by its best validation accuracy over all epochs.
    max_val_accuracy = max(val_accuracies)

    # Update the best batch size configuration
    if max_val_accuracy > best_bs_config["val_accuracy"]:
        best_bs_config.update({"batch_size": bs, "val_accuracy": max_val_accuracy})
    if max_val_accuracy > combined_best_config["val_accuracy"]:
        combined_best_config.update({
            "learning_rate": BS_SWEEP_LEARNING_RATE,
            "batch_size": bs,
            "neurons": default_neurons,
            "val_accuracy": max_val_accuracy,
        })

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, device=None):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Args:
        model: ``torch.nn.Module`` to optimise; it is moved to ``device``.
        train_loader: iterable yielding ``(inputs, labels)`` training batches.
        val_loader: iterable yielding ``(inputs, labels)`` validation batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            The batch loss is multiplied by the batch size before being
            accumulated, so a mean-reducing criterion is assumed.
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.
        device: ``torch.device`` to run on. When omitted, the device of the
            model's first parameter is used (CPU if the model has none).

    Returns:
        ``(train_losses, val_losses, train_accuracies, val_accuracies)`` -
        four lists with one per-sample-averaged value per epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.to(device)
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(epochs):
        # Training
        model.train()
        train_loss, correct, total = 0, 0, 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by its size so that the epoch figure is a
            # true per-sample mean even when the last batch is smaller.
            train_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
        train_losses.append(train_loss / total)
        train_accuracies.append(correct / total)

        # Validation
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)
        val_losses.append(val_loss / val_total)
        val_accuracies.append(val_correct / val_total)

        # Report the per-sample averages just recorded for this epoch (the
        # same values that are returned), not the raw running sums.
        print(f"Epoch {epoch+1}/{epochs}")
        print(f"  Train Loss: {train_losses[-1]:.4f}, Train Accuracy: {train_accuracies[-1]:.4f}")
        print(f"  Val Loss: {val_losses[-1]:.4f}, Val Accuracy: {val_accuracies[-1]:.4f}")
    return train_losses, val_losses, train_accuracies, val_accuracies

In [None]:
def evaluate_model(model, test_loader, device=None):
    """Evaluate ``model`` on ``test_loader``, print accuracy and plot a confusion matrix.

    Args:
        model: trained ``torch.nn.Module``; it is switched to eval mode.
        test_loader: iterable yielding ``(inputs, labels)`` batches.
        device: ``torch.device`` the batches are moved to. When omitted, the
            device of the model's first parameter is used (CPU if the model
            has none), so the function can be called as
            ``evaluate_model(model, test_loader)``.

    Returns:
        ``(accuracy, cm)`` - the fraction of correct predictions and the
        10x10 confusion matrix (rows: true label, columns: predicted label).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct, total = 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            # Move to CPU before handing the values to scikit-learn.
            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    accuracy = correct / total
    # Pin the label set so the matrix is always 10x10 and lines up with the
    # ten tick labels below, even if some digit never occurs.
    class_ids = list(range(10))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    print(f"Test Accuracy: {accuracy:.4f}")

    # Plot confusion matrix
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_ids, yticklabels=class_ids)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

    return accuracy, cm

In [None]:
def plot_loss_accuracy(train_losses, val_losses, train_accuracies, val_accuracies):
    """Show loss and accuracy curves side by side in one 12x6 figure.

    Each argument is a sequence with one value per epoch. The left panel
    plots training vs. validation loss, the right panel training vs.
    validation accuracy; the x axis is the 1-based epoch number.
    """
    # One entry per panel:
    # (subplot index, train series, validation series,
    #  train legend label, validation legend label, y-axis label, panel title)
    panels = (
        (1, train_losses, val_losses,
         'Train Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
        (2, train_accuracies, val_accuracies,
         'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Training and Validation Accuracy'),
    )

    plt.figure(figsize=(12, 6))

    for position, train_series, val_series, train_label, val_label, y_label, title in panels:
        plt.subplot(1, 2, position)  # 1 row, 2 columns
        train_epochs = range(1, len(train_series) + 1)
        val_epochs = range(1, len(val_series) + 1)
        plt.plot(train_epochs, train_series, label=train_label)
        plt.plot(val_epochs, val_series, label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    # Avoid overlapping labels between the two panels, then render.
    plt.tight_layout()
    plt.show()

In [None]:

# Re-train and evaluate the final combined best model
best_learning_rate = combined_best_config["learning_rate"]
best_batch_size = combined_best_config["batch_size"]
# Both values stay None until a sweep cell has recorded a result; fail with a
# clear message instead of an obscure error inside DataLoader / SGD.
if best_learning_rate is None or best_batch_size is None:
    raise RuntimeError(
        "combined_best_config has not been populated - run the learning-rate "
        "and batch-size sweep cells before this one."
    )

print("\nTraining and evaluating the best combined model on test set...")
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
mymodel = CNNModel()  # Initialize the best model
criterion = nn.CrossEntropyLoss()  # created here so it does not depend on an earlier cell's leftover
optimizer = torch.optim.SGD(mymodel.parameters(), lr=best_learning_rate)
train_model(mymodel, train_loader, val_loader, criterion, optimizer, epochs, device)
evaluate_model(mymodel, test_loader, device)

# Print the best configurations
print("\nBest Configurations:")
print(f"  Best Learning Rate: {best_lr_config['learning_rate']} (Validation Accuracy: {best_lr_config['val_accuracy']:.4f})")
print(f"  Best Batch Size: {best_bs_config['batch_size']} (Validation Accuracy: {best_bs_config['val_accuracy']:.4f})")
print(f"  Combined Best Configuration: {combined_best_config}")