In [2]:
# NGUYEN CHI MANH
import torch
import torchvision.transforms as transforms
import torchvision
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = torchvision.datasets.MNIST(root='../../data', train=True, transform=transform)
val_dataset = torchvision.datasets.MNIST(root='../../data', train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1000, shuffle=False)

class CNN_dynamic(nn.Module):
    def __init__(self, kernel_size, conv_layer_num):
        super(CNN_dynamic, self).__init__()
        feature_size = 28
        conv_layers = []

        conv_layers.append(nn.Conv2d(in_channels=1, out_channels=32, kernel_size=kernel_size, stride=1, padding=kernel_size//2))
        conv_layers.append(nn.ReLU())
        for i in range(conv_layer_num - 1):
            conv_layers.append(nn.Conv2d(in_channels=32, out_channels=32, kernel_size=kernel_size, stride=1, padding=kernel_size//2))
            conv_layers.append(nn.ReLU())
            if i < conv_layer_num - 2:
                conv_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                feature_size //= 2
            conv_layers.append(nn.Dropout(0.25))

        self.conv_layers = nn.Sequential(*conv_layers)
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=32 * feature_size * feature_size, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=10)
        )

    def forward(self, input):
        output = self.conv_layers(input)
        output = self.fc_layers(output)
        return output

def train(model, device, train_loader, optimizer, loss_fn):
    model.train()
    total = 0
    correct_predict = 0

    for batch_index, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()

        output = model(data)
        loss = loss_fn(output, target)

        loss.backward()
        optimizer.step()

        _, predicted = output.max(1)
        correct_predict += predicted.eq(target).sum().item()
        total += target.size(0)

    return 100 * correct_predict / total

def test(model, device, test_loader, loss_fn):
    model.eval()
    total = 0
    correct_predict = 0
    
    with torch.no_grad():
        for batch_index, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_fn(output, target)

            _, predicted = output.max(1)
            total += target.size(0)
            correct_predict += predicted.eq(target).sum().item()

    return 100 * correct_predict/total

def main():
    kernel_sizes = [3,5,7]
    num_conv_layers = [3, 4, 5]
    lrs = [0.1, 0.01, 0.001, 0.0001]
    num_epochs = 3

    best_accuracy = 0
    best_params = None
    
    train_accuracies = []
    test_accuracies = []
    loss_fn = nn.CrossEntropyLoss()

    results = {}

    for num_conv_layer in num_conv_layers:
        for kernel_size in kernel_sizes:
            for lr in lrs:

                config_key = f"L{num_conv_layer}_K{kernel_size}_LR{lr}"
                results[config_key] = {
                    'train_acc': [], 'test_acc': [],
                    'params': {'layers': num_conv_layer, 'kernel': kernel_size, 'lr': lr}
                }

                cnn_dynamic = CNN_dynamic(kernel_size, num_conv_layer).to(device)
                optimizer = optim.Adam(cnn_dynamic.parameters(), lr)

                for epoch in range(num_epochs):
                    train_acc = train(cnn_dynamic, device, train_loader, optimizer, loss_fn)
                    test_acc = test(cnn_dynamic, device, val_loader, loss_fn)

                    train_accuracies.append(train_acc)
                    test_accuracies.append(test_acc)

                    results[config_key]['train_acc'].append(train_acc)
                    results[config_key]['test_acc'].append(test_acc)

                    if test_acc > best_accuracy:
                        best_accuracy = test_acc
                        best_params = {'num_con_layer':num_conv_layer,'kernel_size': kernel_size, 'lr': lr }
                        
                    print(f"Epoch {epoch+1}/{num_epochs}, Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%")
                    print(f"Config: {config_key}")
                
                plt.figure(figsize=(8, 4))
                plt.plot(range(1, num_epochs+1), results[config_key]['train_acc'], '-o', label='Train')
                plt.plot(range(1, num_epochs+1), results[config_key]['test_acc'], '-s', label='Validation')
                plt.title(f"Layers={num_conv_layer}, Kernel={kernel_size}, LR={lr}")
                plt.xlabel('Epoch')
                plt.ylabel('Accuracy (%)')
                plt.legend()
                plt.grid(True)
                plt.tight_layout()
                plt.savefig(f'accuracy_L{num_conv_layer}_K{kernel_size}_LR{lr}.png')
                plt.close()

    best_key = f"L{best_params['layers']}_K{best_params['kernel']}_LR{best_params['lr']}"
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, num_epochs+1), results[best_key]['train_acc'], '-bo', label='Train')
    plt.plot(range(1, num_epochs+1), results[best_key]['test_acc'], '-ro', label='Validation')
    plt.title(f"Best Model: {best_params}")
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
    
    print(f"Best accuracy: {best_accuracy:.2f}%")
    print(f"Best parameters: {best_params}")



RuntimeError: Dataset not found. You can use download=True to download it

In [1]:
main()

NameError: name 'main' is not defined