In [64]:
import torch
import torchvision
import os 
import time

# Filesystem locations, resolved relative to the directory the notebook runs in.
cwd = os.getcwd()
model_save_path = os.path.join(cwd, "models")

# Hyper-parameters shared by every model in the notebook.
BATCH_SIZE = 32
EPOCHS = 20

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [65]:
#Data
def get_MNIST_data():
    """Build the MNIST train and test data loaders.

    The dataset is downloaded on first use into ``<cwd>/data/MNIST`` and each
    sample is passed through ``torchvision.transforms.ToTensor``.

    Returns:
        tuple: ``(train_loader, test_loader)``. Both loaders batch
        ``BATCH_SIZE`` samples; only the training loader shuffles.
    """
    # Build the path with os.path.join: the former "\data\MNIST" literal relied
    # on invalid escape sequences and only formed a real path on Windows.
    data_root = os.path.join(cwd, "data", "MNIST")
    to_tensor = torchvision.transforms.ToTensor()

    train_dataset = torchvision.datasets.MNIST(root=data_root, train=True, download=True, transform=to_tensor)
    test_dataset = torchvision.datasets.MNIST(root=data_root, train=False, download=True, transform=to_tensor)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    # The summed test metrics do not depend on sample order, so the test split
    # is left unshuffled (same as the CIFAR10 test loader).
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    return train_loader, test_loader

def get_CIFAR10_data():
    """Build the CIFAR10 train and test data loaders.

    The dataset is downloaded on first use into ``<cwd>/data/CIFAR10``. Each
    image is converted with ``ToTensor`` and then normalised per channel with
    mean 0.5 and standard deviation 0.5.

    Returns:
        tuple: ``(train_loader, test_loader)``. Both loaders batch
        ``BATCH_SIZE`` samples; only the training loader shuffles.
    """
    # Build the path with os.path.join: the former "\data\CIFAR10" literal
    # relied on invalid escape sequences and only formed a real path on Windows.
    data_root = os.path.join(cwd, "data", "CIFAR10")
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

    testset = torchvision.datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

    return train_loader, test_loader

In [66]:
#Models
class MNIST_model1(torch.nn.Module):
    """Single-convolution MNIST classifier: conv -> ReLU -> flatten -> linear."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 1
        # A 5x5 kernel with padding=2 and stride=1 keeps the 28x28 spatial size.
        self.conv = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(16 * 28 * 28, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of 1-channel 28x28 images."""
        activations = self.relu(self.conv(x))
        return self.fc(self.flatten(activations))

class MNIST_model2(torch.nn.Module):
    """Two-convolution MNIST classifier followed by one hidden linear layer."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 2

        # Both convolutions use 5x5 kernels with padding=2, so the 28x28
        # spatial size is preserved up to the flatten step.
        feature_layers = [
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.Conv2d(6, 16, kernel_size=5, padding=2),
            nn.ReLU(),
        ]
        self.conv_block = nn.Sequential(*feature_layers)

        self.flatten = nn.Flatten()

        classifier_layers = [
            nn.Linear(16 * 28 * 28, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.fc_block = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the 10 class scores for a batch of 1-channel 28x28 images."""
        features = self.flatten(self.conv_block(x))
        return self.fc_block(features)
    
class MNIST_model3(torch.nn.Module):
    """MNIST classifier with two conv/batch-norm/pool stages and three linear layers."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 3

        # Each stage keeps the spatial size in the convolution (5x5, padding=2)
        # and halves it in the max-pool: 28 -> 14 -> 7.
        feature_layers = [
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv_block = nn.Sequential(*feature_layers)

        self.flatten = nn.Flatten()

        classifier_layers = [
            nn.Linear(16 * 7 * 7, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.fc_block = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the 10 class scores for a batch of 1-channel 28x28 images."""
        features = self.flatten(self.conv_block(x))
        return self.fc_block(features)
    
class CIFAR10_model1(torch.nn.Module):
    """Single-convolution CIFAR10 classifier: conv -> ReLU -> flatten -> linear."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 4
        # A 5x5 kernel with padding=2 and stride=1 keeps the 32x32 spatial size.
        self.conv = nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(16 * 32 * 32, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of 3-channel 32x32 images."""
        activations = self.relu(self.conv(x))
        return self.fc(self.flatten(activations))

class CIFAR10_model2(torch.nn.Module):
    """Two-convolution CIFAR10 classifier followed by one hidden linear layer."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 5

        # Both convolutions use 5x5 kernels with padding=2, so the 32x32
        # spatial size is preserved up to the flatten step.
        feature_layers = [
            nn.Conv2d(3, 6, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.Conv2d(6, 16, kernel_size=5, padding=2),
            nn.ReLU(),
        ]
        self.conv_block = nn.Sequential(*feature_layers)

        self.flatten = nn.Flatten()

        classifier_layers = [
            nn.Linear(16 * 32 * 32, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.fc_block = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the 10 class scores for a batch of 3-channel 32x32 images."""
        features = self.flatten(self.conv_block(x))
        return self.fc_block(features)

class CIFAR10_model3(torch.nn.Module):
    """CIFAR10 classifier with two conv/batch-norm/pool stages and three linear layers."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 6

        # Each stage keeps the spatial size in the convolution (5x5, padding=2)
        # and halves it in the max-pool: 32 -> 16 -> 8.
        feature_layers = [
            nn.Conv2d(3, 6, kernel_size=5, padding=2),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv_block = nn.Sequential(*feature_layers)

        self.flatten = nn.Flatten()

        classifier_layers = [
            nn.Linear(16 * 8 * 8, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.fc_block = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the 10 class scores for a batch of 3-channel 32x32 images."""
        features = self.flatten(self.conv_block(x))
        return self.fc_block(features)
    
class Bonus_CIFAR10_model1(torch.nn.Module):
    """Deeper CIFAR10 classifier: three conv/batch-norm stages and a dropout MLP head.

    The input width of the first linear layer is not hard-coded; it is measured
    once at construction time by pushing a dummy 3x32x32 image through the
    convolutional stages (see ``_calculate_flatten_size``).
    """

    def __init__(self):
        super().__init__()
        # 1-based position of this model; the notebook uses it to pick the
        # optimizer (optimizers[index - 1]) and to name the checkpoint file.
        self.index = 7
        self.conv_block1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 9, kernel_size=3, padding=1),  # Smaller kernel, more channels
            torch.nn.BatchNorm2d(9),
            torch.nn.ReLU()
        )

        self.conv_block2 = torch.nn.Sequential(
            torch.nn.Conv2d(9, 15, kernel_size=4, padding=1),
            torch.nn.BatchNorm2d(15),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2)
        )

        self.conv_block3 = torch.nn.Sequential(
            torch.nn.Conv2d(15, 30, kernel_size=5, padding=1),  # Increase channels further
            torch.nn.BatchNorm2d(30),
            torch.nn.ReLU()
        )

        # Calculate the flattened output size before the linear layers.
        # For 32x32 inputs the spatial size goes 32 -> 31 -> 15 (pool) -> 13,
        # so this evaluates to 30 * 13 * 13.
        self.flatten_size = self._calculate_flatten_size()

        self.fc_block = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(self.flatten_size, 256),
            torch.nn.Dropout(0.2),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.Dropout(0.3),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 10)  # Output for 10 CIFAR10 classes
        )

    def _calculate_flatten_size(self):
        """Return the number of features the conv stages emit per 3x32x32 image.

        The probe runs in eval mode under ``torch.no_grad()``. In training mode
        the BatchNorm layers would fold the dummy batch into their running
        mean/variance and advance ``num_batches_tracked``, so a freshly built
        model would start with statistics from data it was never trained on.
        The previous train/eval mode is restored afterwards.
        """
        # Only the shape matters, so a constant tensor is enough and no random
        # numbers are consumed.
        test_input = torch.zeros(1, 3, 32, 32)
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                out = self.conv_block3(self.conv_block2(self.conv_block1(test_input)))
        finally:
            self.train(was_training)
        return out.flatten(start_dim=1).shape[1]

    def forward(self, x):
        """Return the 10 class scores for a batch of 3-channel 32x32 images."""
        out = self.conv_block3(self.conv_block2(self.conv_block1(x)))
        # fc_block starts with Flatten, so the feature map is passed as-is.
        out = self.fc_block(out)
        return out

In [67]:
#Functions
def Get_models():
    """Create a fresh, untrained instance of every model defined in the notebook.

    Returns:
        tuple: the three MNIST models, the three CIFAR10 models and the bonus
        CIFAR10 model, in that order (matching their ``index`` attributes 1-7).
    """
    model_classes = (
        MNIST_model1,
        MNIST_model2,
        MNIST_model3,
        CIFAR10_model1,
        CIFAR10_model2,
        CIFAR10_model3,
        Bonus_CIFAR10_model1,
    )
    # Instantiated left to right, i.e. in the same order as they are returned.
    return tuple(model_class() for model_class in model_classes)

In [68]:
#Train and test function
def train_model(model, optimizers, loss, train_loader):
    """Train ``model`` for ``EPOCHS`` epochs and save its weights to disk.

    Args:
        model: network to train; must expose a 1-based ``index`` attribute.
        optimizers: sequence of optimizers; entry ``model.index - 1`` is the
            one used for this model.
        loss: callable invoked as ``loss(output, target)`` that returns a
            scalar tensor supporting ``backward()``.
        train_loader: data loader yielding ``(data, target)`` batches.

    Side effects:
        Prints the loss every 1000 batches and writes the final ``state_dict``
        to ``<model_save_path>/model_<index>.pth``.
    """
    # Batches are moved to DEVICE below, so the model has to live there too;
    # otherwise the forward pass fails on a CUDA machine. Module.to() moves the
    # parameters in place, so optimizers created earlier remain valid.
    model.to(DEVICE)
    model.train()

    # The optimizer does not change between batches: look it up once.
    optimizer = optimizers[model.index - 1]

    for epoch in range(EPOCHS):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(DEVICE), target.to(DEVICE)
            optimizer.zero_grad()  # Clear previous gradients
            output = model(data)   # Forward pass
            loss_val = loss(output, target)  # Calculate loss
            loss_val.backward()        # Compute gradients
            optimizer.step()       # Update model parameters

            if batch_idx % 1000 == 0:  # Print progress
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss_val.item()))

    # Make sure the target directory exists so a finished training run is not
    # lost to a missing folder.
    os.makedirs(model_save_path, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(model_save_path, f"model_{model.index}.pth"))
            
    
def test_models(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    loss_fn = torch.nn.CrossEntropyLoss()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            # sum up batch loss
            test_loss += loss_fn(output, target).item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))

In [69]:

# Data loaders for both datasets.
mnist_train_loader, mnist_test_loader = get_MNIST_data()
cifar10_train_loader, cifar10_test_loader = get_CIFAR10_data()
loss_fn = torch.nn.CrossEntropyLoss()

# Getting models. They are moved to DEVICE before the optimizers are created,
# because training and evaluation move every batch to DEVICE; a model left on
# the CPU would fail on a CUDA machine.
models = tuple(model.to(DEVICE) for model in Get_models())
os.makedirs(model_save_path, exist_ok=True)

# optimizers[i] belongs to models[i]; train_model looks its optimizer up as
# optimizers[model.index - 1], so the order must match the model order.
optimizers = [torch.optim.Adam(model.parameters(), lr=0.001) for model in models]

Files already downloaded and verified
Files already downloaded and verified


In [70]:
# Check whether every model is already trained; load the saved ones and train
# (and time) the rest.
# time_data[model.index - 1] is the training time in seconds, or None when the
# model was loaded from a checkpoint instead of being trained in this session.
# Keeping the list index-aligned prevents a timing from being attributed to the
# wrong model when only some checkpoints exist.
time_data = [None] * len(models)

print("-----------------------------------------------------")
for model in models:
    print(f"Model {model.index} Loading...")
    checkpoint_path = os.path.join(model_save_path, f"model_{model.index}.pth")
    if os.path.exists(checkpoint_path):
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be loaded onto whatever DEVICE is in use now.
        model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
        continue

    # Pick the training data (and the message) that matches the model.
    if model.index <= 3:
        print(f"MNIST model {model.index} is not trained, Training...")
        train_loader = mnist_train_loader
    elif model.index == 7:
        print(f"Bonus CIFAR10 model {model.index} is not trained, Training...")
        train_loader = cifar10_train_loader
    else:
        print(f"CIFAR10 model {model.index} is not trained, Training...")
        train_loader = cifar10_train_loader

    start_time = time.perf_counter()
    train_model(model, optimizers, loss_fn, train_loader)
    time_data[model.index - 1] = time.perf_counter() - start_time

-----------------------------------------------------
Model 1 Loading...
MNIST model 1 is not trained, Training...
Model 2 Loading...
MNIST model 2 is not trained, Training...
Model 3 Loading...
MNIST model 3 is not trained, Training...
Model 4 Loading...
CIFAR10 model 4 is not trained, Training...
Model 5 Loading...
CIFAR10 model 5 is not trained, Training...
Model 6 Loading...
CIFAR10 model 6 is not trained, Training...
Model 7 Loading...
Bonus CIFAR10 model 7 is not trained, Training...


In [71]:
print("-----------------------------------------------------")
log_file = os.path.join(model_save_path, "time_log.txt")
for model in models:
    # A timing is only reported when one is recorded for this model; the entry
    # can be absent or None (e.g. the model was not trained in this session).
    slot = model.index - 1
    elapsed = time_data[slot] if slot < len(time_data) else None
    if elapsed is not None:
        message = f"Time to train model in seconds for model {model.index}: {elapsed}"
        print(message)
        # store time in log file; a failed write must not stop the evaluation
        try:
            with open(log_file, 'a') as f:
                f.write(message + "\n")
        except OSError as err:
            print(f"Could not write to {log_file}: {err}")

    # Choose the test set (and the message) that matches the model.
    if model.index <= 3:
        print(f"MNIST model {model.index} Testing...")
        eval_loader = mnist_test_loader
    elif model.index == 7:
        print(f"Bonus CIFAR10 model {model.index} Testing...")
        eval_loader = cifar10_test_loader
    else:
        print(f"CIFAR10 model {model.index} Testing...")
        eval_loader = cifar10_test_loader
    test_models(model, eval_loader)
print("-----------------------------------------------------")

-----------------------------------------------------
Time to train model in seconds for model 1: 245.1836473941803
MNIST model 1 Testing...

Test set: Average loss: 0.0031, Accuracy: 9822/10000 (98%)

Time to train model in seconds for model 2: 572.8232760429382
MNIST model 2 Testing...

Test set: Average loss: 0.0027, Accuracy: 9876/10000 (99%)

Time to train model in seconds for model 3: 305.9659881591797
MNIST model 3 Testing...

Test set: Average loss: 0.0011, Accuracy: 9915/10000 (99%)

Time to train model in seconds for model 4: 323.540992975235
CIFAR10 model 4 Testing...

Test set: Average loss: 0.0705, Accuracy: 5589/10000 (56%)

Time to train model in seconds for model 5: 683.2310018539429
CIFAR10 model 5 Testing...

Test set: Average loss: 0.1338, Accuracy: 5754/10000 (58%)

Time to train model in seconds for model 6: 397.55051922798157
CIFAR10 model 6 Testing...

Test set: Average loss: 0.0355, Accuracy: 6703/10000 (67%)

Time to train model in seconds for model 7: 879.4030

In [None]:
"""The model with the least amount of error for validation was MNIST model 3 with an accuracy of 99% and an Accuracy of 9874/10000
    For CIFAR10, the best model was model 7, with an accuracy of 73% (7282/10000).
    
    The training time and test accuracy for each model are:
    
    Time to train model in seconds for model 1: 245.1836473941803
    MNIST model 1 Testing...

    Test set: Average loss: 0.0031, Accuracy: 9822/10000 (98%)

    Time to train model in seconds for model 2: 572.8232760429382
    MNIST model 2 Testing...

    Test set: Average loss: 0.0027, Accuracy: 9876/10000 (99%)

    Time to train model in seconds for model 3: 305.9659881591797
    MNIST model 3 Testing...

    Test set: Average loss: 0.0011, Accuracy: 9915/10000 (99%)

    Time to train model in seconds for model 4: 323.540992975235
    CIFAR10 model 4 Testing...

    Test set: Average loss: 0.0705, Accuracy: 5589/10000 (56%)

    Time to train model in seconds for model 5: 683.2310018539429
    CIFAR10 model 5 Testing...

    Test set: Average loss: 0.1338, Accuracy: 5754/10000 (58%)
    ...

    Test set: Average loss: 0.0373, Accuracy: 7310/10000 (73%)
    
"""