In [1]:
!nvidia-smi

zsh:1: command not found: nvidia-smi


In [2]:
!/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install tqdm

Defaulting to user installation because normal site-packages is not writeable


In [12]:
# Import the required modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.datasets import MNIST
from torch.utils.data import random_split, DataLoader
import torch.optim as optim
from tqdm import tqdm

In [13]:
# Fix the randomness
seed = 80
torch.manual_seed(seed)

<torch._C.Generator at 0x1073e4ad0>

In [14]:
from torchvision.datasets import CIFAR10 
import torchvision.transforms as T
train_transform = T.Compose([
# can add additional transforms on images 
    T.ToTensor(), # convert images to PyTorch tensors
    T.Grayscale(), # RGB to grayscale 
    T.Normalize(mean=(0.5,), std=(0.5,)) # normalization
# speeds up the convergence # and improves the accuracy
])
val_transform = test_transform = T.Compose([ 
    T.ToTensor(),
    T.Grayscale(), 
    T.Normalize(mean=(0.5,), std=(0.5,))
])
train_set = CIFAR10(root='CIFAR10' , train=True , transform=train_transform , download=True) 
test_set = CIFAR10(root='CIFAR10'  , train=False, transform=test_transform , download=True)

train_set_length = int(0.8 * len(train_set))
val_set_length = len(train_set) - train_set_length

train_set, val_set = random_split(train_set, [train_set_length, val_set_length])


Files already downloaded and verified
Files already downloaded and verified


In [15]:
batch_size = 8
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size*2, shuffle=True)
test_loader = DataLoader(train_set, batch_size=batch_size*2, shuffle=True)


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [16]:
for data in train_loader:
    print(data)
    break

[tensor([[[[-1.0000, -0.9373, -0.8196,  ..., -0.6950, -0.7735, -0.8613],
          [-1.0000, -0.9922, -0.9843,  ..., -0.5348, -0.6447, -0.7740],
          [-0.9922, -0.9922, -0.9922,  ..., -0.3830, -0.3761, -0.6747],
          ...,
          [-1.0000, -1.0000, -0.9922,  ..., -0.3196, -0.3343, -0.3477],
          [-1.0000, -0.9922, -0.9922,  ..., -0.5957, -0.6059, -0.6216],
          [-1.0000, -0.9922, -0.9922,  ..., -0.8419, -0.8498, -0.8654]]],


        [[[-0.2866, -0.1981,  0.1816,  ...,  0.7780,  0.6996,  0.6899],
          [-0.1551, -0.1073,  0.1643,  ...,  0.8509,  0.8499,  0.8448],
          [ 0.4708,  0.4666,  0.5000,  ...,  0.8915,  0.9033,  0.9181],
          ...,
          [ 0.2376,  0.2464,  0.2601,  ..., -0.3847, -0.3277, -0.4369],
          [ 0.0098,  0.0995,  0.2864,  ..., -0.5426, -0.5025, -0.5543],
          [ 0.1257,  0.4984,  0.4848,  ..., -0.5643, -0.6290, -0.6014]]],


        [[[-0.2887, -0.2495, -0.2149,  ..., -0.7050, -0.6677, -0.6934],
          [-0.2306, -0.22

In [17]:
#defining parameters
activation_functions = ['relu', 'sigmoid', 'Tanh']
learning_rates = [5e-3, 1e-3, 5e-4]
number_of_layers = [1, 2, 3, 4] 

In [18]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class MY_ANN(nn.Module):
    def __init__(self, input_size, output_size, total_hidden_layers):
        super(MY_ANN, self).__init__()
        self.total_hidden_layers = total_hidden_layers
        self.deep_nn = nn.Sequential()
        
        for i in range(total_hidden_layers):
            
            if i+1 == total_hidden_layers:
                self.deep_nn.add_module(f'fc{i+1}',nn.Linear(input_size,output_size))
                
            else:
                self.deep_nn.add_module(f'fc{i+1}',nn.Linear(input_size,input_size//2))
                input_size = input_size//2
    
    def forward(self, x, activation_function):
        
        total_iterations_next = (self.total_hidden_layers)
        x = torch.flatten(x, 1)
        
        for i in range(total_iterations_next-1):
            if activation_function == activation_functions[0]:
                x = F.relu(self.deep_nn[i](x))
            elif activation_function == activation_functions[1]:
                x = F.sigmoid(self.deep_nn[i](x))
            elif activation_function == activation_functions[2]:
                x = F.tanh(self.deep_nn[i](x))  
            
        x = self.deep_nn[total_iterations_next-1](x)     
        return x
            
    def actual_training(self, learning_rate, activation_function, train_loader):
        
        optimizer = optim.Adam(self.parameters(),learning_rate)
        epochs = 7
        loss = nn.CrossEntropyLoss()
        
        for epoch in range(epochs):
            
            for picture in train_loader:
                X, Y = picture
                X, Y = X.to(device), Y.to(device)
                output = self.forward(X, activation_function)
                our_loss = loss(output,Y)
                optimizer.zero_grad()
                our_loss.backward()
                optimizer.step()
                
    def evaluation(self, val_loader, activation_function):
        total_guesses = 0
        correct_guesses = 0
        accumulative_loss = 0
        
        loss = nn.CrossEntropyLoss()
        
        with torch.no_grad():
            for picture in val_loader:
                X, Y = picture
                output = self.forward(X, activation_function)
                
                for index, i in enumerate(output):
    
                    total_guesses = total_guesses + 1
                    
                    if torch.argmax(i) == Y[index]:
                        correct_guesses = correct_guesses + 1
                        
                accumulative_loss = accumulative_loss + loss(self.forward(X, activation_function), Y)
                
        loss = accumulative_loss / len(val_set)
        return round(correct_guesses/total_guesses, 3), loss

net = MY_ANN(32*32, 10, 3)
print(net)            
                

MY_ANN(
  (deep_nn): Sequential(
    (fc1): Linear(in_features=1024, out_features=512, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=10, bias=True)
  )
)


In [19]:
total_guesses = 0
correct_guesses = 0
accumulative_loss = 0

loss = nn.CrossEntropyLoss()

with torch.no_grad():
    for picture in train_loader:
        X, Y = picture
        output = net.forward(X, activation_functions[0])
        #print(output)
        for index, i in enumerate(output):
            total_guesses = total_guesses + 1
            
            if torch.argmax(i) == Y[index]:
                correct_guesses = correct_guesses + 1 

        accumulative_loss = accumulative_loss + loss(net.forward(X, activation_functions[0]), Y)
        
print("Accuracy: ", round(correct_guesses/total_guesses, 3))
loss = accumulative_loss / len(train_loader)
print("Loss: ", loss)



Accuracy:  0.095
Loss:  tensor(2.3059)


In [20]:
for layer in number_of_layers:
    for activation_func in activation_functions:
        for learning_rate in learning_rates:
            net = MY_ANN(32*32, 10, layer)
            net.actual_training(learning_rate, activation_func, train_loader)
            validation_accuracy, validation_loss = net.evaluation(val_loader, activation_func) 
            #print(no_layer, activation_func, learning_rate, validation_accuracy, validation_loss)
            #grid_dictionary[no_layer, activation_func, learning_rate] = [validation_accuracy, validation_loss]
            
            total_guesses = 0
            correct_guesses = 0
            
            net.eval()
            with torch.no_grad():
                for picture in test_loader:
                    X, Y = picture
                    output = net.forward(X, activation_func)
                    
                    _, predicted = torch.max(output.data, 1)
                    total_guesses += Y.size(0)
                    correct_guesses += (predicted == Y).sum()
                    
            print(layer, activation_func, learning_rate, correct_guesses/total_guesses)        
            #print('Accuracy of the network on the 10000 test images: %d %%' % (correct / total))


1 relu 0.005 tensor(0.2370)
1 relu 0.001 tensor(0.3125)
1 relu 0.0005 tensor(0.3181)
1 sigmoid 0.005 tensor(0.2439)
1 sigmoid 0.001 tensor(0.3072)
1 sigmoid 0.0005 tensor(0.3101)
1 Tanh 0.005 tensor(0.2341)
1 Tanh 0.001 tensor(0.3020)
1 Tanh 0.0005 tensor(0.3132)
2 relu 0.005 tensor(0.3317)
2 relu 0.001 tensor(0.5333)
2 relu 0.0005 tensor(0.5708)
2 sigmoid 0.005 tensor(0.3187)
2 sigmoid 0.001 tensor(0.5753)
2 sigmoid 0.0005 tensor(0.5282)
2 Tanh 0.005 tensor(0.2145)
2 Tanh 0.001 tensor(0.4566)
2 Tanh 0.0005 tensor(0.5490)
3 relu 0.005 tensor(0.1628)
3 relu 0.001 tensor(0.5136)
3 relu 0.0005 tensor(0.6059)
3 sigmoid 0.005 tensor(0.2888)
3 sigmoid 0.001 tensor(0.5176)
3 sigmoid 0.0005 tensor(0.5117)
3 Tanh 0.005 tensor(0.2161)
3 Tanh 0.001 tensor(0.4196)
3 Tanh 0.0005 tensor(0.5511)
4 relu 0.005 tensor(0.1087)
4 relu 0.001 tensor(0.5225)
4 relu 0.0005 tensor(0.6046)
4 sigmoid 0.005 tensor(0.2982)
4 sigmoid 0.001 tensor(0.4981)
4 sigmoid 0.0005 tensor(0.5055)
4 Tanh 0.005 tensor(0.2015)
4