<a href="https://colab.research.google.com/github/ItayShalit/Intro-to-Deep-Learning/blob/main/Intro_to_DL_Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.nn.utils import skip_init
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
import numpy as np


## Part 1


### Data Loading

In [None]:
# transform = transforms.Compose(
#     [transforms.ToTensor(),
#      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

TRAIN_SIZE = 5000
TEST_SIZE = 1000
# batch_size = 4

# trainSampler = torch.utils.data.RandomSampler(TRAIN_SIZE)
# testSampler = torch.utils.data.RandomSampler(TEST_SIZE)



trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, 
                                        transform = transforms.ToTensor())
sample_train_idx = torch.randint(len(trainset), size=(TRAIN_SIZE,))
#trainSampler = torch.utils.data.SubsetRandomSampler(indices = sample_train_idx)
# trainSampler = torch.utils.data.WeightedRandomSampler(weights = [1 for i in range(TRAIN_SIZE)], replacement = True, num_samples = TRAIN_SIZE)

# trainloader = torch.utils.data.DataLoader(trainset, batch_size = batch_size,
#                                           num_workers=2, shuffle = False, sampler = trainSampler)

testset = torchvision.datasets.CIFAR10(root = './data', train = False,
                                       download = True, transform = transforms.ToTensor())
sample_test_idx = torch.randint(len(testset), size=(TEST_SIZE,))
# testSampler = torch.utils.data.SubsetRandomSampler(indices = sample_test_idx)
# testloader = torch.utils.data.DataLoader(testset, batch_size = batch_size,
#                                          num_workers = 2, shuffle = True)

# classes = ('plane', 'car', 'bird', 'cat',
#            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


### Training an SVM Classifier 

In [None]:
MAX_INTENSITY = 255

train_x = trainset.data/MAX_INTENSITY
train_y = trainset.targets

test_x = testset.data/MAX_INTENSITY
test_y = testset.targets

train_x = train_x.reshape(train_x.shape[0], -1)
train_x = train_x[sample_train_idx,:]
train_y = np.array(train_y)[sample_train_idx]

test_x = test_x.reshape(test_x.shape[0], -1)
test_x = test_x[sample_test_idx,:]
test_y = np.array(test_y)[sample_test_idx]

#### Hyperparameter Tuning

In [None]:
param_grid1 = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf']}
param_grid2 = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['linear']}

grid1 = GridSearchCV(SVC(),param_grid1, refit=True, verbose=2, n_jobs = -1)
grid2 = GridSearchCV(SVC(),param_grid1, refit=True, verbose=2, n_jobs = -1)

grid1.fit(train_x, train_y)
grid2.fit(train_x, train_y)

#### Training the Models

In [None]:
params1 = grid1.best_params_
params2 = grid2.best_params_

rbf_model = SVC(C = params1["C"], kernel = "rbf", gamma = params1["gamma"])
linear_model = SVC(C = params2["C"], kernel = "linear", gamma = params2["gamma"])

rbf_model.fit(train_x, train_y)
linear_model.fit(train_x, train_y)

test_pred_rbf = rbf_model.predict(test_x)
test_pred_linear = linear_model.predict(test_x)

print(f"""accuracy score for linear model is: {accuracy_score(test_y, test_pred_linear)} \n 
        accuracy score for rbf model is: {accuracy_score(test_y, test_pred_rbf)}""")


## Part 2

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [None]:
class FFNeuralNetwork(nn.Module):
    def __init__(self):
        super(FFNeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        input_layer = skip_init(nn.Linear, 3072, 256) #To save computations, Parameter initialization is currently skipped, 
                                                      #and will be done before training using a custom distribution.
        hidden_layer = skip_init(nn.Linear, 256, 10)
        self.linear_relu_stack = nn.Sequential(
            input_layer,
            nn.ReLU(),
            hidden_layer,
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
model = FFNeuralNetwork().to(device)
print(model)

FFNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=3072, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=10, bias=True)
  )
)


### Training 

In [1]:
def train_loop(dataloader, model, loss_fn, optimizer, print_progress = False):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if print_progress&(batch % 100 == 0):
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn, return_results = False):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    if return_results:
      return test_loss, correct

#### Hyperparameter Tuning

In [None]:
from sys import maxint

learning_rate_vals = [1e-1, 1e-2, 1e-3]
init_std_vals = [1e-5, 1e-3, 1e-1]
momentum_param_vals = [0.9, 0.95, 0.99] 

best_loss = maxint
best_configuration = {'learning_rate':None, 'init_std': None, momentum_param: None}

loss_fn = nn.CrossEntropyLoss()
batch_size = 64
epochs = 10
#Should sample the same samples as those I used for training and testing SVM.
trainloader = torch.utils.data.DataLoader(trainset, batch_size = batch_size,
                                          num_workers=2, shuffle = False, sampler = trainSampler)
testloader = torch.utils.data.DataLoader(testset, batch_size = batch_size,
                                          num_workers=2, shuffle = False, sampler = trainSampler)

for learning_rate in learning_rate_vals:
  for init_std in init_std_vals:
    for momentum_param in momentum_param_vals:
      torch.nn.init.normal_(model.input_layer.weight, mean=0.0, std=init_std)
      torch.nn.init.normal_(model.hidden_layer.weight, mean=0.0, std=init_std)
      optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum_param)
      for t in range(epochs):
        # print(f"Epoch {t+1}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer)
        # test_loop(test_dataloader, model, loss_fn)
      loss, accuracy = test_loop(dataloader, model, loss_fn,  True)
      print("learning rate: ", learning_rate, "init_std: ", init_std , "momentum_param: ", momentum_param, "loss: ", loss)
      if loss<best_loss:
        best_loss = loss
        best_configuration["learning rate"] = learning_rate
        best_configuration["momentum_param"] = momentum_param
        best_configuration["init_std"] = init_std


SyntaxError: ignored

### Plotting training procces with best configuration

In [None]:
torch.nn.init.normal_(model.input_layer.weight, mean=0.0, std=best_configuration.init_std)
torch.nn.init.normal_(model.hidden_layer.weight, mean=0.0, std=best_configuration.init_std)
optimizer = torch.optim.SGD(model.parameters(), lr=best_configuration["learning rate"], momentum=best_configuration["momentum_param"])
train_accuracy_vals, train_loss_vals, test_accuracy_vals, test_loss_vals = [], [], [], []

for t in range(epochs):
    train_loop(train_dataloader, model, loss_fn, optimizer)
    pred = #prediction over train_set
    train_accuracy_vals.append(torch.sum(pred == y)/#not completed)
    train_loss_vals.append() 
    test_loss, test_accuracy = test_loop(test_dataloader, model, loss_fn, True)
    test_loss_vals.append(test_loss)
    test_accuracy_vals.append(test_accuracy)

fig = plt.figure(figsize=(1,2))
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)
ax1.plot([i for i in range(epochs)], train_accuracy_vals)
ax1.plot([i for i in range(epochs)], test_accuracy_vals)
ax2.plot([i for i in range(epochs)], train_loss_vals)
ax2.plot([i for i in range(epochs)], test_loss_vals)
fig.show()