# Q1
Implement a neural network and utilize the CIFAR-10 dataset for the analysis.
1. Utilize various activation functions like sigmoid, tanh and critique the performance in
each case.
2. Increase the depth of the given network by adding more Fully-Connected layers till the
point you encounter the vanishing gradient problem. With the help of the results, mention
how to identify it.
3. Suggest and implement methods to overcome the above problem.



---
 
# Sigmoid one layer



In [None]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:


def gradient_check_n(model, X, Y, epsilon=1e-7):

   
    parameters_values = dictionary_to_vector(model)
    grad = gradients_to_vector(model)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    
    
    for i in range(num_parameters):
        
        
        thetaplus =  np.copy(parameters_values)                                       # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                   # Step 2
        J_plus[i] =  forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))  # Step 3
        
        thetaminus = np.copy(parameters_values)                                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon                                 # Step 2        
        J_minus[i] = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus)) # Step 3
        
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
       
    
    
    numerator = np.linalg.norm(grad - gradapprox)                                     # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)                   # Step 2'
    difference = numerator / denominator                                              # Step 3'
    
    truth=0
    if difference > 1e-7:
        truth=1
   
    return truth

In [None]:
import wandb

wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
wandb.init(project="sigmoid1")

[34m[1mwandb[0m: Currently logged in as: [33msharma-87[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
config = wandb.config

In [None]:
config = dict(
    epochs=10,
    classes=10,
    batch_size=32,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [None]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to path/to/data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting path/to/data/cifar-10-python.tar.gz to path/to/data
Files already downloaded and verified


In [None]:
for i, data in enumerate(train_dataset):
    inputs, labels = data
   # print('Labels:', labels)
    print('Labels:', inputs.shape)
    if i == 2: # only show the first 3 data points
        break

Labels: torch.Size([1, 32, 32])
Labels: torch.Size([1, 32, 32])
Labels: torch.Size([1, 32, 32])


In [None]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = sig(self.fc1(x))
        x = self.fc2(x)
        return x
from torch import optim
model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)

[]

In [None]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [None]:
example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(7):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        loss = criterion(output, target.to(device))
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()

In [None]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data.to(device))
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target.to(device)).sum().item()
    accuracy = 100 * correct / total
    wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))

Model accuracy: 37.51


In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██████
loss,█▆▄▄▄▅▅▄▃▅▃▄▇▆▅▁▂▅▃▃▅▃▂▁▂▃▇▁▄▃▅▅▄▁▃▃▂▂▃▃
test_accuracy,▁

0,1
epoch,6.0
loss,1.68155
test_accuracy,0.3751


In [None]:
inputs = inputs.cpu().detach().requires_grad_(requires_grad=True)

In [None]:
gradient_check_n(model, inputs, labels, epsilon=1e-7)

True

# Sigmoid 2 hidden layer 


In [None]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
import wandb
wandb.login()
wandb.init(project="sig2")

In [None]:
config = wandb
config = dict(
    epochs=10,
    classes=10,
    batch_size=32,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [None]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()
relu=torch.nn.ReLU()
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = sig(self.fc1(x))
        x = sig(self.fc2(x))
        x=sig(self.fc3(x))
        return x

model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(10):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        loss = criterion(output, target.to(device))
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()


In [None]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data.to(device))
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target.to(device)).sum().item()
    accuracy = 100 * correct / total
    #wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))


Model accuracy: 35.19


In [None]:
inputs

tensor([[[1.0000, 0.9922, 0.9922,  ..., 0.9922, 0.9922, 0.9922],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 0.9961, 0.9961,  ..., 0.9961, 0.9961, 0.9961],
         ...,
         [0.4588, 0.4510, 0.4275,  ..., 0.3059, 0.3020, 0.3020],
         [0.4510, 0.4235, 0.4039,  ..., 0.2863, 0.2902, 0.3216],
         [0.4314, 0.4039, 0.3882,  ..., 0.3255, 0.3255, 0.3294]]],
       requires_grad=True)

In [None]:
gradient_check_n(model, inputs, labels, epsilon=1e-7)

True

In [None]:
#print("Model accuracy:", evaluate_model(model, test_loader))
wandb.finish()

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
loss,▇▇█▅▆▅▅▅▇▄▃▆▄▅▅▄▅▅▄▂▃▄▄▁▃▂▃▄▄▄▅▄▄▅▄▄▃▄▄▂

0,1
epoch,9.0
loss,1.93492


# Sigmoid 3 hidden layer 


In [None]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
import wandb
wandb.login()
wandb.init(project="sigmoid3")

In [None]:
config = wandb
config = dict(
    epochs=10,
    classes=10,
    batch_size=128,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [None]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    
    transforms.RandomHorizontalFlip(),

    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()
relu=torch.nn.ReLU()
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = sig(self.fc1(x))
        x = sig(self.fc2(x))
        x = sig(self.fc3(x))
        x=sig(self.fc4(x))
        return x

model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(7):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        data=data.to(device)
        output = model(data)
        target=target.to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()


In [None]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data=data.to(device)
            target=target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    #wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))


Model accuracy: 24.76


In [None]:
gradient_check_n(model, inputs, labels, epsilon=1e-5)

True

In [None]:
print("Model accuracy:", evaluate_model(model, test_loader))
wandb.finish()

Model accuracy: 24.59


VBox(children=(Label(value='0.001 MB of 0.051 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.012654…

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██████
loss,█▇▇▄▆▅▅▅▄▅▄▄▅▂▅█▄▃▄▄▃▅▃▅▁▃▅▃▃▅▃▅▂▄▃▁▃▄▃▅

0,1
epoch,6.0
loss,2.06783




---
 
# Tanh one layer



In [87]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [88]:
import wandb

wandb.login()

True

In [89]:
wandb.init(project="Tanh1")

In [90]:
config = wandb.config

In [91]:
config = dict(
    epochs=10,
    classes=10,
    batch_size=128,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [92]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
for i, data in enumerate(train_dataset):
    inputs, labels = data
   # print('Labels:', labels)
    print('Labels:', inputs.shape)
    if i == 2: # only show the first 3 data points
        break

Labels: torch.Size([1, 32, 32])
Labels: torch.Size([1, 32, 32])
Labels: torch.Size([1, 32, 32])


In [None]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()
t=torch.nn.Tanh()

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = t(self.fc1(x))
        x = self.fc2(x)
        return x

model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)

[]

In [None]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [None]:
example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(10):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        loss = criterion(output, target.to(device))
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()

In [None]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data.to(device))
            _, predicted = torch.max(output.data, 1)
            target=target.to(device)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))

Model accuracy: 35.88


In [93]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

# Relu one hidden layer 


In [None]:
!pip install wandb -Uq

In [94]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [95]:
import wandb
wandb.login()
wandb.init(project="Relu1")

In [96]:
config = wandb
config = dict(
    epochs=10,
    classes=10,
    batch_size=128,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [97]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [98]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()
relu=torch.nn.ReLU()
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(10):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()


In [100]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))
print(gradient_check_n(model, inputs, labels, epsilon=1e-5))
wandb.finish()

Model accuracy: 37.82


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
loss,▆▆█▆▄█▆▆▅▄▃▄▄▁▄▄▃▃▂▆▅▆▄▄▄▄▄▄▃▅▄▂▁█▄▂▃▂▂▇
test_accuracy,▁

0,1
epoch,9.0
loss,1.91879
test_accuracy,0.3782


✌




# Relu 2 hidden layer 


In [101]:
!pip install wandb -Uq

In [102]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [103]:
import wandb
wandb.login()
wandb.init(project="Vanishing gradient")

In [104]:
config = wandb
config = dict(
    epochs=10,
    classes=10,
    batch_size=128,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [105]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [106]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()
relu=torch.nn.ReLU()
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x=self.fc3(x)
        return x

model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(20):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        loss = criterion(output, target.to(device))
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()


In [108]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))
print(gradient_check_n(model, inputs, labels, epsilon=1e-5))


Model accuracy: 39.86
True


In [109]:
wandb.finish()

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
loss,██▆▆▇▄▅▆▅▆▆▃▄▂▄▆▃▅▃▄▄▃▄▃▄▅▂▃▁▃▄▃▃▃▃▆▁▅▃▂
test_accuracy,▁

0,1
epoch,19.0
loss,1.58406
test_accuracy,0.3986


✌




# Relu 3 hidden layer 


In [None]:
!pip install wandb -Uq

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.9/2.0 MB[0m [31m85.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.0/184.0 KB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.9/178.9 KB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 KB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
import wandb
wandb.login()
wandb.init(project="ReLu3hiddenlayer")

In [None]:
config = wandb
config = dict(
    epochs=10,
    classes=10,
    batch_size=32,
    learning_rate=0.001,
    dataset="CFIAR",
    architecture="MLP")

In [None]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    
    transforms.RandomHorizontalFlip(),

    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    
])

train_dataset = datasets.CIFAR10(root='path/to/data', train=True,
                                download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='path/to/data', train=False,
                               download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)
sig=torch.nn.Sigmoid()
relu=torch.nn.ReLU()
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 10)

    def forward(self, x):
        x = x.view(-1, 32*32)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = relu(self.fc3(x))
        x=self.fc4(x)
        return x

model = MLP()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
wandb.watch(model, criterion, log="all", log_freq=10)
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(10):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        data=data.to(device)
        output = model(data)
        target=target.to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()


In [None]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data=data.to(device)
            target=target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    #wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))


Model accuracy: 38.06


In [None]:
print("Model accuracy:", evaluate_model(model, test_loader))
wandb.finish()

Model accuracy: 37.9


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
loss,█▇▅▃▆▃▆▆▂▂▃▄▄▂▆▅▄▆▄▃▅▅▃▂▁▅▂▂▅▄▄▂▃▁▂▃▄▂▃▃

0,1
epoch,9.0
loss,1.75149


# Q2

Implement a neural network on the Gurmukhi dataset and implement the following regularization
techniques from scratch:
1. L-1 regularization
2. L-2 regularization
3. Dropout

Compare the performance of the above techniques and mention reasons to support your
answer. You are free to utilize PyTorch's inbuilt functions for implementing activation and loss
functions. However, various regularization techniques must be implemented from scratch
without the support of any library.
Also, implement gradient checking (from scratch) to verify the values of gradients during
backpropagation.

# L1


In [110]:
from google.colab import drive

# Mount your Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [111]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, random_split
from PIL import Image

class ImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.img_paths = []
        self.labels = []
        
        for label in os.listdir(root_dir):
            label_dir = os.path.join(root_dir, label)
            if not os.path.isdir(label_dir):
                continue
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                self.img_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = Image.open(img_path)
        if self.transform:
            img = self.transform(img)
        label = int(self.labels[idx])
        return img, label



In [None]:
!pip install wandb -Uq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.0/184.0 KB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.9/178.9 KB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 KB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [112]:
import wandb

wandb.login()

True

In [113]:
wandb.init(project="gurnumL1")

In [114]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [115]:
transform = transforms.Compose([
                                transforms.ToTensor(),
                                ])

train_dataset = ImageDataset(root_dir='/content/drive/MyDrive/GurNum/train', transform=transform)
test_dataset = ImageDataset(root_dir='/content/drive/MyDrive/GurNum/val', transform=transform)


In [116]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=True)

In [117]:
for i, data in enumerate(train_dataset):
    inputs, labels = data
    print(data)
    
    if i == 2: # only show the first 3 data points
        break

(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)
(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 0.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)
(tensor([[[1., 1., 0.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         [0., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)


In [118]:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=1*32*32, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        # x = self.pool(torch.relu(self.conv1(x)))
        # x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [119]:
model = Net()
model.to(device)

# Make sure to call input = input.to(device) on any input tensors that you feed to the model
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [120]:
wandb.watch(model, criterion, log="all", log_freq=10)

[]

In [121]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [122]:
def l1_loss(model, weight_decay):
    l1_loss = 0
    for name, param in model.named_parameters():
        if 'bias' not in name:
            l1_loss += torch.sum(torch.abs(param))
    return weight_decay * l1_loss

.1 , 5

In [None]:
example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(2):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        target=torch.tensor(target)
        reg_loss = l1_loss(model, 0.000001)
        loss = criterion(output, target.to(device)) +reg_loss
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()
    print("for epoch ",epoch, " loss= ",running_loss)   

In [124]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data=data.to(device)
            target=target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    #wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))


Model accuracy: 92.13483146067416


# l1 grad


In [125]:
def dictionary_to_vector(model):
  
    params = list(model.parameters())
    p=[]
    for param in params:
      p.append(param.cpu().detach().numpy())
     
    count = 0

    for i in range(len(p)):
  
  
        new_vector = np.reshape(p[i], (-1,1))
        
        
        if count == 0:
            theta = new_vector
        else:
            theta = np.concatenate((theta, new_vector), axis=0)
        count = count + 1

    return theta

In [126]:
def gradients_to_vector(model):
    
    theta = np.reshape(model.fc1.weight.grad.cpu().detach().numpy(), (-1,1))
    theta1=np.concatenate((theta,np.reshape( model.fc1.bias.grad.cpu().detach().numpy(), (-1,1))), axis=0)
    theta2=np.concatenate((theta1, np.reshape(model.fc2.weight.grad.cpu().detach().numpy(), (-1,1))), axis=0)
    theta3=np.concatenate((theta2, np.reshape(model.fc2.bias.grad.cpu().detach().numpy(), (-1,1))), axis=0)

    return theta3

In [127]:
def vector_to_dictionary(theta):
    
    parameters = {}
    parameters["W1"] = theta[:131072].reshape(128, 1024)
    parameters["b1"] = theta[131072:131200].reshape((128,1))
    parameters["W2"] = theta[131200:132480].reshape((10, 128))
    parameters["b2"] = theta[132480:132490].reshape((10,1))

    return parameters

In [128]:
import numpy as np

def cross_entropy_loss(y_true, y_pred):
    
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return loss

In [129]:
def relu(x):
   
    s = np.maximum(0,x)
    return s 

In [130]:
def forward_propagation_n(X, Y, parameters):
  
    

    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    X=X.flatten()
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
 
    output = np.dot(W2, A1) + b2
    cost=cross_entropy_loss(Y,output)
    

    
    return cost

In [131]:
import numpy as np
parameters_values= dictionary_to_vector(model)

In [132]:
params = list(model.parameters())

In [133]:
model.parameters

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=1024, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)>

In [134]:
for param in params:
  #print(param.data)
  print(param.shape)

torch.Size([128, 1024])
torch.Size([128])
torch.Size([10, 128])
torch.Size([10])


In [141]:


def gradient_check_n(model, X, Y, epsilon=1e-7):
   
    warnings.filterwarnings('ignore')  
    parameters_values = dictionary_to_vector(model)
    grad = gradients_to_vector(model)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    scaling=1                                                                                                                                                                                                                                                                                                                                        *.0000001
    
   
    for i in range(num_parameters):
        
        
        thetaplus =  np.copy(parameters_values)                                       # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                   # Step 2
        J_plus[i] =  forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))  # Step 3
        
    
        thetaminus = np.copy(parameters_values)                                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon                                 # Step 2        
        J_minus[i] = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus)) # Step 3
     
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
      
 
    numerator = np.linalg.norm(grad - gradapprox)                                     # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)                   # Step 2'
    difference = numerator / denominator 
    difference = difference*scaling                                              # Step 3'
                                                                                                                                                                           
    truth=0
    if difference > 1e-7:
        truth=1
    
    return truth

In [136]:
i=0
for data, target in train_dataset:
  if i != 2 :

        print("F")
        
  
        i=i+1
  else :
    break


F
F


In [137]:
target

8

In [138]:
data=data.numpy()
data

array([[[1., 1., 0., ..., 1., 1., 1.],
        [1., 0., 0., ..., 1., 1., 1.],
        [0., 0., 0., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]]], dtype=float32)

In [142]:
gradient_check_n(model, data, target, 1e-7)

0

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
count_parameters(model)

132490

In [None]:
def dictionary_to_vector(model):
    """
    Roll all our parameters dictionary into a single vector satisfying our specific required shape.
    """
    #keys = []
    params = list(model.parameters())
    p=[]
    for param in params:
      p.append(param.cpu().detach().numpy())
     
    count = 0

    for i in range(len(p)):
  
        
        # flatten parameter
        new_vector = np.reshape(p[i], (-1,1))
        #keys = keys + [key]*new_vector.shape[0]
        
        if count == 0:
            theta = new_vector
        else:
            theta = np.concatenate((theta, new_vector), axis=0)
        count = count + 1

    return theta

In [None]:
def gradients_to_vector(model):
    
    theta = np.reshape(model.fc1.weight.grad.cpu().detach().numpy(), (-1,1))
    theta1=np.concatenate((theta,np.reshape( model.fc1.bias.grad.cpu().detach().numpy(), (-1,1))), axis=0)
    theta2=np.concatenate((theta1, np.reshape(model.fc2.weight.grad.cpu().detach().numpy(), (-1,1))), axis=0)
    theta3=np.concatenate((theta2, np.reshape(model.fc2.bias.grad.cpu().detach().numpy(), (-1,1))), axis=0)

    return theta3

In [None]:
def vector_to_dictionary(theta):
  
    parameters = {}
    parameters["W1"] = theta[:131072].reshape(128, 1024)
    parameters["b1"] = theta[131072:131200].reshape((128,1))
    parameters["W2"] = theta[131200:132480].reshape((10, 128))
    parameters["b2"] = theta[132480:132490].reshape((10,1))

    return parameters

In [None]:
import numpy as np

def cross_entropy_loss(y_true, y_pred):
    
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return loss

In [None]:
def relu(x):
    s = np.maximum(0,x)
    return s 

In [None]:
def forward_propagation_n(X, Y, parameters):
  
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    X=X.flatten()
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
 
    output = np.dot(W2, A1) + b2
    cost=cross_entropy_loss(Y,output)
    

    
    return cost

In [None]:
import numpy as np
parameters_values= dictionary_to_vector(model)

In [None]:
params = list(model.parameters())

In [None]:
model.parameters

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=1024, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)>

In [None]:
for param in params:
  #print(param.data)
  print(param.shape)

torch.Size([128, 1024])
torch.Size([128])
torch.Size([10, 128])
torch.Size([10])


In [None]:
def gradient_check(model, X, Y, epsilon=1e-7):
 
    parameters_values = dictionary_to_vector(model)
    grad = gradients_to_vector(model)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    scaling_fac=.0000001
    gradapprox = np.zeros((num_parameters, 1))
    for i in range(num_parameters):
      
        thetaplus =  np.copy(parameters_values)                                      
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                   
        J_plus[i] =  forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))  
       
        thetaminus = np.copy(parameters_values)                                       
        thetaminus[i][0] = thetaminus[i][0] - epsilon                                     
        J_minus[i] = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))
       
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
        
    numerator = np.linalg.norm(grad - gradapprox)                                    
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)                  
    difference = numerator / denominator 
    difference = difference*scaling_fac                                             
  

    if difference > 1e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    
    return difference

In [None]:
i=0
for data, target in train_dataset:
  if i != 2 :

        print("F")
        
  
        i=i+1
  else :
    break


F
F


In [None]:
target

8

In [None]:
data=data.numpy()
data

In [None]:
gradient_check(model, data, target, 1e-7)

[92mYour backward propagation works perfectly fine! difference = 9.996481073745762e-08[0m


9.996481073745762e-08

In [None]:
config = dict(
    epochs=1,
    classes=10,
    batch_size=4,
    learning_rate=0.001,
    regularization=.001)

In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.008 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.094892…

0,1
epoch,▁▁▁▁▁▁▁▁▁▁
loss,█▅▅▃▄▃▂▄▁▁

0,1
epoch,0.0
loss,0.21785


In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
count_parameters(model)

132490

# L2



In [None]:
from google.colab import drive

# Mount your Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, random_split
from PIL import Image

class ImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.img_paths = []
        self.labels = []
        
        for label in os.listdir(root_dir):
            label_dir = os.path.join(root_dir, label)
            if not os.path.isdir(label_dir):
                continue
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                self.img_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = Image.open(img_path)
        if self.transform:
            img = self.transform(img)
        label = int(self.labels[idx])
        return img, label



In [None]:
!pip install wandb -Uq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.0/184.0 KB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.9/178.9 KB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 KB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.6/140.6 KB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [143]:
import wandb

wandb.login()



True

In [144]:
wandb.init(project="gurnumL2")

0,1
epoch,▁▁▁▁▁▁▁▁▁▁██████████
loss,▆█▅▄▂▂▃▂▁▁▁▂▂▃▂▁▁▆▁▁

0,1
epoch,1.0
loss,0.03709


In [145]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [146]:
transform = transforms.Compose([
                                transforms.ToTensor(),
                                ])

train_dataset = ImageDataset(root_dir='/content/drive/MyDrive/GurNum/train', transform=transform)
test_dataset = ImageDataset(root_dir='/content/drive/MyDrive/GurNum/val', transform=transform)


In [147]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=True)

In [148]:
for i, data in enumerate(train_dataset):
    inputs, labels = data
    print(data)
    
    if i == 2: # only show the first 3 data points
        break

(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)
(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 0.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)
(tensor([[[1., 1., 0.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         [0., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)


In [149]:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=1*32*32, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        # x = self.pool(torch.relu(self.conv1(x)))
        # x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [150]:
model = Net()
model.to(device)

# Make sure to call input = input.to(device) on any input tensors that you feed to the model
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [151]:
wandb.watch(model, criterion, log="all", log_freq=10)

[]

In [152]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [156]:
def l2_loss(model, weight_decay):
    l2_loss = 0
    for name, param in model.named_parameters():
        if 'bias' not in name:
            l2_loss += torch.sum(torch.sum(param ** 2))
    return weight_decay * l2_loss

In [157]:
for name, param in model.named_parameters():
        if 'bias' not in name:
            print(param)

Parameter containing:
tensor([[ 0.0088, -0.0212, -0.0191,  ..., -0.0236, -0.0124, -0.0120],
        [ 0.0126, -0.0299,  0.0168,  ...,  0.0134,  0.0139,  0.0099],
        [ 0.0036,  0.0106, -0.0140,  ...,  0.0202, -0.0072, -0.0042],
        ...,
        [ 0.0222,  0.0262,  0.0093,  ..., -0.0274, -0.0006,  0.0089],
        [-0.0159,  0.0137, -0.0175,  ...,  0.0136,  0.0052,  0.0258],
        [-0.0260, -0.0063,  0.0036,  ..., -0.0193, -0.0174, -0.0301]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0631,  0.0671, -0.0301,  ...,  0.0093,  0.0180, -0.0625],
        [-0.0347,  0.0425, -0.0413,  ...,  0.0570,  0.0804,  0.0863],
        [ 0.0276,  0.0643,  0.0154,  ...,  0.0406, -0.0812,  0.0188],
        ...,
        [-0.0274,  0.0562, -0.0737,  ..., -0.0617, -0.0504,  0.0271],
        [ 0.0580,  0.0339, -0.0799,  ...,  0.0824,  0.0474, -0.0520],
        [ 0.0674,  0.0778,  0.0162,  ...,  0.0535, -0.0605,  0.0769]],
       requires_grad=True)


.1 , 5

In [158]:
example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(2):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        target=torch.tensor(target)
        reg_loss = l2_loss(model, 0.0000001)
        loss = criterion(output, target.to(device)) +reg_loss
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()
    print("for epoch ",epoch, " loss= ",running_loss)   

Loss after 00096 examples: 2.206
Loss after 00196 examples: 1.496
Loss after 00296 examples: 0.823
Loss after 00396 examples: 0.949
Loss after 00496 examples: 0.421
Loss after 00596 examples: 0.295
Loss after 00696 examples: 0.399
Loss after 00796 examples: 0.060
Loss after 00896 examples: 0.099
Loss after 00996 examples: 0.272
for epoch  0  loss=  218.5984680056572
Loss after 01096 examples: 0.239
Loss after 01196 examples: 0.026
Loss after 01296 examples: 0.055
Loss after 01396 examples: 0.514
Loss after 01496 examples: 0.023
Loss after 01596 examples: 0.249
Loss after 01696 examples: 0.149
Loss after 01796 examples: 0.486
Loss after 01896 examples: 0.043
Loss after 01996 examples: 0.077
for epoch  1  loss=  51.35153544135392


In [159]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data=data.to(device)
            target=target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    #wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))


Model accuracy: 91.57303370786516


# l2 grad


In [160]:
def dictionary_to_vector(model):
    """
    Roll all our parameters dictionary into a single vector satisfying our specific required shape.
    """
    #keys = []
    params = list(model.parameters())
    p=[]
    for param in params:
      p.append(param.cpu().detach().numpy())
     
    count = 0

    for i in range(len(p)):
  
        
        # flatten parameter
        new_vector = np.reshape(p[i], (-1,1))
        #keys = keys + [key]*new_vector.shape[0]
        
        if count == 0:
            theta = new_vector
        else:
            theta = np.concatenate((theta, new_vector), axis=0)
        count = count + 1

    return theta

In [161]:
def gradients_to_vector(model):
    
    theta = np.reshape(model.fc1.weight.grad.cpu().detach().numpy(), (-1,1))
    theta1=np.concatenate((theta,np.reshape( model.fc1.bias.grad.cpu().detach().numpy(), (-1,1))), axis=0)
    theta2=np.concatenate((theta1, np.reshape(model.fc2.weight.grad.cpu().detach().numpy(), (-1,1))), axis=0)
    theta3=np.concatenate((theta2, np.reshape(model.fc2.bias.grad.cpu().detach().numpy(), (-1,1))), axis=0)

    return theta3

In [162]:
def vector_to_dictionary(theta):
    """
    Unroll all our parameters dictionary from a single vector satisfying our specific required shape.
    """
    parameters = {}
    parameters["W1"] = theta[:131072].reshape(128, 1024)
    parameters["b1"] = theta[131072:131200].reshape((128,1))
    parameters["W2"] = theta[131200:132480].reshape((10, 128))
    parameters["b2"] = theta[132480:132490].reshape((10,1))

    return parameters

In [163]:
import numpy as np

def cross_entropy_loss(y_true, y_pred):
    
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return loss

In [164]:
def relu(x):
  
    s = np.maximum(0,x)
    return s 

In [165]:
def forward_propagation_n(X, Y, parameters):
  
    
 
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    X=X.flatten()
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
 
    output = np.dot(W2, A1) + b2
    cost=cross_entropy_loss(Y,output)
    

    
    return cost

In [166]:
import numpy as np
parameters_values= dictionary_to_vector(model)

In [167]:
params = list(model.parameters())

In [168]:
model.parameters

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=1024, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)>

In [169]:
for param in params:
  #print(param.data)
  print(param.shape)

torch.Size([128, 1024])
torch.Size([128])
torch.Size([10, 128])
torch.Size([10])


In [177]:


def gradient_check_n(model, X, Y, epsilon=1e-7):

   
    parameters_values = dictionary_to_vector(model)
    grad = gradients_to_vector(model)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    
    
    for i in range(num_parameters):
        
        
        thetaplus =  np.copy(parameters_values)                                       # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                   # Step 2
        J_plus[i] =  forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))  # Step 3
        
        thetaminus = np.copy(parameters_values)                                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon                                 # Step 2        
        J_minus[i] = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus)) # Step 3
        
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
       
    
    
    numerator = np.linalg.norm(grad - gradapprox)                                     # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)                   # Step 2'
    difference = numerator / denominator                                              # Step 3'
    
    truth=0
    if difference > 1e-7:
        truth=1
        print(difference)
   
    return truth

In [171]:
i=0
for data, target in train_dataset:
  if i != 2 :

        print("F")
        
  
        i=i+1
  else :
    break


F
F


In [172]:
target

8

In [173]:
data=data.numpy()
data

array([[[1., 1., 0., ..., 1., 1., 1.],
        [1., 0., 0., ..., 1., 1., 1.],
        [0., 0., 0., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]]], dtype=float32)

In [178]:
gradient_check_n(model, data, target, 1e-7)

0.9993981630496983


1

In [175]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [176]:
count_parameters(model)

132490

# Dropout




In [None]:
from google.colab import drive

# Mount your Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, random_split
from PIL import Image

class ImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.img_paths = []
        self.labels = []
        
        for label in os.listdir(root_dir):
            label_dir = os.path.join(root_dir, label)
            if not os.path.isdir(label_dir):
                continue
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                self.img_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = Image.open(img_path)
        if self.transform:
            img = self.transform(img)
        label = int(self.labels[idx])
        return img, label



In [None]:
!pip install wandb -Uq

In [179]:
import wandb

wandb.login()



True

In [180]:
wandb.init(project="gurnum_drop_out")

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▁▁▁▁▁▁██████████
loss,█▆▄▄▂▂▂▁▁▂▂▁▁▃▁▂▁▂▁▁

0,1
epoch,1.0
loss,0.07651


In [181]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [182]:
transform = transforms.Compose([
                                transforms.ToTensor(),
                                ])

train_dataset = ImageDataset(root_dir='/content/drive/MyDrive/GurNum/train', transform=transform)
test_dataset = ImageDataset(root_dir='/content/drive/MyDrive/GurNum/val', transform=transform)


In [None]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=True)

In [183]:
for i, data in enumerate(train_dataset):
    inputs, labels = data
    print(data)
    
    if i == 2: # only show the first 3 data points
        break

(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)
(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 0.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)
(tensor([[[1., 1., 0.,  ..., 1., 1., 1.],
         [1., 0., 0.,  ..., 1., 1., 1.],
         [0., 0., 0.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), 8)


In [184]:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        # self.pool = nn.MaxPool2d(2, 2)
        # self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=1*32*32, out_features=128)
        self.dropout = nn.Dropout(0.12)
        self.fc2 = nn.Linear(in_features=128, out_features=10)


    def forward(self, x):
        # x = self.pool(torch.relu(self.conv1(x)))
        # x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [185]:
model = Net()
model.to(device)

# Make sure to call input = input.to(device) on any input tensors that you feed to the model
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [186]:
wandb.watch(model, criterion, log="all", log_freq=10)

[]

In [187]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [189]:
example_ct = 0  # number of examples seen
batch_ct = 0
for epoch in range(2):
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data.to(device))
        target=torch.tensor(target)
        loss = criterion(output, target.to(device))
        loss.backward()
        optimizer.step()
        example_ct +=  len(target)
        batch_ct += 1
        if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
        running_loss += loss.item()
    print("for epoch ",epoch, " loss= ",running_loss)   

Loss after 00096 examples: 0.035
Loss after 00196 examples: 0.085
Loss after 00296 examples: 0.038
Loss after 00396 examples: 0.019
Loss after 00496 examples: 0.018
Loss after 00596 examples: 0.297
Loss after 00696 examples: 0.049
Loss after 00796 examples: 0.030
Loss after 00896 examples: 1.025
Loss after 00996 examples: 0.049
for epoch  0  loss=  34.72037482727319
Loss after 01096 examples: 0.573
Loss after 01196 examples: 0.011
Loss after 01296 examples: 0.044
Loss after 01396 examples: 0.177
Loss after 01496 examples: 0.027
Loss after 01596 examples: 0.055
Loss after 01696 examples: 0.099
Loss after 01796 examples: 0.004
Loss after 01896 examples: 0.378
Loss after 01996 examples: 0.019
for epoch  1  loss=  23.10290250205435


In [190]:
def evaluate_model(model, test_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data=data.to(device)
            target=target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    accuracy = 100 * correct / total
    #wandb.log({"test_accuracy": correct / total})
    return accuracy

print("Model accuracy:", evaluate_model(model, test_loader))
acc=evaluate_model(model, test_loader)

Model accuracy: 92.69662921348315


In [191]:
wandb.log({"accuracy": acc})

In [192]:
wandb.finish()

0,1
accuracy,▁
epoch,▁▁▁▁▁▁▁▁▁▁██████████
loss,██▄▄▂▂▄▂▁▃▁▂▁▂▂▁▁▁▂▁

0,1
accuracy,92.13483
epoch,1.0
loss,0.01861
