<a href="https://colab.research.google.com/github/Rishabh66/Optimizing_GPU_breastCancer/blob/main/Optimizing_GPU_breastCancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

import sklearn.datasets as datasets
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [36]:
# will use Mnist dataset which have 70k images, but will train our module with 6000 Images

# Random seeds

torch.manual_seed(42)

<torch._C.Generator at 0x7a7f9a5a4510>

In [37]:
df = pd.read_csv('fmnist_small.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [38]:
X = df.iloc[:,1:].values
y = df.iloc[:,0].values

In [39]:
X.shape

(6000, 784)

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [41]:
# scaling the features
X_train = X_train/255
X_test = X_test/255

In [42]:
# Create a Customer Dataset
class CustomerDataset(Dataset):
  def __init__(self, data, lables):
    self.data = torch.tensor(data, dtype=torch.float32)
    self.labels = torch.tensor(lables, dtype=torch.long)

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    return self.data[index], self.labels[index]

In [43]:
# create a object for CustomerDataset class
traindataset = CustomerDataset(X_train, y_train)
testdataset = CustomerDataset(X_test, y_test)

In [44]:
len(traindataset)

4800

In [45]:
traindataset[0]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0

In [46]:
# create dataloader Object
trainloader = DataLoader(traindataset, batch_size=64, shuffle = True, pin_memory=True)
testloader = DataLoader(testdataset, batch_size = 4, shuffle = False, pin_memory= True)


In [47]:
# create a NN Class and then a Training loop

class MyNN(nn.Module):
  def __init__(self, input_features=784):
    super().__init__()

    self.model = nn.Sequential(
        nn.Linear(input_features, 128),
        nn.BatchNorm1d(128), # batch normalization
        nn.ReLU(),
        nn.Dropout(0.3),

        nn.Linear(128,64),
        nn.BatchNorm1d(64), # batch normalization
        nn.ReLU(),
        nn.Dropout(0.3),

        nn.Linear(64,10)
    )
  def forward(self,x):
        return self.model(x)

In [48]:
# set parameters
epochs = 100
learning_rate = 0.1

In [49]:
# initiate model creating object of model

model = MyNN(X_train.shape[1])

# add Data to GPU
model = model.to(device)

# loss function
criteria = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay= 1e-4)


In [50]:
# Creating Trainging Loops
for epoch in range(epochs):
  total_epoch_loss = 0

  for batch_features, batch_lables in trainloader:

    # Add data to GPU
    batch_features = batch_features.to(device)
    batch_lables = batch_lables.to(device)

    # Forward pass
    model_output = model(batch_features)

    # Calculate Loss
    loss = criteria(model_output, batch_lables)

    # zero gradients
    optimizer.zero_grad()

    # Back Pass
    loss.backward()

    # update grads
    optimizer.step()

    total_epoch_loss = total_epoch_loss + loss.item()

  total_epoch_loss = total_epoch_loss/len(trainloader)
  print(f'Epoch {epoch+1} Loss {total_epoch_loss}')



Epoch 1 Loss 1.064584219455719
Epoch 2 Loss 0.6893338926633199
Epoch 3 Loss 0.6101653552055359
Epoch 4 Loss 0.5700783697764079
Epoch 5 Loss 0.5131807680924734
Epoch 6 Loss 0.4936527109146118
Epoch 7 Loss 0.45052923997243244
Epoch 8 Loss 0.447202676932017
Epoch 9 Loss 0.4271087352434794
Epoch 10 Loss 0.41503758052984874
Epoch 11 Loss 0.38404425700505573
Epoch 12 Loss 0.3769886843363444
Epoch 13 Loss 0.3691411187251409
Epoch 14 Loss 0.3436760697762171
Epoch 15 Loss 0.35124763727188113
Epoch 16 Loss 0.32703014234701794
Epoch 17 Loss 0.3178626624743144
Epoch 18 Loss 0.3173454091946284
Epoch 19 Loss 0.2926302037636439
Epoch 20 Loss 0.28948679904143015
Epoch 21 Loss 0.29972314298152924
Epoch 22 Loss 0.2853665272394816
Epoch 23 Loss 0.2701905091603597
Epoch 24 Loss 0.27215610444545746
Epoch 25 Loss 0.2663740179936091
Epoch 26 Loss 0.25612218846877416
Epoch 27 Loss 0.24684289733568826
Epoch 28 Loss 0.2354394402106603
Epoch 29 Loss 0.21894815156857173
Epoch 30 Loss 0.2319549000263214
Epoch 31 L

In [51]:
# set model to eval mode
model.eval()

MyNN(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [52]:
# Evalution Code will calculate accuracy

total = 0
correct_variable = 0

with torch.no_grad():
  for batch_features, batch_lables in testloader:
    # adding data to GPU
    batch_features = batch_features.to(device)
    batch_lables = batch_lables.to(device)

    model_output = model(batch_features)

    _, predicted = torch.max(model_output, 1)
    total = total + batch_lables.size(0)
    correct_variable = correct_variable + (torch.max(model_output, 1)[1] == batch_lables).sum().item()

print(correct_variable/total)


0.8316666666666667


In [53]:
# Evalution Code will calculate accuracy

total = 0
correct_variable = 0

with torch.no_grad():
  for batch_features, batch_lables in trainloader:
    # adding data to GPU
    batch_features = batch_features.to(device)
    batch_lables = batch_lables.to(device)

    model_output = model(batch_features)

    _, predicted = torch.max(model_output, 1)
    total = total + batch_lables.size(0)
    correct_variable = correct_variable + (torch.max(model_output, 1)[1] == batch_lables).sum().item()

print(correct_variable/total)


0.9941666666666666
