### Training on GPUs

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
import os

# Jupyter starts in $HOME, so switch to the per-user workshop directory
# that was created under /scratch/vp91 before doing anything else.
workshop_dir = os.path.expandvars("/scratch/vp91/$USER/")
os.chdir(workshop_dir)

#### Set Device
Set the default device to the GPU if one is available; otherwise fall back to the CPU.

In [None]:
# Pick the CUDA device when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

### Curate the dataset
Load the dataset and split it into the input features (X) and the output variable (y).

In [None]:
# Location of the Pima Indians diabetes CSV inside the workshop directory.
datapath = os.path.expandvars('/scratch/vp91/$USER/intro-to-pytorch/data/pima-indians-diabetes.data.csv')

# Columns 0-7 are taken as the input features, column 8 as the target.
dataset = np.loadtxt(datapath, delimiter=',')
X, y = dataset[:, :8], dataset[:, 8]

# Convert to float32 tensors; the target is shaped as a single column (N, 1).
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y.reshape(-1, 1), dtype=torch.float32)

### Defining the Model

In [None]:
class PimaClassifier(nn.Module):
    """Feed-forward binary classifier: 8 inputs -> 12 -> 8 -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        # First hidden stage: 8 features in, 12 units out, ReLU activation.
        self.hidden1 = nn.Linear(in_features=8, out_features=12)
        self.act1 = nn.ReLU()
        # Second hidden stage: 12 units in, 8 units out, ReLU activation.
        self.hidden2 = nn.Linear(in_features=12, out_features=8)
        self.act2 = nn.ReLU()
        # Output stage: a single value passed through a sigmoid.
        self.output = nn.Linear(in_features=8, out_features=1)
        self.act_output = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for a batch `x` whose last dimension is 8."""
        first_hidden = self.act1(self.hidden1(x))
        second_hidden = self.act2(self.hidden2(first_hidden))
        raw_output = self.output(second_hidden)
        return self.act_output(raw_output)

In [None]:
# Instantiate the classifier and print its layer structure.
class_model = PimaClassifier()
print(class_model)

#### Save the model

In [None]:
# File path (with $USER expanded) used below to save and reload the model parameters.
modelpath = os.path.expandvars('/scratch/vp91/$USER/class_model')
print(modelpath)

In [None]:
# Save only the model's state_dict (its parameters), not the module object itself.
torch.save(class_model.state_dict(), modelpath)

In [None]:
# List the saved file to confirm it exists on disk.
!ls /scratch/vp91/$USER/class_model

#### Load the model on the GPU

In [None]:
# Read the saved parameters, mapping their storage onto the selected device
# (weights_only=True restricts what the unpickler is allowed to load).
saved_state = torch.load(modelpath, map_location=device, weights_only=True)
# Restore the parameters into the model, then move the model to the device.
class_model.load_state_dict(saved_state)
class_model.to(device)

In [None]:
# Binary cross-entropy loss; it takes probabilities, which the model's Sigmoid output provides.
loss_fn = nn.BCELoss()

In [None]:
# Adam optimizer over all of the model's parameters, with a learning rate of 0.001.
optimizer = optim.Adam(class_model.parameters(), lr=0.001)

#### Training the Model

In [None]:
%%time
n_epochs = 100
batch_size = 10

# Move the full dataset to the device once. The original code copied each
# mini-batch from host to device inside the loop, repeating the same
# transfers every epoch; slicing device-resident tensors avoids that.
# NOTE: assumes the whole dataset fits in device memory.
X_device = X_tensor.to(device)
y_device = y_tensor.to(device)

for epoch in range(n_epochs):
    for i in range(0, len(X_device), batch_size):
        # Mini-batch of inputs and targets, already on the device.
        Xbatch = X_device[i:i+batch_size]
        ybatch = y_device[i:i+batch_size]

        # Forward pass and loss for this mini-batch.
        y_pred = class_model(Xbatch)
        loss = loss_fn(y_pred, ybatch)

        # Clear old gradients, backpropagate, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the loss of the last mini-batch processed in this epoch.
    print(f'Finished epoch {epoch}, latest loss {loss}')

#### Evaluate the Model

Currently, we are testing the model on the training dataset. Ideally, we should split the data into separate training and testing datasets, or use a distinct dataset for evaluation. For simplicity, we are testing the model on the same data used for training.



In [None]:
# Inference only: gradients are not needed for the forward pass.
with torch.no_grad():
    y_pred = class_model(X_tensor.to(device))

# Round the predicted probabilities to 0/1 and compare them with the targets
# on the same device; the mean of the matches is the accuracy.
predicted_labels = y_pred.round()
targets = y_tensor.to(device)
accuracy = (predicted_labels == targets).float().mean()
print(f"Accuracy {accuracy}")

### Exercise

1. **What is the time difference in training**? Compare it with the previous training.