### Steps of GPU Training

1. Check GPU availability

Before Starting, verify if a GPU is available, select it, otherwise use CPU

```
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
```

2. Move the Model to GPU

Move your model to the selected device (`cuda` for GPU or `CPU`) so hat all computations occur on the same device
```
model = MyNN(X_train.shape[1])
model = model.to(device)
```

3. Modify the Training Loop by Moving Data to GPU

Ensure that each batch of data (features and labels) is moved to the GPU before processing. This ensures that both the model and data are on the same device
```
  for batch_features, batch_labels in train_loader:
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)
```

4. Modify the Evaluation Loop by Moving data to GPU

Similarly, ensure test data is moved to the GPU during evaluation. Disable gradient calculations using `torch.no_grad()`
```
with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)
```

5. Optimize the GPU usage

To make the best use of GPU resources, apply the following optimizations:

  a. Use Larger Batch Sizes

  Larger batch sizes can better utilize GPU memory and reduce computation time per epoch(if memory allows)
  ```
  train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True)
  test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, pin_memory=True)
  ```

  b. Enable DataLoader Pinning

  Use `pin_memory=True` in `DataLoader` to speed up data transfer from CPU to GPU
  ```
  train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
  test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)
  ```

### Imports, Preprocessing and Data Loading


In [1]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"prasadkhambadkar","key":"eeca1b08da1cda1479e85d53c1ce2a1c"}'}

In [2]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download -d zalando-research/fashionmnist

!unzip fashionmnist.zip -d fashionmnist

Dataset URL: https://www.kaggle.com/datasets/zalando-research/fashionmnist
License(s): other
Downloading fashionmnist.zip to /content
 74% 51.0M/68.8M [00:00<00:00, 171MB/s] 
100% 68.8M/68.8M [00:00<00:00, 144MB/s]
Archive:  fashionmnist.zip
  inflating: fashionmnist/fashion-mnist_test.csv  
  inflating: fashionmnist/fashion-mnist_train.csv  
  inflating: fashionmnist/t10k-images-idx3-ubyte  
  inflating: fashionmnist/t10k-labels-idx1-ubyte  
  inflating: fashionmnist/train-images-idx3-ubyte  
  inflating: fashionmnist/train-labels-idx1-ubyte  


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [4]:
torch.manual_seed(42)

<torch._C.Generator at 0x7c934b612690>

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [6]:
df = pd.read_csv("/content/fashionmnist/fashion-mnist_train.csv")
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train/255.0
X_test = X_test/255.0

In [8]:
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = torch.tensor(features, dtype=torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long)

  def __len__(self):
    return len(self.features)

  def __getitem__(self, idx):
    return self.features[idx], self.labels[idx]

In [10]:
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

In [19]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)
# by doing pin_memory = True the speed of training will increase if the dataset is very very large

In [20]:
class MyNN(nn.Module):
  def __init__(self, num_features):
    super().__init__()
    self.model = nn.Sequential(
        nn.Linear(num_features, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 10) # Here we don't need to add softmax because there is internal softmax in CrossEntropy
    )

  def forward(self, x):
    return self.model(x)

In [21]:
epochs = 50
learning_rate = 0.1

### Moving the model to GPU

In [22]:
model = MyNN(X_train.shape[1])
model = model.to(device)
criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [23]:
for epoch in range(epochs):
  total_epoch_loss = 0
  for batch_features, batch_labels in train_loader:
    # Moving batch_features and batch_labels to GPU
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    outputs = model(batch_features)

    loss = criterion(outputs, batch_labels)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()
    total_epoch_loss += loss.item()

  loss = total_epoch_loss / len(train_loader)
  print(f'Epoch {epoch+1}, Loss: {loss}')

Epoch 1, Loss: 0.6411633166472117
Epoch 2, Loss: 0.42567913804451624
Epoch 3, Loss: 0.3850230619510015
Epoch 4, Loss: 0.3527769510919849
Epoch 5, Loss: 0.33541005745033425
Epoch 6, Loss: 0.3162104703237613
Epoch 7, Loss: 0.3030245339249571
Epoch 8, Loss: 0.2925685998822252
Epoch 9, Loss: 0.2788450455752512
Epoch 10, Loss: 0.2703151357596119
Epoch 11, Loss: 0.26351695999503133
Epoch 12, Loss: 0.2536412532130877
Epoch 13, Loss: 0.24613721238449215
Epoch 14, Loss: 0.23912700902546447
Epoch 15, Loss: 0.23455527458464107
Epoch 16, Loss: 0.22899569040661058
Epoch 17, Loss: 0.22130280486370127
Epoch 18, Loss: 0.21810389885554712
Epoch 19, Loss: 0.21122133492305875
Epoch 20, Loss: 0.20624930018559098
Epoch 21, Loss: 0.20294206120694677
Epoch 22, Loss: 0.19774032927304505
Epoch 23, Loss: 0.19367083395272494
Epoch 24, Loss: 0.18769370994654794
Epoch 25, Loss: 0.18854369070443014
Epoch 26, Loss: 0.18211310348799453
Epoch 27, Loss: 0.17756872406912347
Epoch 28, Loss: 0.1742767193187028
Epoch 29, L

In [24]:
model.eval()

MyNN(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [25]:
# Evaluation code
total = 0
correct = 0
with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)
    outputs = model(batch_features)
    _, predicted = torch.max(outputs, 1)
    total = total + batch_labels.shape[0]

    correct = correct + (predicted == batch_labels).sum().item()

print(correct/total)

0.8928333333333334
