# What is CUDA?

Most people mistake CUDA for a language or maybe an API. It is neither.

It’s more than that. CUDA is a parallel computing platform and programming model that makes using a GPU for general-purpose computing simple and elegant. The developer still programs in the familiar C, C++, Fortran, or one of an ever-expanding list of supported languages, and incorporates extensions of these languages in the form of a few basic keywords.

These keywords let the developer express massive amounts of parallelism and direct the compiler to the portion of the application that maps to the GPU.

# How do I know if I have CUDA available?

In [1]:
import torch
# Check whether CUDA is available to PyTorch on this machine.
torch.cuda.is_available()
# True

True

# Using GPU and CUDA

In [2]:
# Get the ID (index) of the default CUDA device
torch.cuda.current_device()

0

In [3]:
# Look up the human-readable name of a device by its ID
torch.cuda.get_device_name(0)  # name of the device with ID 0

'NVIDIA GeForce RTX 3050 Laptop GPU'

In [4]:
# Returns the current GPU memory usage by
# tensors in bytes for a given device
# (no device argument is passed in this call)
torch.cuda.memory_allocated()

0

In [5]:
# Returns the current GPU memory managed by the
# caching allocator in bytes for a given device.
# memory_cached() is the deprecated name for this query; memory_reserved()
# is its replacement and reports the same quantity.
torch.cuda.memory_reserved()



0

# Using CUDA instead of CPU

In [6]:
# CPU: no device is requested, so the tensor is created on the CPU.
# The explicit dtype keeps the 32-bit float element type of the original.
a = torch.tensor([1., 2.], dtype=torch.float32)

In [7]:
# Echo the tensor to inspect its values
a

tensor([1., 2.])

In [8]:
# Inspect which device the tensor is stored on
a.device

device(type='cpu')

### GPU

In [9]:
# Specify that we are using the GPU when creating a tensor
a = torch.FloatTensor([1., 2.]).cuda()  # as an additional method call

In [10]:
# Inspect the device again after the .cuda() call
a.device

device(type='cuda', index=0)

In [11]:
# Re-check allocated GPU memory now that a tensor has been created with .cuda()
torch.cuda.memory_allocated()

512

## Sending Models to GPU

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [13]:
class Model(nn.Module):
    """Fully connected network: two ReLU-activated linear layers and a linear output.

    Args:
        in_features: number of values in each input sample (default 4).
        h1: width of the first linear layer's output (default 8).
        h2: width of the second linear layer's output (default 9).
        out_features: number of scores produced per sample (default 3).
    """

    def __init__(self, in_features=4, h1=8, h2=9, out_features=3):
        super().__init__()
        # Layers are declared in the order data flows through them.
        self.fc1 = nn.Linear(in_features, h1)   # input -> first hidden width
        self.fc2 = nn.Linear(h1, h2)            # first -> second hidden width
        self.out = nn.Linear(h2, out_features)  # second hidden width -> output

    def forward(self, x):
        """Return the output layer's raw scores for ``x`` (no final activation)."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.out(hidden)

In [14]:
# Seed PyTorch's random number generator before building the model
# (presumably so the initial weights are repeatable between runs; TODO confirm).
torch.manual_seed(32)
model = Model()

In [15]:
# From the discussions here: discuss.pytorch.org/t/how-to-check-if-model-is-on-cuda
# Look at the first parameter's is_cuda flag to see whether the model is on the GPU.
next(model.parameters()).is_cuda

False

In [16]:
# Essentially sending the model to the GPU; the result of .cuda() is kept as `gpumodel`
gpumodel = model.cuda()

In [17]:
# Same first-parameter check, this time on the module returned by .cuda()
next(gpumodel.parameters()).is_cuda

True

In [18]:
# Load the iris CSV and separate the label column ('target') from the features.
# NOTE(review): the path is absolute and specific to one machine; point it at
# your own copy of iris.csv before running.
df = pd.read_csv('C:/Users/gaura/OneDrive/Desktop/PDGA/PYTORCH_NOTEBOOKS/Data/iris.csv')
X = df.drop(columns='target').values
y = df['target'].values
# Split with test_size=0.2; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=33,
)

## Convert NumPy arrays to .cuda() tensors

In [19]:
# Features become 32-bit float tensors and labels become 64-bit integer
# tensors; every tensor is then copied to the GPU with .cuda().
X_train, X_test = (torch.FloatTensor(split).cuda() for split in (X_train, X_test))
y_train, y_test = (torch.LongTensor(split).cuda() for split in (y_train, y_test))

In [20]:
# Wrap the feature tensors in DataLoaders with batch_size=60; only the
# training loader shuffles.
# NOTE(review): only X_train / X_test are wrapped (no labels), and the cells
# visible here train and evaluate on the tensors directly rather than on
# these loaders. Confirm whether they are used further down.
trainloader = DataLoader(X_train, batch_size=60, shuffle=True)
testloader = DataLoader(X_test, batch_size=60, shuffle=False)

In [21]:
# The loss needs no GPU-specific change.
criterion = nn.CrossEntropyLoss()
# Build the optimizer from `gpumodel`, the module the training loop actually
# calls, and only after it has been moved to the GPU, so the optimizer is
# guaranteed to hold the GPU-resident parameters.
optimizer = torch.optim.Adam(gpumodel.parameters(), lr=0.01)

In [22]:
import time

epochs = 100
losses = []  # one Python float per epoch

# CUDA work is queued asynchronously, so wait for any pending GPU work before
# starting the clock; otherwise earlier transfers leak into the measurement.
torch.cuda.synchronize()
start = time.time()
for i in range(1, epochs + 1):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = gpumodel(X_train)
    loss = criterion(y_pred, y_train)
    # Store a plain float: keeping the loss tensor itself would hold on to a
    # graph-attached CUDA tensor for every epoch.
    loss_value = loss.item()
    losses.append(loss_value)

    # a neat trick to save screen space: report epochs 1, 11, 21, ...
    if i % 10 == 1:
        print(f'epoch: {i:2}  loss: {loss_value:10.8f}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Wait for the last backward/step kernels to finish before reading the clock.
torch.cuda.synchronize()
print(f'TOTAL TRAINING TIME: {time.time()-start}')

epoch:  1  loss: 1.15071130
epoch: 11  loss: 0.93773133
epoch: 21  loss: 0.77982587
epoch: 31  loss: 0.60993993
epoch: 41  loss: 0.40079930
epoch: 51  loss: 0.25436318
epoch: 61  loss: 0.15053052
epoch: 71  loss: 0.10086943
epoch: 81  loss: 0.08128314
epoch: 91  loss: 0.07231426
TOTAL TRAINING TIME: 0.32145094871520996


In [23]:
correct = 0
total = len(y_test)

with torch.no_grad():  # this deactivates the gradient engine
    # Walk test samples and their labels together, numbering rows from 1.
    for i, (data, target) in enumerate(zip(X_test, y_test), start=1):
        # Call the module itself rather than .forward() so registered hooks run.
        y_val = gpumodel(data)
        # Pull the label out as a Python int once, so the comparison below is
        # int-vs-int rather than int-vs-CUDA-tensor.
        label = target.item()
        print(f"{i:2}. {str(y_val):38}  {label}")
        if y_val.argmax().item() == label:
            correct += 1

print(f'\n{correct} out of {total} = {100 * correct/total:.2f}% correct')

 1. tensor([-2.1252,  4.8064, -0.8628], device='cuda:0')  1
 2. tensor([-1.7985,  5.3098, -1.5449], device='cuda:0')  1
 3. tensor([  6.3542,   0.8438, -10.0541], device='cuda:0')  0
 4. tensor([-3.9123,  4.5958,  1.1693], device='cuda:0')  1
 5. tensor([-7.4713,  3.2021,  5.7853], device='cuda:0')  2
 6. tensor([-10.4976,   1.6459,   9.6297], device='cuda:0')  2
 7. tensor([  6.3201,   0.9917, -10.1532], device='cuda:0')  0
 8. tensor([  7.0468,   0.7059, -10.9137], device='cuda:0')  0
 9. tensor([-7.2061,  3.3477,  5.3565], device='cuda:0')  2
10. tensor([-9.3960,  2.5759,  8.1033], device='cuda:0')  2
11. tensor([-9.8808,  2.3475,  8.7141], device='cuda:0')  2
12. tensor([ 6.2748,  0.6655, -9.7613], device='cuda:0')  0
13. tensor([-9.3142,  2.1880,  8.1947], device='cuda:0')  2
14. tensor([-3.7803,  4.5050,  1.0752], device='cuda:0')  1
15. tensor([-7.8657,  3.0117,  6.2303], device='cuda:0')  2
16. tensor([-1.8867,  5.1572, -1.3345], device='cuda:0')  1
17. tensor([-5.7006,  3.5030