In [1]:
import torch 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from numpy import random
from torch import nn

try:
  from torchsummary import summary
except:
  !pip install torchsummary
  from torchsummary import summary

try:
  from torchmetrics import Accuracy
except:
  !pip install torchmetrics
  from torchmetrics import Accuracy

from sklearn.datasets import load_iris


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchmetrics
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.11.4


In [2]:
def change_device(tensor: torch.Tensor, device="cuda"):
  return tensor.to(device)

def setup_device():
    return "cuda" if  torch.cuda.is_available() else "cpu"


def change_dtype(tensor:torch.Tensor, dtype=torch.float):
  """
    dtype must have the value mentioned in the torch documentation
    e.g. torch.float , torch.LongTensor
  """
  return tensor.to(dtype)
def show_image(image, label=None):
  """
    show image on the screen
    require convert_to_numpy function to be called first
  """
  if label is None:
    label = "Image Caption"
  plt.figure(figsize=(4,4))
  plt.imshow(convert_to_numpy(image))
  plt.title(label)
  plt.axis(False)

def convert_to_tensor(array, is_change_dtype=False, is_change_device=False, to_device="cpu")->torch.Tensor:
  """
  convert numpy array to tensor
  is_change_dtype=True will change the dtype to float
  """

  tensor = None
  if torch.is_tensor(array):
     tensor = array
  else:
     tensor = torch.from_numpy(array)
  
  if is_change_dtype: 
    tensor = change_dtype(tensor)
  
  if is_change_device: 
    tensor = change_device(tensor, device=to_device)
  
  return tensor

# convert_to_tensor(random.rand(2,3),is_change_dtype=True)

In [3]:
DEVICE = setup_device()
TEST_SIZE = 0.33
RANDOM_STATE = 42 

data_classes = ["class1", "class2", "class3"]

X, y = load_iris(return_X_y=True)

# print(target)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)


print(f"{X_train.shape} -> {y_train.shape} -> {X_test.shape} -> {y_test.shape}" )

(100, 4) -> (100,) -> (50, 4) -> (50,)


In [5]:
X_train = convert_to_tensor(X_train, is_change_dtype=True, is_change_device=True, to_device= DEVICE)
X_test = convert_to_tensor(X_test, is_change_dtype=True, is_change_device=True, to_device= DEVICE)

y_train = convert_to_tensor(y_train, is_change_dtype=True, is_change_device=True, to_device= DEVICE)
y_test = convert_to_tensor(y_test, is_change_dtype=True, is_change_device=True, to_device= DEVICE)

print(f"{X_train.dtype} -> {X_test.dtype} -> {y_train.dtype} -> {y_test.dtype}")

torch.float32 -> torch.float32 -> torch.float32 -> torch.float32


In [6]:
class IrisClassficationModel(nn.Module):
  def __init__(self, input_features: int, hidden_features: int, output_features: int):
    super().__init__()
    self.layer = nn.Sequential(
          nn.Linear(input_features, hidden_features),
          nn.Linear(hidden_features, hidden_features),
          nn.Linear(hidden_features, output_features)
      )
  
  def forward(self, x):
    return self.layer(x)


In [7]:
INPUT_FEATURES = 4
HIDDEN_FEATURES = 8 
OUTPUT_FEATURES = 3

model0 = change_device(tensor=IrisClassficationModel(input_features=INPUT_FEATURES, hidden_features=HIDDEN_FEATURES,
                       output_features=OUTPUT_FEATURES),device=DEVICE)
# summary(model0, (1,4))
# X_train[0].unsqueeze(dim=0).shape

In [8]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model0.parameters(), lr=0.1)


In [9]:
torch.manual_seed(RANDOM_STATE)

EPOCHS = 100
accuracy = Accuracy(task="multiclass", num_classes=len(data_classes))

y_train = y_train.to(torch.long)#.to(torch.long)
X_train = X_train.to(torch.long)#.to(torch.long)

X_test = X_test.to(torch.long)#.to(torch.long)
y_test = y_test.to(torch.long)#.to(torch.long)

print(f"{X_train.dtype} -> {X_test.dtype} -> {y_train.dtype} -> {y_test.dtype}")
print(f"y_test ->{y_train[:5]}  y_train-> {X_train[:5]}")

torch.int64 -> torch.int64 -> torch.int64 -> torch.int64
y_test ->tensor([1, 2, 1, 0, 2])  y_train-> tensor([[5, 2, 4, 1],
        [7, 3, 6, 2],
        [5, 3, 4, 1],
        [5, 3, 1, 0],
        [7, 2, 6, 2]])


In [10]:
for epoch in range(EPOCHS):
  model0.train()

  y_pred = model0(X_train.to(torch.float32))
  # print(y_pred.shape)
  # print(y_train.shape)
  loss = loss_fn(y_pred, y_train)
  train_acc = accuracy(y_pred,y_train)

  # 3. Optimizer zero grad
  optimizer.zero_grad()

  # 4. Loss backwards
  loss.backward()

  # 5. Optimizer step
  optimizer.step()

  model0.eval()

  with torch.inference_mode():
    test_pred = model0(X_test.to(torch.float32))
  
  test_loss = loss_fn(test_pred, y_test)
  
  test_train_acc = accuracy(test_pred,y_test)

  if epoch % 10 == 0: 
    print(f"loss -> {loss:0.5f}  accuracy-> {train_acc:0.5f} test_loss-> {test_loss:0.5f} test_train_acc-> {test_train_acc:0.5f}")

loss -> 1.21475  accuracy-> 0.34000 test_loss-> 1.15239 test_train_acc-> 0.12000
loss -> 0.81589  accuracy-> 0.65000 test_loss-> 0.77034 test_train_acc-> 0.70000
loss -> 0.56576  accuracy-> 0.79000 test_loss-> 0.50207 test_train_acc-> 0.70000
loss -> 0.46619  accuracy-> 0.85000 test_loss-> 0.39131 test_train_acc-> 0.96000
loss -> 0.55515  accuracy-> 0.66000 test_loss-> 0.49370 test_train_acc-> 0.70000
loss -> 0.40365  accuracy-> 0.85000 test_loss-> 0.30975 test_train_acc-> 0.96000
loss -> 0.55036  accuracy-> 0.68000 test_loss-> 0.40229 test_train_acc-> 0.70000
loss -> 0.37990  accuracy-> 0.93000 test_loss-> 0.26889 test_train_acc-> 0.88000
loss -> 0.45067  accuracy-> 0.71000 test_loss-> 0.29241 test_train_acc-> 0.88000
loss -> 0.38937  accuracy-> 0.79000 test_loss-> 0.26203 test_train_acc-> 0.88000


In [11]:
model0.eval()
with torch.inference_mode():
  ev_test_pred = model0(X_test.to(torch.float))
ev_loss = loss_fn(ev_test_pred, y_test)
ev_accuracy = accuracy(ev_test_pred, y_test)

print(f"ev_loss -> {ev_loss:5f} ev_accuracy->{ev_accuracy:5f}")

ev_loss -> 0.322678 ev_accuracy->0.800000
