In [1]:
import torch
from torch.utils.data import Dataset , DataLoader
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
!pip install torchinfo
from torchinfo import summary



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
file = 'drive/My Drive/Data_precog/Iris.csv'

In [4]:
df = pd.read_csv(file)
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
df.shape[0] # number of examples

150

In [6]:
df.shape[1] # number of features

6

In [7]:
# dropping the Id row
df = df.drop('Id',axis=1)
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
# training and test split
X = df.drop('Species',axis=1)
y = df['Species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train)

     SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
22             4.6           3.6            1.0           0.2
15             5.7           4.4            1.5           0.4
65             6.7           3.1            4.4           1.4
11             4.8           3.4            1.6           0.2
42             4.4           3.2            1.3           0.2
..             ...           ...            ...           ...
71             6.1           2.8            4.0           1.3
106            4.9           2.5            4.5           1.7
14             5.8           4.0            1.2           0.2
92             5.8           2.6            4.0           1.2
102            7.1           3.0            5.9           2.1

[120 rows x 4 columns]


In [9]:
# label encoding for y_train and y_test
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

print(y_train)

[0 0 1 0 0 2 1 0 0 0 2 1 1 0 0 1 2 2 1 2 1 2 1 0 2 1 0 0 0 1 2 0 0 0 1 0 1
 2 0 1 2 0 2 2 1 1 2 1 0 1 2 0 0 1 1 0 2 0 0 1 1 2 1 2 2 1 0 0 2 2 0 0 0 1
 2 0 2 2 0 1 1 2 1 2 0 2 1 2 1 1 1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 1 2 2 1 2 1
 1 2 2 0 1 2 0 1 2]


In [10]:
# scaling the values so that every entry is less than 1
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# print(X_train)

All values scaled and label encoding done


In [11]:
# converting everything to tensors

X_train = torch.tensor(X_train , dtype = torch.float32)
X_test = torch.tensor(X_test, dtype = torch.float32)
y_train = torch.tensor(y_train , dtype = torch.long)
y_test = torch.tensor(y_test , dtype = torch.long)

In [12]:
# creating the dataset class
class CustomDataset (Dataset):
  def __init__(self,features,labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return len(self.features)

  def __getitem__(self,idx):
    return self.features[idx],self.labels[idx]

In [13]:
training_dataset = CustomDataset(X_train, y_train)
testing_dataset = CustomDataset(X_test, y_test)

In [14]:
train_dataloader = DataLoader(training_dataset, batch_size = 30, shuffle = True)
test_dataloader = DataLoader(testing_dataset, batch_size = 30, shuffle = False) # no need to shuffle for test

In [15]:
class Model (nn.Module) :
  def __init__(self, num_of_features) : # it will be 4
    super().__init__()
    self.network = nn.Sequential(
        nn.Linear(num_of_features, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, 3) ,
        # applying a softmax layer
        nn.Softmax(dim = 1)
    )

  def forward (self, features) :
      return self.network(features)


In [16]:
X_train.shape[1]

4

In [17]:
model = Model(X_train.shape[1])
summary(model, input_size=(X_train.shape[0], X_train.shape[1]))

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [120, 3]                  --
├─Sequential: 1-1                        [120, 3]                  --
│    └─Linear: 2-1                       [120, 64]                 320
│    └─ReLU: 2-2                         [120, 64]                 --
│    └─Linear: 2-3                       [120, 64]                 4,160
│    └─ReLU: 2-4                         [120, 64]                 --
│    └─Linear: 2-5                       [120, 3]                  195
│    └─Softmax: 2-6                      [120, 3]                  --
Total params: 4,675
Trainable params: 4,675
Non-trainable params: 0
Total mult-adds (M): 0.56
Input size (MB): 0.00
Forward/backward pass size (MB): 0.13
Params size (MB): 0.02
Estimated Total Size (MB): 0.15

In [18]:
learning_rate = 0.01
epochs = 100
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [26]:

model.train()

for epoch in range(0, epochs) :
  total_loss = 0
  for batch_features, batch_labels in train_dataloader :
    # forward pass
    y_pred = model(batch_features)

    # loss calculation
    loss = loss_fn(y_pred, batch_labels)
    # clearing gradients
    optimizer.zero_grad()

    # backpropagation
    loss.backward()

    # updating the parameters
    optimizer.step()
    total_loss += loss

  print(epoch, total_loss / len(train_dataloader))

0 tensor(0.8561, grad_fn=<DivBackward0>)
1 tensor(0.8555, grad_fn=<DivBackward0>)
2 tensor(0.8549, grad_fn=<DivBackward0>)
3 tensor(0.8544, grad_fn=<DivBackward0>)
4 tensor(0.8539, grad_fn=<DivBackward0>)
5 tensor(0.8532, grad_fn=<DivBackward0>)
6 tensor(0.8527, grad_fn=<DivBackward0>)
7 tensor(0.8521, grad_fn=<DivBackward0>)
8 tensor(0.8515, grad_fn=<DivBackward0>)
9 tensor(0.8509, grad_fn=<DivBackward0>)
10 tensor(0.8504, grad_fn=<DivBackward0>)
11 tensor(0.8498, grad_fn=<DivBackward0>)
12 tensor(0.8493, grad_fn=<DivBackward0>)
13 tensor(0.8487, grad_fn=<DivBackward0>)
14 tensor(0.8482, grad_fn=<DivBackward0>)
15 tensor(0.8477, grad_fn=<DivBackward0>)
16 tensor(0.8471, grad_fn=<DivBackward0>)
17 tensor(0.8466, grad_fn=<DivBackward0>)
18 tensor(0.8461, grad_fn=<DivBackward0>)
19 tensor(0.8455, grad_fn=<DivBackward0>)
20 tensor(0.8450, grad_fn=<DivBackward0>)
21 tensor(0.8445, grad_fn=<DivBackward0>)
22 tensor(0.8440, grad_fn=<DivBackward0>)
23 tensor(0.8435, grad_fn=<DivBackward0>)
24

In [27]:
model.eval()

Model(
  (network): Sequential(
    (0): Linear(in_features=4, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=3, bias=True)
    (5): Softmax(dim=1)
  )
)

In [28]:

total = 0
correct = 0


with torch.no_grad() :
  for batch_features, batch_labels in test_dataloader :
    output = model(batch_features)
    print(output , end = " ")
    _, predicted = torch.max(output , dim = 1)
    total = total + batch_labels.size(0)
    correct = correct + (predicted == batch_labels).sum().item()
    # print(predicted , end = " ")
    # print(batch_labels)
    # print(correct)
    # print(total)
print("Accuracy is ")
print(correct / total)

tensor([[1.7495e-02, 8.1468e-01, 1.6783e-01],
        [9.7428e-01, 2.5705e-02, 1.3294e-05],
        [1.6811e-06, 2.3908e-02, 9.7609e-01],
        [8.0990e-03, 6.9014e-01, 3.0176e-01],
        [3.6104e-03, 6.0615e-01, 3.9024e-01],
        [9.5286e-01, 4.7097e-02, 4.5980e-05],
        [5.3577e-02, 8.6087e-01, 8.5552e-02],
        [2.8179e-05, 7.1113e-02, 9.2886e-01],
        [1.1379e-03, 3.8438e-01, 6.1449e-01],
        [3.5032e-02, 8.5568e-01, 1.0929e-01],
        [4.4021e-04, 2.5098e-01, 7.4858e-01],
        [9.7048e-01, 2.9500e-02, 2.3316e-05],
        [9.7607e-01, 2.3920e-02, 1.2937e-05],
        [9.7224e-01, 2.7740e-02, 1.9826e-05],
        [9.8130e-01, 1.8694e-02, 8.3255e-06],
        [8.4600e-03, 7.0272e-01, 2.8882e-01],
        [2.7304e-05, 6.9110e-02, 9.3086e-01],
        [4.1542e-02, 8.6117e-01, 9.7284e-02],
        [1.9768e-02, 8.0683e-01, 1.7340e-01],
        [2.1117e-05, 6.0395e-02, 9.3958e-01],
        [9.7265e-01, 2.7333e-02, 2.0369e-05],
        [1.4938e-03, 4.0409e-01, 5