###Here we use Label encoding to convert categorical data (textual labels) into numerical values.
###DataLoder is used to load and manage data from a dataset during the training.
###batch_size parameter specifies the number of data samples that will be included in each mini-batch. In this case, the mini-batch size is set to 16, meaning that 16 data samples will be processed in parallel during each iteration.
###shuffle=True, means shuffle the order of the data samples within the dataset before each epoch.

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
import pandas as pd
from sklearn import preprocessing

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [3]:
!wget https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv

--2023-10-19 15:38:52--  https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv
Resolving gist.githubusercontent.com (gist.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to gist.githubusercontent.com (gist.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3858 (3.8K) [text/plain]
Saving to: ‘iris.csv’


2023-10-19 15:38:53 (69.1 MB/s) - ‘iris.csv’ saved [3858/3858]



In [21]:
data = pd.read_csv('iris.csv')
print(data.shape)
print(data.loc[120])
data.head()

(150, 5)
sepal_length          6.9
sepal_width           3.2
petal_length          5.7
petal_width           2.3
species         virginica
Name: 120, dtype: object


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


**Dataloader**

In [10]:
class IrisDataset(Dataset):
    def __init__(self, file):
        self.data = pd.read_csv(file)
        input_features = self.data.values[:, 0:4].astype('float32')
        self.x_train = torch.from_numpy(input_features)

        label_encoder = preprocessing.LabelEncoder()
        self.data['species'] = label_encoder.fit_transform(self.data['species'])
        mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
        print(mapping)
        target_label = self.data['species'].values

        self.y_train = torch.from_numpy(target_label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        return (self.x_train[idx],self.y_train[idx])

In [25]:
dataset = IrisDataset('iris.csv')
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

{'setosa': 0, 'versicolor': 1, 'virginica': 2}


In [12]:
class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 50)
        self.layer3 = nn.Linear(50, 3)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        return x

In [13]:
model = Model(4).to(device)

In [14]:
## Optimizer and loss
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

**Train**

In [15]:
epochs = 20

for epoch in range(1,epochs+1):
    print("EPOCH:",epoch,end=" ")
    running_loss=0
    running_acc=0

    for data,labels in dataloader:
        data,labels=data.to(device),labels.to(device)
        optimizer.zero_grad()
        output=model.forward(data)
        loss=criterion(output,labels)

        result=torch.argmax(output,dim=1)
        running_loss+=loss.item()
        running_acc+=torch.mean((result==labels).type(torch.float))

        loss.backward()
        optimizer.step()
    else:
        train_loss=running_loss/len(dataloader)
        train_acc=running_acc/len(dataloader)

        print("Training Loss: {:.3f}".format(train_loss),end=" ")

        print("Train Accuracy: {:.2f}%".format(train_acc.item()*100))

EPOCH: 1 Training Loss: 1.099 Train Accuracy: 32.08%
EPOCH: 2 Training Loss: 1.061 Train Accuracy: 61.87%
EPOCH: 3 Training Loss: 1.023 Train Accuracy: 75.00%
EPOCH: 4 Training Loss: 0.971 Train Accuracy: 67.71%
EPOCH: 5 Training Loss: 0.925 Train Accuracy: 67.71%
EPOCH: 6 Training Loss: 0.877 Train Accuracy: 68.75%
EPOCH: 7 Training Loss: 0.849 Train Accuracy: 67.71%
EPOCH: 8 Training Loss: 0.824 Train Accuracy: 68.75%
EPOCH: 9 Training Loss: 0.811 Train Accuracy: 66.67%
EPOCH: 10 Training Loss: 0.796 Train Accuracy: 77.50%
EPOCH: 11 Training Loss: 0.789 Train Accuracy: 83.33%
EPOCH: 12 Training Loss: 0.766 Train Accuracy: 93.12%
EPOCH: 13 Training Loss: 0.757 Train Accuracy: 88.33%
EPOCH: 14 Training Loss: 0.724 Train Accuracy: 95.00%
EPOCH: 15 Training Loss: 0.721 Train Accuracy: 98.12%
EPOCH: 16 Training Loss: 0.697 Train Accuracy: 95.00%
EPOCH: 17 Training Loss: 0.689 Train Accuracy: 96.25%
EPOCH: 18 Training Loss: 0.677 Train Accuracy: 95.63%
EPOCH: 19 Training Loss: 0.667 Train 

In [16]:
train_features, train_labels = next(iter(dataloader))

In [17]:
train_features[8],train_labels[8]

(tensor([5.0000, 2.3000, 3.3000, 1.0000]), tensor(1))

In [22]:
data = np.array([[6.9000, 3.2000, 5.7000, 2.3000]], dtype=np.float32)
data = torch.from_numpy(data).to(device)

In [23]:
torch.argmax(model(data),dim=1)

tensor([2])