# Linear classifier on the Iris data in PyTorch

In [1]:
import pandas as pd
import plotly
import plotly.express as px
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import datasets
from torch.utils.data import random_split, DataLoader, TensorDataset

In [3]:
# Load the iris measurements from the CSV file (path is relative to the
# notebook's working directory).
dataset = pd.read_csv("Iris.csv")
# Summary statistics of the numeric columns; as the last expression of the
# cell this is what gets rendered as the cell output.
dataset.describe()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [5]:
# Class balance check: number of rows per species label.
dataset['Species'].value_counts()

setosa        50
versicolor    50
virginica     50
Name: Species, dtype: int64

In [7]:
# Species names in order of first appearance; a name's position in this list
# is the integer class id used as the training target.
Species = dataset["Species"].unique().tolist()
# Encode every row's label through a name -> position lookup table.
dataset["Species_index"] = dataset["Species"].map(
    {name: position for position, name in enumerate(Species)}
)
dataset.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,Species_index
0,5.1,3.5,1.4,0.2,setosa,0
1,4.9,3.0,1.4,0.2,setosa,0
2,4.7,3.2,1.3,0.2,setosa,0
3,4.6,3.1,1.5,0.2,setosa,0
4,5.0,3.6,1.4,0.2,setosa,0


In [11]:
# 3-D scatter of the four measurements: three of them are mapped to the
# x/y/z axes, petal length to the marker size and the species to the colour.
# `px` (plotly.express) is imported in the notebook's import cell.
fig = px.scatter_3d(
    dataset[["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm", "Species"]],
    x="SepalLengthCm",
    y="SepalWidthCm",
    z="PetalWidthCm",
    size="PetalLengthCm",
    color="Species",
    opacity=0.7,
)

# Drop the margins around the plot. This call is the cell's last expression,
# so its return value is what the notebook renders.
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

In [12]:
# Select the feature columns by name instead of by position, so the tensors
# stay correct if more columns are ever added to `dataset`.
feature_columns_all = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
feature_columns = ["PetalLengthCm", "PetalWidthCm"]

# Features as float32 tensors: all four measurements, and the petal-only subset.
input_columns_all = torch.tensor(dataset[feature_columns_all].to_numpy(), dtype=torch.float32)
input_columns = torch.tensor(dataset[feature_columns].to_numpy(), dtype=torch.float32)
# Integer class ids; forced to int64 because they are used as class-index
# targets for F.cross_entropy.
output_columns = torch.tensor(dataset["Species_index"].to_numpy(), dtype=torch.long)

print("Input columns all: ", input_columns_all.shape, input_columns_all.dtype)
print("Input columns: ", input_columns.shape, input_columns.dtype)
print("Output columns: ", output_columns.shape, output_columns.dtype)


Input columns all:  torch.Size([150, 4]) torch.float32
Input columns:  torch.Size([150, 2]) torch.float32
Output columns:  torch.Size([150]) torch.int64


In [13]:
# Pair each row of the two petal features with its class id so the samples
# can be split and batched together.
data = TensorDataset(input_columns, output_columns)

In [14]:
# Hold out `split` of the rows for testing and twice that for validation;
# everything that is left is used for training.
split = 0.1
rows = len(data)
test_split = int(rows * split)
val_split = int(rows * split * 2)
train_split = rows - val_split - test_split

# Seed the shuffle so the same rows land in the same subset on every run;
# without it each run of the notebook reports different metrics.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = random_split(
    data, [train_split, val_split, test_split], generator=split_generator
)

In [15]:
# Training batches are reshuffled every epoch; validation and test data are
# served one sample at a time, in order.
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
val_loader = DataLoader(val_set, batch_size=1)
test_loader = DataLoader(test_set, batch_size=1)

In [16]:
# Peek at a single validation batch to check its structure
# ([features, target]); stop after the first one.
for x in val_loader:
    print (x)
    break

[tensor([[1.4000, 0.1000]]), tensor([0])]


In [17]:
class IrisModel(nn.Module):
    """Small fully connected classifier: Linear -> Dropout -> Linear.

    The network returns raw class scores (logits); the softmax is applied
    inside ``F.cross_entropy`` when the loss is computed.

    Args:
        input_size: number of input features per sample.
        output_size: number of classes (size of the logit vector).
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, 8)
        self.drop = nn.Dropout(0.1)
        self.linear2 = nn.Linear(8, output_size)

    def forward(self, x):
        """Return the logits for a batch of inputs ``x``."""
        hidden = self.linear1(x)
        # Dropout is only active while the module is in training mode.
        hidden = self.drop(hidden)
        return self.linear2(hidden)

    def training_step(self, batch):
        """Return the differentiable cross-entropy loss for one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        outputs = self(inputs)
        return F.cross_entropy(outputs, targets)

    def validation_step(self, batch):
        """Return ``[loss, accuracy]`` for one ``(inputs, targets)`` batch.

        Both values are 0-dim tensors that carry no gradient history.
        """
        inputs, targets = batch
        # Metrics only: skip building the autograd graph altogether.
        with torch.no_grad():
            outputs = self(inputs)
            loss = F.cross_entropy(outputs, targets)
            # Predicted class = index of the largest logit in each row.
            _, pred = torch.max(outputs, 1)
            accuracy = torch.tensor(torch.sum(pred == targets).item() / len(pred))
        return [loss, accuracy]

In [18]:
def evaluate(model, loader):
    """Return the mean loss and mean accuracy of ``model`` over ``loader``.

    The model is switched to evaluation mode for the duration of the call, so
    dropout does not perturb the metrics, and its previous mode is restored
    afterwards. No gradients are tracked.

    Args:
        model: module exposing ``validation_step(batch) -> [loss, accuracy]``.
        loader: iterable of batches accepted by ``model.validation_step``.

    Returns:
        ``(loss, accuracy)`` as 0-dim tensors. Each is the unweighted mean of
        the per-batch values, so batches of unequal size count equally.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            results = [model.validation_step(batch) for batch in loader]
    finally:
        # Put the model back in the mode the caller had it in.
        model.train(was_training)

    if not results:
        raise ValueError("evaluate() needs at least one batch, but the loader is empty")

    # Rows are batches, columns are (loss, accuracy); transpose so that each
    # metric becomes one row and can be averaged along dim 1.
    outputs = torch.tensor(results).T
    loss, accuracy = torch.mean(outputs, dim=1)
    return loss, accuracy

In [50]:
# Sanity check: score a freshly initialised, untrained network on the
# validation set. Dedicated names are used so that running this cell late
# (or re-running it) never overwrites an already trained `model` or its
# reported `loss` / `accuracy`.
untrained_model = IrisModel(2, 3)
untrained_loss, untrained_accuracy = evaluate(untrained_model, val_loader)
"loss: ", untrained_loss.item(), "accuracy: ", untrained_accuracy.item()

('loss: ', 1.2138564586639404, 'accuracy: ', 0.5)

In [20]:
def fit(model, train_loader, val_loader, epochs, lr, optimizer_function = torch.optim.Adam):
    """Train ``model`` and record its validation metrics after every epoch.

    Args:
        model: module exposing ``training_step(batch) -> loss`` and usable
            with ``evaluate``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, scored once per epoch.
        epochs: number of passes over ``train_loader``.
        lr: learning rate passed to the optimizer.
        optimizer_function: optimizer factory called as
            ``optimizer_function(model.parameters(), lr)``.

    Returns:
        Dict with the keys ``"loss"`` and ``"accuracy"``, each a list holding
        one validation value per epoch.
    """
    history = {"loss" : [], "accuracy" : []}
    optimizer = optimizer_function(model.parameters(), lr)
    was_training = model.training
    try:
        for epoch in range(epochs):
            print("Epoch ", epoch)
            # Train: dropout must be active here.
            model.train()
            for batch in train_loader:
                # Clear first so gradients left over from earlier backward
                # calls cannot leak into this update.
                optimizer.zero_grad()
                loss = model.training_step(batch)
                loss.backward()
                optimizer.step()
            # Validate: one pass over the validation set, with dropout
            # disabled and without tracking gradients.
            model.eval()
            with torch.no_grad():
                val_loss, val_accuracy = evaluate(model, val_loader)
            print("loss: ", val_loss.item(), "accuracy: ", val_accuracy.item(), "\n")
            history["loss"].append(val_loss.item())
            history["accuracy"].append(val_accuracy.item())
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    return history

In [21]:
# Train on the two petal measurements only, then score the held-out test set.
model = IrisModel(input_size=2, output_size=3)  # 2 input features and 3 output categories
history = fit(model, train_loader, val_loader, epochs=16, lr=0.01)
loss, accuracy = evaluate(model=model, loader=test_loader)
print("Evaluation result: Loss: ", loss.item(), " Accuracy: ", accuracy.item())

Epoch  0
loss:  1.2547764778137207 accuracy:  0.3333333432674408 

Epoch  1
loss:  1.1304751634597778 accuracy:  0.4000000059604645 

Epoch  2
loss:  1.0457825660705566 accuracy:  0.6000000238418579 

Epoch  3
loss:  1.0737560987472534 accuracy:  0.30000001192092896 

Epoch  4
loss:  0.8892136812210083 accuracy:  0.5666666626930237 

Epoch  5
loss:  0.9593086242675781 accuracy:  0.4333333373069763 

Epoch  6
loss:  0.852010190486908 accuracy:  0.6000000238418579 

Epoch  7
loss:  0.8437413573265076 accuracy:  0.6333333253860474 

Epoch  8
loss:  0.8306963443756104 accuracy:  0.6333333253860474 

Epoch  9
loss:  0.7256527543067932 accuracy:  0.7333333492279053 

Epoch  10
loss:  0.6154490113258362 accuracy:  0.7333333492279053 

Epoch  11
loss:  0.6256479620933533 accuracy:  0.800000011920929 

Epoch  12
loss:  0.5524524450302124 accuracy:  0.8666666746139526 

Epoch  13
loss:  0.5417674779891968 accuracy:  0.8333333134651184 

Epoch  14
loss:  0.5655938982963562 accuracy:  0.8333333134

In [22]:
# Repeat the experiment with all four measurements as input features.
data_all = TensorDataset(input_columns_all, output_columns)

# train_split, val_split and test_split were computed for the first split.
# A seeded generator makes the partition, and therefore the reported
# metrics, reproducible from run to run.
train_set_all, val_set_all, test_set_all = random_split(
    data_all,
    [train_split, val_split, test_split],
    generator=torch.Generator().manual_seed(42),
)

train_loader_all = DataLoader(train_set_all, batch_size=16, shuffle=True)
val_loader_all = DataLoader(val_set_all)
test_loader_all = DataLoader(test_set_all)

model_all = IrisModel(4, 3)  # 4 input features and 3 output categories
history_all = fit(model_all, train_loader_all, val_loader_all, 16, 0.01)
loss, accuracy = evaluate(model_all, test_loader_all)
print("Evaluation result: Loss: ", loss.item(), " Accuracy: ", accuracy.item())

Epoch  0
loss:  1.4428482055664062 accuracy:  0.03333333507180214 

Epoch  1
loss:  1.0589536428451538 accuracy:  0.6000000238418579 

Epoch  2
loss:  0.8978039026260376 accuracy:  0.5666666626930237 

Epoch  3
loss:  0.7963540554046631 accuracy:  0.6000000238418579 

Epoch  4
loss:  0.6990001797676086 accuracy:  0.5666666626930237 

Epoch  5
loss:  0.6222383379936218 accuracy:  0.6000000238418579 

Epoch  6
loss:  0.588312029838562 accuracy:  0.6333333253860474 

Epoch  7
loss:  0.533092737197876 accuracy:  0.6000000238418579 

Epoch  8
loss:  0.5342414379119873 accuracy:  0.6666666865348816 

Epoch  9
loss:  0.47020724415779114 accuracy:  0.6666666865348816 

Epoch  10
loss:  0.47391045093536377 accuracy:  0.8666666746139526 

Epoch  11
loss:  0.44216954708099365 accuracy:  0.8666666746139526 

Epoch  12
loss:  0.42904213070869446 accuracy:  0.7333333492279053 

Epoch  13
loss:  0.3741326332092285 accuracy:  0.8999999761581421 

Epoch  14
loss:  0.40829232335090637 accuracy:  0.80000