In [1]:
import pandas as pd
import plotly
import plotly.express as px
from IPython.core.display import HTML
import torch
import numpy as np

In [2]:

iris = pd.read_csv('iris.csv')
print(iris.head())

species = list(iris["variety"].unique())
print(iris['variety'].value_counts())

print(species)
iris.describe()



   sepal.length  sepal.width  petal.length  petal.width variety
0           5.1          3.5           1.4          0.2  Setosa
1           4.9          3.0           1.4          0.2  Setosa
2           4.7          3.2           1.3          0.2  Setosa
3           4.6          3.1           1.5          0.2  Setosa
4           5.0          3.6           1.4          0.2  Setosa
Virginica     50
Setosa        50
Versicolor    50
Name: variety, dtype: int64
['Setosa', 'Versicolor', 'Virginica']


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [3]:


fig = px.scatter_3d(iris[["sepal.length","sepal.width","petal.length","petal.width","variety"]],
                    x = 'sepal.length',
                    y = 'sepal.width',
                    z = 'petal.width',
                    size = 'petal.length',
                    color = 'variety',
                    opacity = 0.7)

fig.update_layout(margin = dict(l=0, r=0, b=0, t=0))


HTML(plotly.offline.plot(fig, filename='5d_iris_scatter.html',include_plotlyjs='cdn'))

In [4]:
fig = px.scatter_matrix(iris, dimensions=["sepal.width", "sepal.length", "petal.width", "petal.length"],color="variety")
HTML(plotly.offline.plot(fig, filename='5d_scatter_matrix.html',include_plotlyjs='cdn'))

In [5]:
selected_features = ['petal.length', 'petal.width']

input_columns_all = torch.from_numpy(iris[list(iris.columns)[:-2]].to_numpy()).type(torch.float32)
input_columns = torch.from_numpy(iris[selected_features].to_numpy()).type(torch.float32)
output_columns = torch.tensor(iris['variety'].astype('category').cat.codes)

print("Input columns all: ", input_columns_all.shape, input_columns_all.dtype)
print("Input columns: ", input_columns.shape, input_columns.dtype)
print("Output columns: ", output_columns.shape, output_columns.dtype)

Input columns all:  torch.Size([150, 3]) torch.float32
Input columns:  torch.Size([150, 2]) torch.float32
Output columns:  torch.Size([150]) torch.int8


In [6]:
data = torch.utils.data.TensorDataset(input_columns, output_columns)

In [7]:
split = 0.1
rows = list(input_columns.shape)[0]
test_split = int(rows*split)
val_split = int(rows*split*2)
train_split = rows - val_split - test_split

train_set, val_set, test_set = torch.utils.data.random_split(data, [train_split, val_split, test_split])

In [8]:
train_loader = torch.utils.data.DataLoader(train_set, 16, shuffle = True) #batch size = 16
val_loader = torch.utils.data.DataLoader(val_set) #batch size = 1
test_loader = torch.utils.data.DataLoader(test_set) #batch size = 1

In [45]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dimension, output_dimension):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dimension, output_dimension)

    def forward(self, x):
        outputs = self.linear(x)
        return outputs

    def training_step(self, batch):
        inputs, targets = batch
        outputs = self(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, targets.long())
        return loss

    def validation_step(self, batch):
        inputs, targets = batch
        outputs = self(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, targets.long())
        _, pred = torch.max(outputs, 1)
        accuracy = torch.tensor(torch.sum(pred==targets).item()/len(pred))
        return [loss.detach(), accuracy.detach()] 


In [46]:
epochs = 200
learning_rate = 0.01


model = LogisticRegression(len(selected_features), len(species))
criterion = torch.nn.CrossEntropyLoss()

In [47]:
def fit(model, train_loader, val_loader, epochs, lr, criterion_function = torch.nn.functional.cross_entropy, optimizer_function = torch.optim.Adam):
    history = {"loss" : [], "accuracy" : []}
    optimizer = optimizer_function(model.parameters(), lr)
    for epoch in range(epochs):
        print("Epoch ", epoch)
        #Train
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        #Validate
        for batch in val_loader:
            loss, accuracy = evaluate(model, val_loader)
        print("loss: ", loss.item(), "accuracy: ", accuracy.item(), "\n")
        history["loss"].append(loss.item())
        history["accuracy"].append(accuracy.item())
         
    return history

In [48]:
def evaluate(model, loader):
    outputs = [model.validation_step(batch) for batch in loader]
    outputs = torch.tensor(outputs).T
    loss, accuracy = torch.mean(outputs, dim=1)
    return loss, accuracy

In [49]:
fit(model, train_loader, val_loader, epochs, learning_rate)

Epoch  0
loss:  1.6985193490982056 accuracy:  0.30000001192092896 

Epoch  1
loss:  1.4178080558776855 accuracy:  0.30000001192092896 

Epoch  2
loss:  1.2302438020706177 accuracy:  0.30000001192092896 

Epoch  3
loss:  1.1358810663223267 accuracy:  0.30000001192092896 

Epoch  4
loss:  1.101354718208313 accuracy:  0.30000001192092896 

Epoch  5
loss:  1.0809509754180908 accuracy:  0.30000001192092896 

Epoch  6
loss:  1.0579463243484497 accuracy:  0.30000001192092896 

Epoch  7
loss:  1.0323728322982788 accuracy:  0.30000001192092896 

Epoch  8
loss:  1.0059645175933838 accuracy:  0.30000001192092896 

Epoch  9
loss:  0.9818710088729858 accuracy:  0.30000001192092896 

Epoch  10
loss:  0.9644540548324585 accuracy:  0.5333333611488342 

Epoch  11
loss:  0.9398399591445923 accuracy:  0.3333333432674408 

Epoch  12
loss:  0.915465772151947 accuracy:  0.30000001192092896 

Epoch  13
loss:  0.8922751545906067 accuracy:  0.30000001192092896 

Epoch  14
loss:  0.8723837733268738 accuracy:  0

{'loss': [1.6985193490982056,
  1.4178080558776855,
  1.2302438020706177,
  1.1358810663223267,
  1.101354718208313,
  1.0809509754180908,
  1.0579463243484497,
  1.0323728322982788,
  1.0059645175933838,
  0.9818710088729858,
  0.9644540548324585,
  0.9398399591445923,
  0.915465772151947,
  0.8922751545906067,
  0.8723837733268738,
  0.8529859781265259,
  0.8332421183586121,
  0.8193958401679993,
  0.8039640784263611,
  0.7884728908538818,
  0.7722324132919312,
  0.7557240724563599,
  0.7399097084999084,
  0.7267166376113892,
  0.7123829126358032,
  0.6996530294418335,
  0.6895722150802612,
  0.6813505291938782,
  0.6683018803596497,
  0.6538861393928528,
  0.6450960636138916,
  0.6354535818099976,
  0.6249321699142456,
  0.6154871582984924,
  0.6057191491127014,
  0.5977340340614319,
  0.5890392661094666,
  0.5804833173751831,
  0.5775497555732727,
  0.5702192783355713,
  0.5621476769447327,
  0.5548458099365234,
  0.5479496121406555,
  0.5421233773231506,
  0.537203848361969,
  0.5