In [1]:
import pandas as pd
import plotly
import plotly.express as px
from IPython.core.display import HTML
import torch
import numpy as np

In [2]:

# Load the iris measurements and preview the first rows.
iris = pd.read_csv('iris.csv')
print(iris.head())

# Distinct class labels (in order of first appearance) and rows per class.
variety_column = iris["variety"]
species = list(variety_column.unique())
print(variety_column.value_counts())

print(species)
# Kept as the final expression so the notebook renders the summary table.
iris.describe()



   sepal.length  sepal.width  petal.length  petal.width variety
0           5.1          3.5           1.4          0.2  Setosa
1           4.9          3.0           1.4          0.2  Setosa
2           4.7          3.2           1.3          0.2  Setosa
3           4.6          3.1           1.5          0.2  Setosa
4           5.0          3.6           1.4          0.2  Setosa
Virginica     50
Setosa        50
Versicolor    50
Name: variety, dtype: int64
['Setosa', 'Versicolor', 'Virginica']


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [3]:


# One 3-D scatter carrying five variables: three axes, marker size and colour.
scatter_columns = ["sepal.length", "sepal.width", "petal.length", "petal.width", "variety"]
fig = px.scatter_3d(
    iris[scatter_columns],
    x='sepal.length',
    y='sepal.width',
    z='petal.width',
    size='petal.length',
    color='variety',
    opacity=0.7,
)

# Zero margin on all four sides of the figure.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

# Export through plotly's offline plot (plotly.js referenced from the CDN) and
# hand its return value to HTML, which is the cell's displayed result.
scatter_output = plotly.offline.plot(fig, filename='5d_iris_scatter.html', include_plotlyjs='cdn')
HTML(scatter_output)

In [4]:
# Pairwise scatter plots of the four measurements, coloured by variety.
matrix_dimensions = ["sepal.width", "sepal.length", "petal.width", "petal.length"]
fig = px.scatter_matrix(iris, dimensions=matrix_dimensions, color="variety")

# Export through plotly's offline plot and pass its return value to HTML for display.
matrix_output = plotly.offline.plot(fig, filename='5d_scatter_matrix.html', include_plotlyjs='cdn')
HTML(matrix_output)

In [5]:
# Feature subset used by the reduced (two-input) model.
selected_features = ['petal.length', 'petal.width']

# Every column except the last one, converted to a float32 tensor.
feature_names_all = list(iris.columns)[:-1]
input_columns_all = torch.from_numpy(iris[feature_names_all].to_numpy()).to(torch.float32)
# Only the selected features, converted the same way.
input_columns = torch.from_numpy(iris[selected_features].to_numpy()).to(torch.float32)
# Integer class code per row, from the categorical encoding of the label column.
class_codes = iris['variety'].astype('category').cat.codes
output_columns = torch.tensor(class_codes)

# Report shape and dtype of each tensor.
for _label, _tensor in (
    ("Input columns all: ", input_columns_all),
    ("Input columns: ", input_columns),
    ("Output columns: ", output_columns),
):
    print(_label, _tensor.shape, _tensor.dtype)

Input columns all:  torch.Size([150, 4]) torch.float32
Input columns:  torch.Size([150, 2]) torch.float32
Output columns:  torch.Size([150]) torch.int8


In [6]:
# Pair each row of selected features with its class code.
data = torch.utils.data.TensorDataset(
    input_columns,
    output_columns,
)

In [7]:
# `split` is the test fraction; validation takes twice that fraction and
# training receives whatever rows remain.
split = 0.1
rows = input_columns.shape[0]
test_split = int(rows * split)
val_split = int(rows * split * 2)
train_split = rows - val_split - test_split

# Use a dedicated, fixed-seed generator for the partition so that a
# Restart & Run All reproduces the same train/validation/test rows instead of
# drawing a new random partition on every execution.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = torch.utils.data.random_split(
    data,
    [train_split, val_split, test_split],
    generator=split_generator,
)

In [8]:
# Training batches hold 16 samples and are reshuffled each epoch.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=16, shuffle=True)
# Validation and test loaders use the default batch size of 1.
val_loader = torch.utils.data.DataLoader(val_set)
test_loader = torch.utils.data.DataLoader(test_set)

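Cross-entropy loss measures how far the predicted class probabilities are from the actual class: it is the negative log of the probability the model assigns to the true class, so confident wrong predictions are penalized most heavily.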

In [9]:
class LogisticRegression(torch.nn.Module):
    """Multinomial logistic regression implemented as a single linear layer.

    The layer outputs raw class scores (logits); no softmax is applied here
    because ``torch.nn.functional.cross_entropy`` consumes logits directly.
    """

    def __init__(self, input_dimension, output_dimension):
        """Create the linear layer.

        Args:
            input_dimension: number of input features per sample.
            output_dimension: number of classes (one logit per class).
        """
        super().__init__()
        self.linear = torch.nn.Linear(input_dimension, output_dimension)

    def forward(self, x):
        """Return the class logits for a batch of inputs ``x``."""
        return self.linear(x)

    def training_step(self, batch):
        """Compute the cross-entropy loss for one ``(inputs, targets)`` batch.

        The returned loss is attached to the autograd graph so the caller can
        call ``backward()`` on it.
        """
        inputs, targets = batch
        outputs = self(inputs)
        # cross_entropy requires int64 class indices, hence the cast.
        return torch.nn.functional.cross_entropy(outputs, targets.long())

    def validation_step(self, batch):
        """Compute loss and accuracy for one ``(inputs, targets)`` batch.

        Runs under ``torch.no_grad()`` so no autograd graph is built for
        evaluation-only work.

        Returns:
            ``[loss, accuracy]`` as two 0-dim tensors that do not require grad.
        """
        inputs, targets = batch
        with torch.no_grad():
            outputs = self(inputs)
            loss = torch.nn.functional.cross_entropy(outputs, targets.long())
            # Predicted class = index of the largest logit in each row.
            pred = outputs.argmax(dim=1)
            # Fraction of correct predictions in this batch.
            accuracy = (pred == targets).float().mean()
        return [loss, accuracy]


In [10]:
def fit(model, train_loader, val_loader, epochs, learning_rate, criterion_function = torch.nn.functional.cross_entropy, optimizer_function = torch.optim.Adam):
    """Train ``model`` and record validation metrics after every epoch.

    Args:
        model: object exposing ``parameters()``, ``training_step(batch)`` and
            ``validation_step(batch)``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to ``evaluate``.
        epochs: number of passes over ``train_loader``.
        learning_rate: passed as the second positional argument to
            ``optimizer_function``.
        criterion_function: accepted for interface compatibility but not used;
            the loss is whatever ``model.training_step`` and
            ``model.validation_step`` compute.
        optimizer_function: optimizer factory called as
            ``optimizer_function(model.parameters(), learning_rate)``.

    Returns:
        dict with keys ``"loss"`` and ``"accuracy"``, each a list holding one
        validation value per epoch.
    """
    history = {"loss" : [], "accuracy" : []}
    optimizer = optimizer_function(model.parameters(), learning_rate)
    for epoch in range(epochs):
        print("Epoch ", epoch)
        # Train: one optimizer update per batch.
        for batch in train_loader:
            # Clear gradients first so each step only sees this batch's gradients.
            optimizer.zero_grad()
            train_loss = model.training_step(batch)
            train_loss.backward()
            optimizer.step()
        # Validate: evaluate() already iterates over the whole loader, so it is
        # called exactly once per epoch rather than once per validation batch.
        loss, accuracy = evaluate(model, val_loader)
        print("loss: ", loss.item(), "accuracy: ", accuracy.item(), "\n")
        history["loss"].append(loss.item())
        history["accuracy"].append(accuracy.item())

    return history

In [11]:
def evaluate(model, loader):
    """Average the per-batch validation results of ``model`` over ``loader``.

    Each batch contributes one ``[loss, accuracy]`` pair from
    ``model.validation_step``; the pairs are averaged with equal weight per
    batch.

    Returns:
        ``(loss, accuracy)`` as two 0-dim tensors.
    """
    per_batch = []
    for batch in loader:
        per_batch.append(model.validation_step(batch))
    # Rows become [all losses, all accuracies] after the transpose.
    by_metric = torch.tensor(per_batch).T
    averages = torch.mean(by_metric, dim=1)
    loss, accuracy = averages
    return loss, accuracy

In [12]:
# Training hyperparameters.
epochs = 200
learning_rate = 0.01

# One input per selected feature, one output logit per species.
model = LogisticRegression(
    input_dimension=len(selected_features),
    output_dimension=len(species),
)
# Kept as the final expression so the returned history is shown as cell output.
fit(model, train_loader, val_loader, epochs=epochs, learning_rate=learning_rate)

Epoch  0
loss:  2.030327320098877 accuracy:  0.4000000059604645 

Epoch  1
loss:  1.7610653638839722 accuracy:  0.30000001192092896 

Epoch  2
loss:  1.5293662548065186 accuracy:  0.30000001192092896 

Epoch  3
loss:  1.3294787406921387 accuracy:  0.30000001192092896 

Epoch  4
loss:  1.1859362125396729 accuracy:  0.30000001192092896 

Epoch  5
loss:  1.1106562614440918 accuracy:  0.2666666805744171 

Epoch  6
loss:  1.0785987377166748 accuracy:  0.30000001192092896 

Epoch  7
loss:  1.0622694492340088 accuracy:  0.30000001192092896 

Epoch  8
loss:  1.0444947481155396 accuracy:  0.30000001192092896 

Epoch  9
loss:  1.0267270803451538 accuracy:  0.30000001192092896 

Epoch  10
loss:  1.0071864128112793 accuracy:  0.30000001192092896 

Epoch  11
loss:  0.986542820930481 accuracy:  0.30000001192092896 

Epoch  12
loss:  0.9669081568717957 accuracy:  0.30000001192092896 

Epoch  13
loss:  0.9469481110572815 accuracy:  0.30000001192092896 

Epoch  14
loss:  0.9274236559867859 accuracy:  0

{'loss': [2.030327320098877,
  1.7610653638839722,
  1.5293662548065186,
  1.3294787406921387,
  1.1859362125396729,
  1.1106562614440918,
  1.0785987377166748,
  1.0622694492340088,
  1.0444947481155396,
  1.0267270803451538,
  1.0071864128112793,
  0.986542820930481,
  0.9669081568717957,
  0.9469481110572815,
  0.9274236559867859,
  0.9105685353279114,
  0.8952395915985107,
  0.8774256110191345,
  0.8615209460258484,
  0.8450669646263123,
  0.8303794860839844,
  0.8138881325721741,
  0.7995455861091614,
  0.7860969305038452,
  0.7716020345687866,
  0.759977400302887,
  0.7482830286026001,
  0.7353783249855042,
  0.7253599166870117,
  0.7136319279670715,
  0.7012836933135986,
  0.6901937127113342,
  0.6796579360961914,
  0.6687695980072021,
  0.6603279113769531,
  0.6509579420089722,
  0.641491174697876,
  0.6332687139511108,
  0.62464839220047,
  0.616381824016571,
  0.6088261008262634,
  0.6014447808265686,
  0.5936964154243469,
  0.5861839056015015,
  0.5798320770263672,
  0.57268

In [15]:
# Dataset over all feature columns, paired with the same class codes.
data_all = torch.utils.data.TensorDataset(input_columns_all, output_columns)

# train_split, val_split and test_split are defined earlier.
# A fixed-seed generator makes the partition reproducible across kernel
# restarts instead of drawing a new random partition on every run.
split_generator_all = torch.Generator().manual_seed(42)
train_set_all, val_set_all, test_set_all = torch.utils.data.random_split(
    data_all,
    [train_split, val_split, test_split],
    generator=split_generator_all,
)

train_loader_all = torch.utils.data.DataLoader(train_set_all, 16, shuffle = True)
val_loader_all = torch.utils.data.DataLoader(val_set_all)
test_loader_all = torch.utils.data.DataLoader(test_set_all)

# Take the input width from the data instead of hard-coding 4, so the model
# stays in sync with input_columns_all if the feature set changes.
model_all = LogisticRegression(input_columns_all.shape[1], len(species))
history_all = fit(model_all, train_loader_all, val_loader_all, epochs, learning_rate)

# Final check on the held-out test split.
loss , accuracy = evaluate(model_all, test_loader_all)
print("Evaluation result: Loss: ", loss.item(), " Accuracy: ", accuracy.item())

Epoch  0
loss:  0.9214321374893188 accuracy:  0.20000000298023224 

Epoch  1
loss:  0.8374913334846497 accuracy:  0.30000001192092896 

Epoch  2
loss:  0.7913421392440796 accuracy:  0.5 

Epoch  3
loss:  0.7314777374267578 accuracy:  0.5666666626930237 

Epoch  4
loss:  0.7106940746307373 accuracy:  0.5666666626930237 

Epoch  5
loss:  0.6411460041999817 accuracy:  0.699999988079071 

Epoch  6
loss:  0.6133079528808594 accuracy:  0.699999988079071 

Epoch  7
loss:  0.6007789969444275 accuracy:  0.6333333253860474 

Epoch  8
loss:  0.5849729776382446 accuracy:  0.6000000238418579 

Epoch  9
loss:  0.5588006973266602 accuracy:  0.6666666865348816 

Epoch  10
loss:  0.514543890953064 accuracy:  0.9333333373069763 

Epoch  11
loss:  0.49819281697273254 accuracy:  0.9333333373069763 

Epoch  12
loss:  0.5612049698829651 accuracy:  0.5666666626930237 

Epoch  13
loss:  0.4893837571144104 accuracy:  0.7666666507720947 

Epoch  14
loss:  0.4779756963253021 accuracy:  0.800000011920929 

Epoch 