In [85]:
import pandas as pd
import matplotlib.pyplot as plt
import opendatasets as od

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.optim import SGD


In [86]:
# Fetch the Kaggle "uciml/iris" dataset into ./datasets/ (opendatasets skips
# the download when the files are already present, as the output below shows).
od.download('https://www.kaggle.com/datasets/uciml/iris',data_dir='./datasets/')

Skipping, found downloaded files in "./datasets/iris" (use force=True to force download)


In [87]:
# Load the raw Iris table from the downloaded CSV and preview the first rows.
df = pd.read_csv('./datasets/iris/Iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [88]:
# The 'Id' column is only a row counter, so it is removed before modelling.
df = df.drop(columns='Id')
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [89]:
# Restrict to the first 100 rows and turn Species into a binary label.
# .copy() makes an independent frame, so the column assignment below does not
# write into a slice of the original frame (avoids SettingWithCopyWarning).
df = df.iloc[:100].copy()
# Iris-setosa -> 0, every other species -> 1 (vectorised instead of a per-row lambda).
df['Species'] = (df['Species'] != 'Iris-setosa').astype('int64')
print(df.head(100))

    SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  Species
0             5.1           3.5            1.4           0.2        0
1             4.9           3.0            1.4           0.2        0
2             4.7           3.2            1.3           0.2        0
3             4.6           3.1            1.5           0.2        0
4             5.0           3.6            1.4           0.2        0
..            ...           ...            ...           ...      ...
95            5.7           3.0            4.2           1.2        1
96            5.7           2.9            4.2           1.3        1
97            6.2           2.9            4.3           1.3        1
98            5.1           2.5            3.0           1.1        1
99            5.7           2.8            4.1           1.3        1

[100 rows x 5 columns]


In [90]:
# Two input columns (sepal length, petal length) and the binary Species label,
# both converted to float32 tensors.
features = torch.tensor(
    df[['SepalLengthCm', 'PetalLengthCm']].to_numpy(),
    dtype=torch.float32,
)
labels = torch.tensor(df['Species'].to_numpy(), dtype=torch.float32)

In [91]:
# Sanity check: one row of features per label.
features.shape, labels.shape

(torch.Size([100, 2]), torch.Size([100]))

In [92]:
# Fix the global RNG seed so the permutation (and hence the split) is reproducible.
torch.manual_seed(1)
# Random ordering of all sample indices, one index per label.
shuffled_index = torch.randperm(labels.shape[0], dtype=torch.long)

In [93]:
# Reorder features and labels with the same permutation so rows stay aligned.
# Note: this cell reassigns its own inputs, so re-running it shuffles again.
features = features[shuffled_index]
labels = labels[shuffled_index]

In [94]:
# `features` and `labels` have already been permuted with `shuffled_index`,
# so a plain 75/25 slice yields the random split. Indexing with
# `shuffled_index` a second time would apply the permutation twice.
X_train, X_test = features[:75], features[75:]
y_train, y_test = labels[:75], labels[75:]

In [95]:
# Per-feature statistics are taken from the training split only and then
# applied to both splits, so the test data does not influence the scaling.
mean, std = X_train.mean(dim=0), X_train.std(dim=0)

X_train, X_test = (X_train - mean) / std, (X_test - mean) / std

In [96]:
class Adaline(nn.Module):
    """Adaptive linear neuron: one linear unit with an identity activation.

    Parameters
    ----------
    num_features : int
        Number of input features fed to the linear layer.
    """

    def __init__(self, num_features):
        super().__init__()
        # Single output unit; PyTorch's default parameter initialisation is kept.
        self.L1 = nn.Linear(num_features, 1)

        # For zero-initialised parameters instead, uncomment:
        # self.L1.weight.detach().zero_()  # trailing underscore = in-place op
        # self.L1.bias.detach().zero_()

    def forward(self, x):
        """Return the net input of the linear unit for ``x`` as a 1-D tensor."""
        # Identity activation: the linear output is returned unchanged, only
        # flattened with view(-1).
        return self.L1(x).view(-1)

In [97]:
# def loss_func(yhat,y):
#     return torch.mean((yhat-y)**2,dtype=float)

In [110]:
def train(model,X,y,num_epochs,learning_rate=0.01,seed=1,batch_size=16):
    """Fit ``model`` with minibatch SGD on a mean-squared-error loss.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose output for ``X[idx]`` is compared with ``y[idx]`` by
        ``F.mse_loss``; its parameters are updated in place.
    X : torch.Tensor
        Training inputs, indexed along dim 0.
    y : torch.Tensor
        Training targets; ``y.size()[0]`` is taken as the number of samples.
    num_epochs : int
        Number of full passes over the data.
    learning_rate : float, default 0.01
        Step size handed to ``torch.optim.SGD``.
    seed : int, default 1
        Passed to ``torch.manual_seed``; note this reseeds the global RNG.
    batch_size : int, default 16
        Chunk size for ``torch.split``; the last minibatch may be smaller.

    Returns
    -------
    list of torch.Tensor
        One 0-dim tensor per epoch holding the MSE over all of ``X`` after
        that epoch's updates.
    """
    torch.manual_seed(seed)
    cost = []
    optim = SGD(model.parameters(), lr=learning_rate)
    num_samples = y.size()[0]

    for epoch in range(num_epochs):
        # Fresh sample order every epoch, cut into consecutive minibatches.
        shuffled_idx = torch.randperm(num_samples, dtype=torch.long)

        for minibatch_idx in torch.split(shuffled_idx, batch_size):
            # Call the module itself rather than .forward() so that any
            # registered forward hooks run as well.
            y_hat = model(X[minibatch_idx])
            loss = F.mse_loss(y_hat, y[minibatch_idx])
            optim.zero_grad()
            loss.backward()
            optim.step()

        # Epoch-level loss on the full data; no graph is needed for logging.
        with torch.no_grad():
            current_loss = F.mse_loss(model(X), y)
        print("Epoch {} | MSE {}".format(epoch, round(current_loss.item(), 2)))
        cost.append(current_loss)

    return cost

In [99]:
# Number of input features (columns) the model has to accept.
X_train.shape[1]

2

In [111]:
# Build an Adaline sized to the training features and fit it with minibatch SGD.
model = Adaline(X_train.shape[1])
cost = train(
    model,
    X_train,
    y_train,
    num_epochs=25,
    learning_rate=0.1,
    seed=1,
    batch_size=10,
)


Epoch 0 | MSE 0.04
Epoch 1 | MSE 0.02
Epoch 2 | MSE 0.02
Epoch 3 | MSE 0.02
Epoch 4 | MSE 0.01
Epoch 5 | MSE 0.01
Epoch 6 | MSE 0.01
Epoch 7 | MSE 0.01
Epoch 8 | MSE 0.01
Epoch 9 | MSE 0.01
Epoch 10 | MSE 0.01
Epoch 11 | MSE 0.01
Epoch 12 | MSE 0.01
Epoch 13 | MSE 0.01
Epoch 14 | MSE 0.01
Epoch 15 | MSE 0.01
Epoch 16 | MSE 0.01
Epoch 17 | MSE 0.01
Epoch 18 | MSE 0.01
Epoch 19 | MSE 0.01
Epoch 20 | MSE 0.01
Epoch 21 | MSE 0.01
Epoch 22 | MSE 0.01
Epoch 23 | MSE 0.01
Epoch 24 | MSE 0.01


In [101]:
d = torch.tensor([[2.778,3.992]])