In [1]:
# https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel
import torch
from torch.utils import data
import pandas as pd

class DataSpliter(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, dataframe, features, labels):
        'Initialization'
        super(DataSpliter, self).__init__()
        self.dataframe = dataframe
        self.labels = labels
        self.list_IDs = features

    def __len__(self):
        'Denotes the total number of samples'
        return self.dataframe.shape[0]

    def __getitem__(self, index):
        'Generates one sample of data'
        # Load data and get label
        X = torch.FloatTensor(self.dataframe[self.list_IDs].to_numpy())[index]
        y = (torch.LongTensor(self.dataframe[self.labels].to_numpy())).squeeze(1)[index]

        return X, y

In [2]:
iris = pd.read_csv('https://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv')
iris

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
# iris = pd.get_dummies(iris)
# iris.columns
mappings = {
   'Iris-setosa': 0,
   'Iris-versicolor': 1,
   'Iris-virginica': 2
}
iris['Name'] = iris['Name'].apply(lambda x: mappings[x])

In [4]:

test_limit = int(iris.shape[0]*.8)
iris_train = iris[:test_limit]
iris_test = iris[test_limit:]
iris_train.shape, iris_test.shape

((120, 5), (30, 5))

In [5]:
iris_data = DataSpliter(iris_train, ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'], ['Name'])
# X_train = iris_data[:][0]
# y_train = iris_data[:][1]

In [6]:
# Parameters
params = {'batch_size': 50,
          'shuffle': True,
          'num_workers': 2}

In [7]:
training_generator = data.DataLoader(iris_data, **params)

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [9]:
class ANN2(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=12)
        self.output = nn.Linear(in_features=12, out_features=3)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.output(x)
        return x

In [10]:
model = ANN2()
model

ANN2(
  (fc1): Linear(in_features=4, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=12, bias=True)
  (output): Linear(in_features=12, out_features=3, bias=True)
)

In [11]:
# x_train, y_train = training_generator.__iter__().next()
# y_hat = model.forward(x_train)
# criterion = nn.CrossEntropyLoss()
# y_hat.shape

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [13]:
%%time
epochs = 500
loss_arr = []
for i in range(epochs):
    for x_train, y_train in training_generator:
        y_hat = model.forward(x_train)
        loss = criterion(y_hat, y_train)
        loss_arr.append(loss)

    if i % 10 == 0:
        print(f'Epoch: {i} Loss: {loss}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch: 0 Loss: 1.1114628314971924
Epoch: 10 Loss: 0.7137932777404785
Epoch: 20 Loss: 0.4906112551689148
Epoch: 30 Loss: 0.31363537907600403
Epoch: 40 Loss: 0.36875590682029724
Epoch: 50 Loss: 0.2983701825141907
Epoch: 60 Loss: 0.17003491520881653
Epoch: 70 Loss: 0.2154860496520996
Epoch: 80 Loss: 0.19274850189685822
Epoch: 90 Loss: 0.15811702609062195
Epoch: 100 Loss: 0.20718666911125183
Epoch: 110 Loss: 0.053132735192775726
Epoch: 120 Loss: 0.10098382085561752
Epoch: 130 Loss: 0.04496880620718002
Epoch: 140 Loss: 0.03777848929166794
Epoch: 150 Loss: 0.07400967180728912
Epoch: 160 Loss: 0.16549734771251678
Epoch: 170 Loss: 0.050620187073946
Epoch: 180 Loss: 0.015123116783797741
Epoch: 190 Loss: 0.018933163955807686
Epoch: 200 Loss: 0.01657499559223652
Epoch: 210 Loss: 0.008309027180075645
Epoch: 220 Loss: 0.057054053992033005
Epoch: 230 Loss: 0.01956889033317566
Epoch: 240 Loss: 0.021948207169771194
Epoch: 250 Loss: 0.027825143188238144
Epoch: 260 Loss: 0.022910604253411293
Epoch: 270 

In [14]:
iris2_data2 = DataSpliter(iris_test, ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'], ['Name'])
X_test = iris2_data2[:][0]
y_test = iris2_data2[:][1]
X_test.shape, y_test.shape

(torch.Size([30, 4]), torch.Size([30]))

In [15]:
preds = []
with torch.no_grad():
    for val in X_test:
        y_hat = model.forward(val)
        preds.append(y_hat.argmax().item())

In [16]:
df = pd.DataFrame({'Y': y_test, 'YHat': preds})
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Y'], df['YHat'])]
df

Unnamed: 0,Y,YHat,Correct
0,2,2,1
1,2,2,1
2,2,2,1
3,2,2,1
4,2,2,1
5,2,2,1
6,2,2,1
7,2,2,1
8,2,2,1
9,2,2,1


In [17]:
df['Correct'].sum() / len(df)

0.9666666666666667

In [18]:
for coeff in model.parameters():
    print(coeff)

Parameter containing:
tensor([[ 0.3981,  0.0284,  0.6853,  1.0698],
        [-0.4069,  0.3903, -0.2666,  0.0922],
        [ 0.5218,  0.8250, -0.6306, -1.2016],
        [ 0.1355, -0.3274, -0.3597,  0.3076],
        [ 0.0905,  0.7467, -0.5504, -0.2572],
        [ 0.4317,  0.4432, -0.6113, -0.3963],
        [ 0.2185, -0.4803, -0.2054, -0.4129],
        [-0.0892, -0.3914, -0.0059,  0.0577],
        [ 0.0868,  0.2976,  0.4204, -0.2102],
        [ 0.3062, -0.6850,  0.5834,  0.8635],
        [-0.1052, -0.1982, -0.4827, -0.0628],
        [ 0.5467, -0.1497, -0.2590, -0.2893],
        [-0.2253,  0.2552,  0.1119,  0.8226],
        [-0.3253, -0.1027, -0.3014,  0.4747],
        [ 0.5194,  0.4075, -0.4612, -0.9260],
        [ 0.4818,  0.4820, -0.2426, -0.6389]], requires_grad=True)
Parameter containing:
tensor([-0.6623, -0.0366,  1.0087,  0.3035,  0.9934,  0.1348, -0.2848,  0.1284,
         0.3562, -0.7707, -0.0982,  0.2602, -0.4431,  0.4155,  0.0178, -0.0033],
       requires_grad=True)
Parameter c