In [1]:
# Importing required libraries
import torch
import pandas as pd
import numpy as np

## IRIS Dataset

In [2]:
# Read the IRIS table from a space-separated text file
# (150 rows, according to the original notebook note).
df_iris = pd.read_csv(u'data/iris.txt', sep=' ')

# Split the frame: the four measurement columns become the features,
# the 'c' column becomes the target.
feature_columns = ['sl', 'sw', 'pl', 'pw']
features = np.array(df_iris[feature_columns])
target = np.array(df_iris['c'])

In [3]:
# Network dimensions:
#   D_in  - input width (4 feature columns)
#   H1/H2 - widths of the two hidden layers (free design choices)
#   D_out - output width (3 classes)
# The network emits one raw score (logit) per class for every row; it does
# not output probabilities itself. CrossEntropyLoss consumes those raw scores
# together with class indexes that start at 0. Passed through a softmax, a
# row of scores would read like [0.2 0.7 0.1]
# (20% Class 1, 70% Class 2, 10% Class 3).
D_in, H1, H2, D_out = 4, 16, 8, 3

# Layers listed in forward order: Linear -> ReLU -> Linear -> ReLU -> Linear.
# ReLU is the non-linearity between the linear layers (negative values become 0).
layers = [
    torch.nn.Linear(D_in, H1),
    torch.nn.ReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.ReLU(),
    torch.nn.Linear(H2, D_out),
]
model = torch.nn.Sequential(*layers)

# Loss function; reduction='sum' adds the per-row losses instead of averaging them.
loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')

# Step size handed to the optimizer.
learning_rate = 0.005

# Adam updates the model's parameters from their gradients.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Number of passes over the whole dataset.
max_epoch = 500

# The loss is printed every print_epoch epochs.
print_epoch = 50

# Features as a float tensor, labels as a long tensor.
x = torch.FloatTensor(features)
# The loss function needs class indexes starting from 0, hence the shift by one.
y = torch.LongTensor(target) - 1

# Training loop: every epoch feeds the complete dataset through the model once.
for epoch in range(max_epoch):
    # Forward pass: raw class scores for every row.
    y_pred = model(x)

    # Compare the scores with the actual labels.
    loss = loss_fn(y_pred, y)

    # Report the loss periodically and on the final epoch.
    is_report_epoch = epoch % print_epoch == 0
    is_final_epoch = epoch + 1 == max_epoch
    if is_report_epoch or is_final_epoch:
        print(epoch, loss.item())

    # Clear the gradients left over from the previous epoch.
    optimizer.zero_grad()

    # Backward pass: gradient of the loss with respect to the model parameters.
    loss.backward()

    # Apply the gradient step to the model parameters.
    optimizer.step()

0 170.591064453125
50 70.36634826660156
100 19.250059127807617
150 10.490957260131836
200 8.667634963989258
250 7.878386974334717
300 7.43941068649292
350 7.1361775398254395
400 6.900533676147461
450 6.706483364105225
499 6.546168327331543


In [4]:
# Score every row of the dataset with the trained model. Gradient tracking is
# switched off because nothing is back-propagated during evaluation.
with torch.no_grad():
    results = model(x)

# Collapse each row of class scores to the index of its largest score
# (for [0.2 0.7 0.1] that is index 1), for all rows in one call.
predicted = results.argmax(dim=1)

# Class indexes were shifted to start at 0 for the loss function;
# shift the predictions back so the 0th class is shown as class 1.
result = predicted.numpy() + 1

print("Result:\n", result, "\n")

# n-by-n confusion matrix where n is the number of output classes:
# rows are the actual class, columns are the predicted class.
confusion_matrix = torch.zeros((D_out, D_out))

# y is deliberately NOT modified here. Both y and predicted are 0-based, so
# they index the matrix directly, and re-running this cell gives the same
# result instead of shifting the labels again on every run.
for actual_cls, predicted_cls in zip(y.tolist(), predicted.tolist()):
    confusion_matrix[actual_cls, predicted_cls] += 1

print("Confusion matrix:\n", confusion_matrix, "\n")

# Accuracy = correctly classified rows (the diagonal) / all rows.
accuracy = torch.sum(torch.diag(confusion_matrix))/torch.sum(confusion_matrix)

print("Accuracy:", accuracy*100, "%\n")

Result:
 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3] 

Confusion matrix:
 tensor([[50.,  0.,  0.],
        [ 0., 49.,  1.],
        [ 0.,  1., 49.]]) 

Accuracy: tensor(98.6667) %



## MNIST Dataset

In [5]:
# Read the MNIST training table from a comma-separated file without a header row
# (60,000 rows, according to the original notebook note).
df_mnist = pd.read_csv(u'data/mnist_train.csv', sep=',', header=None)

# Column 0 is taken as the target; columns 1..784 are taken as the features.
pixel_columns = list(range(1, 785))
features = np.array(df_mnist.iloc[:, pixel_columns])
target = np.array(df_mnist.iloc[:, 0])

In [6]:
# Network dimensions:
#   D_in  - input width (28*28 = 784 feature columns)
#   H1/H2 - widths of the two hidden layers (free design choices)
#   D_out - output width (10 classes, the digits 0 to 9)
# The network emits one raw score (logit) per class for every row; it does
# not output probabilities itself. CrossEntropyLoss consumes those raw scores
# together with class indexes that start at 0. Passed through a softmax, a
# row of scores would read like
# [.02 .7 .01 .01 .01 .03 .02 .01 .01 .05] (2% digit 0, 70% digit 1, 1% digit 2 etc.)
D_in, H1, H2, D_out = 784, 64, 32, 10

# Layers listed in forward order: Linear -> ReLU -> Linear -> ReLU -> Linear.
# ReLU is the non-linearity between the linear layers (negative values become 0).
layers = [
    torch.nn.Linear(D_in, H1),
    torch.nn.ReLU(),
    torch.nn.Linear(H1, H2),
    torch.nn.ReLU(),
    torch.nn.Linear(H2, D_out),
]
model = torch.nn.Sequential(*layers)

# Loss function; reduction='sum' adds the per-row losses instead of averaging them.
loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')

# Step size handed to the optimizer.
learning_rate = 0.005

# Adam updates the model's parameters from their gradients.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Number of passes over the whole training set.
max_epoch = 1000

# The loss is printed every print_epoch epochs.
print_epoch = 100

# Features as a float tensor, labels as a long tensor
# (the labels are used as class indexes without any shift).
x = torch.FloatTensor(features)
y = torch.LongTensor(target)

# Training loop: every epoch feeds the complete training set through the model once.
for epoch in range(max_epoch):
    # Forward pass: raw class scores for every row.
    y_pred = model(x)

    # Compare the scores with the actual labels.
    loss = loss_fn(y_pred, y)

    # Report the loss periodically and on the final epoch.
    is_report_epoch = epoch % print_epoch == 0
    is_final_epoch = epoch + 1 == max_epoch
    if is_report_epoch or is_final_epoch:
        print(epoch, loss.item())

    # Clear the gradients left over from the previous epoch.
    optimizer.zero_grad()

    # Backward pass: gradient of the loss with respect to the model parameters.
    loss.backward()

    # Apply the gradient step to the model parameters.
    optimizer.step()

0 681039.75
100 15979.205078125
200 10977.0087890625
300 8616.55078125
400 7155.63671875
500 6087.1015625
600 5313.93798828125
700 4673.34912109375
800 4210.1640625
900 3728.392333984375
999 3393.701416015625


In [7]:
# The model is trained; load the separate test file to evaluate it
# (10,000 rows, according to the original notebook note).
df_mnist_test = pd.read_csv(u'data/mnist_test.csv', sep=',', header=None)

# Same layout as the training file: column 0 is the label,
# columns 1..784 are the features.
pixel_columns = list(range(1, 785))
x_test = torch.FloatTensor(np.array(df_mnist_test.iloc[:, pixel_columns]))
y_test = torch.LongTensor(np.array(df_mnist_test.iloc[:, 0]))

# Score the test rows with the trained model (one score per class and row).
# Gradient tracking is switched off because nothing is back-propagated here,
# which avoids building an autograd graph for the whole test set.
with torch.no_grad():
    results = model(x_test)

# Collapse each row of class scores to the index of its largest score,
# for all rows in one call instead of a Python loop over every row.
predicted = results.argmax(dim=1)
result = predicted.numpy()

print("Result:\n", result, "\n")

# n-by-n confusion matrix where n is the number of output classes:
# rows are the actual class, columns are the predicted class.
confusion_matrix = torch.zeros((D_out, D_out))

# Iterate over plain Python ints rather than indexing the tensors element by element.
for actual_cls, predicted_cls in zip(y_test.tolist(), predicted.tolist()):
    confusion_matrix[actual_cls, predicted_cls] += 1

print("Confusion matrix:\n", confusion_matrix, "\n")

# Accuracy = correctly classified rows (the diagonal) / all rows.
accuracy = torch.sum(torch.diag(confusion_matrix))/torch.sum(confusion_matrix)

print("Accuracy:", accuracy*100, "%\n")

Result:
 [7 2 1 ... 4 5 6] 

Confusion matrix:
 tensor([[ 962.,    1.,    1.,    0.,    1.,    2.,    4.,    0.,    6.,    3.],
        [   0., 1116.,    3.,    2.,    2.,    1.,    3.,    2.,    5.,    1.],
        [   8.,    4.,  973.,   13.,    5.,    4.,    6.,    6.,   12.,    1.],
        [   1.,    1.,    8.,  948.,    0.,   18.,    1.,    7.,   16.,   10.],
        [   0.,    0.,   11.,    0.,  925.,    2.,   10.,    7.,    2.,   25.],
        [   6.,    6.,    1.,   21.,    5.,  826.,   11.,    2.,   10.,    4.],
        [   9.,    2.,    5.,    1.,    5.,    9.,  922.,    0.,    4.,    1.],
        [   2.,    5.,   12.,   10.,   10.,    3.,    1.,  956.,    3.,   26.],
        [   6.,    7.,   10.,   21.,    6.,   13.,    5.,    5.,  893.,    8.],
        [   7.,    6.,    0.,    8.,   20.,    3.,    3.,   13.,    8.,  941.]]) 

Accuracy: tensor(94.6200) %

