# Project 1 -  MLP

## Setup Function

In [154]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split

class_1 = [1,0,0]
class_2 = [0,1,0]
class_3 = [0,0,1]

def convert(data):
    temp = []
    for y in data:
        if y == 1:
            temp += [class_1]
        elif y == 2:
            temp += [class_2]
        elif y == 3:
            temp += [class_3]
    return(temp)

def decode(encoded_class):
    print(encoded_class)
    classes = [class_1, class_2, class_3]
    for idx in range(len(classes)):
        if not (encoded_class-classes[idx]).any():
            return idx+1
        
def decode_spec(encoded_class):
    classes = [class_1, class_2, class_3]
    for idx in range(len(classes)):
        if not encoded_class ==classes[idx]:
            return idx+1

## Data Preparation and Cleaning

In [155]:
df = pd.read_csv('https://archive.ics.uci.edu/ml/'
                 'machine-learning-databases/wine/wine.data', names=(
                                            'Class', 
                                            'Alcohol', 
                                            'Malic acid',
                                            'Ash', 
                                            'Alcalinity of ash',
                                            'Magnesium', 
                                            'Total phenols',
                                            'Flavanoids', 
                                            'Nonflavanoid phenols',
                                            'Proanthocyanins', 
                                            'Color intensity',
                                            'Hue', 
                                            'OD280/OD315 of diluted wines',
                                            'Proline' ))

In [156]:
x_train, x_test, y_train, y_test = train_test_split(df, df['Class'], test_size=0.2)
x_train =(x_train-x_train.min())/(x_train.max()-x_train.min())
x_test =(x_test-x_test.min())/(x_test.max()-x_test.min())
x_train = x_train.drop(['Class'], axis=1).to_numpy()
x_test = x_test.drop(['Class'], axis=1).to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
y_train = np.array(convert(y_train))
y_test = np.array(convert(y_test))


## Random Chance Model

In [157]:
def percent_error(actual, predict):
    return abs(actual-predict)/abs(predict)*100

In [158]:
#Since model is random, simulate multiple times and find a true average
avg_true = 0
simulate = 1000
for i in range(simulate):
    class_cnts = {"1":0, "2":0, "3":0}

    for x in range(len(y_train)):
        x = np.random.randint(3)+1
        class_cnts[str(x)] += 1

    avg_true += percent_error(class_cnts["1"], sum(class_cnts.values()))
print("Percent Error: " + str(avg_true/simulate))

Percent Error: 66.75352112676067


## General Functions

In [159]:
def sigmoid(x):
    return(1/(1 + np.exp(-x)))

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    e_x /= e_x.sum()
    trash = np.zeros(len(e_x))
    trash[np.argmax(e_x)] = 1
    return trash

def selection(x):
    return(softmax(x))

def generate_wt(x, y):
    l =[]
    for i in range(x):
        l.append(np.random.uniform(-1,1,y))
    return(np.array(l).reshape(x, y))

def dot_product(nodes, weights):
    d_fixed = []
    for val in nodes:
        temp = []
        for weight in weights:
            temp += [weight*val]
        d_fixed += [temp]
    d_fixed = np.array(d_fixed)
    return(d_fixed)

## MLP Functions

In [160]:
def forward(x, w1, w2):
    z1 = x.dot(w1)# input from layer 1
    a1 = sigmoid(z1)# out put of layer 2

    z2 = a1.dot(w2)# input of out layer
    a2 = softmax(z2)# output of out layer
    return(a2)

def loss(out, Y):
    s =(np.square(out.astype(int)-Y))
    s = np.sum(s)
    return(s)

def back_prop(x, y, w1_in, w2_in, alpha):
     
    # hidden layer
    z1 = x.dot(w1_in)# input from layer 1
    a1 = sigmoid(z1)# output of layer 2
     
    # Output layer
    z2 = a1.dot(w2_in)# input of out layer
    a2 = softmax(z2)# output of out layer
    
    # error in output layer
    d2 = (a2-y)
    temp1 = w2_in.dot(d2.T)
    temp2 = np.multiply(a1, 1-a1)
    d1 = np.multiply(temp1.T, temp2)
 
    # Gradient for w1 and w2
    w1_adj = dot_product(x,d1)
    w2_adj = dot_product(a1,d2)
     
    # Updating parameters
    temp3 = alpha*(w1_adj)
    w1_out = w1_in-temp3
    temp4 = alpha*(w2_adj)
    w2_out = w2_in-temp4
    
    truth = lambda x: x==0     
    return(w1_out, w2_out)

## Run the MLP

In [161]:
epochs = 500
lr = 0.01
w1 = generate_wt(13, 100)
w2 = generate_wt(100,3)
check = lambda x: x==0
for j in range(epochs):
    temp = 0
    for idx in range(len(x_train)):
        out = forward(x_train[idx],w1,w2)
        losss = loss(out, y_train[idx])
        if check(losss):
            temp += 1
        w1, w2 = back_prop(x_train[idx], y_train[idx], w1, w2, lr)
    print("epochs:", j + 1, "======== acc:", (temp/len(x_train))) 





## Predict on Test Data

In [162]:
def predict(x, y, w1, w2, use_decode_spec=False):
    Out = forward(x, w1, w2)
    for i in range(len(Out)):
        if Out[i] == 1:
            if use_decode_spec:
                y = decode_spec(y)
            else:
                y = decode(y)
            print(f"Predicted Class: {i+1}, Actual Class: {y}")
            return 1
    return 0

correct_predictions = 0
for idx in range(len(x_test)):
    predict(x_test[idx], y_test[idx], w1, w2)
    correct_predictions += 1
print(correct_predictions/len(x_test))

[1 0 0]
Predicted Class: 1, Actual Class: 1
[0 1 0]
Predicted Class: 2, Actual Class: 2
[0 0 1]
Predicted Class: 3, Actual Class: 3
[1 0 0]
Predicted Class: 1, Actual Class: 1
[1 0 0]
Predicted Class: 1, Actual Class: 1
[1 0 0]
Predicted Class: 1, Actual Class: 1
[0 0 1]
Predicted Class: 3, Actual Class: 3
[0 0 1]
Predicted Class: 3, Actual Class: 3
[1 0 0]
Predicted Class: 1, Actual Class: 1
[0 0 1]
Predicted Class: 3, Actual Class: 3
[0 1 0]
Predicted Class: 1, Actual Class: 2
[0 1 0]
Predicted Class: 2, Actual Class: 2
[0 1 0]
Predicted Class: 2, Actual Class: 2
[0 1 0]
Predicted Class: 2, Actual Class: 2
[0 1 0]
Predicted Class: 2, Actual Class: 2
[0 1 0]
Predicted Class: 2, Actual Class: 2
[1 0 0]
Predicted Class: 1, Actual Class: 1
[1 0 0]
Predicted Class: 1, Actual Class: 1
[1 0 0]
Predicted Class: 1, Actual Class: 1
[0 0 1]
Predicted Class: 3, Actual Class: 3
[0 1 0]
Predicted Class: 2, Actual Class: 2
[1 0 0]
Predicted Class: 1, Actual Class: 1
[0 0 1]
Predicted Class: 3, Actu

## K-Fold Cross Validation

In [163]:
df_copy = df
splits=5
kf = KFold(n_splits=splits, shuffle=True)
data = df_copy.values
np.random.shuffle(data)
folds = np.array_split(data, 5)
epochs = 500
lr = 0.01
accuracy = 0
fold_classes = []
for fold in folds:
    classes = []
    for row in fold:
        c = (row[0])
        row = np.delete(row, 0)
        classes += [convert([int(c)])][0]
    fold_classes += [classes]
fold_classes = np.array(fold_classes)

for i in range(len(folds)):
    fold = np.delete(folds[i], 0, axis=1)
    folds[i] = fold

for fold_idx in range(len(folds)):
    prev_fold = 0
    prev_fold_classes = 0
    for other_fold_idx in range(len(folds)):
        if fold_idx == other_fold_idx:
            continue
        if type(prev_fold) == type(0):
            prev_fold = folds[other_fold_idx]
            prev_fold_classes = fold_classes[other_fold_idx]
        else:
            np.concatenate((prev_fold, folds[other_fold_idx]), axis=0)
            np.concatenate((prev_fold_classes, fold_classes[other_fold_idx]), axis=0)
    w1 = generate_wt(13, 100)
    w2 = generate_wt(100,3)
    check = lambda x: x==0
    for j in range(epochs):
        temp = 0
        for idx in range(len(prev_fold)):
            out = forward(prev_fold[idx],w1,w2)
            losss = loss(out, prev_fold_classes[fold_idx])
            if check(losss):
                temp += 1
            w1, w2 = back_prop(prev_fold[idx], prev_fold_classes[fold_idx], w1, w2, lr)
            
    correct_predictions = 0
    fold = folds[i]
    output = fold_classes[i]
    for k in range(len(fold)):
        predict(fold[k], output[k], w1, w2, use_decode_spec=True)
        correct_predictions += 1
    accuracy += correct_predictions/len(x_test)
    
print(f"Average prediction of the {splits} cross-folds: {accuracy/splits:.2f}")

  


Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 2
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual Class: 1
Predicted Class: 1, Actual C

## Conclusion

This project was challenging not just because I was unable to use common machine learning libraries but because I was forced to critically think through how a Multilayer Perceptron model really works. My model manages to average above 95% accuracy after many many hours of work. I believe I should expect a slightly lower accuracy though as I am not taking into account parameter penalization. I would love to keep working iwht this perceptron to see how many layers really make it "optimal".  Also, I am unsure if my model being fully connected impacts the overall accuracy or not, but I think removing some connections could be interesting. I feel like I learned quite a bit about feed forward networks and how back propogation works. I enjoyed working on this project qute a bit, and I can't wait to see what we do next.

## Credit to the following
- https://towardsdatascience.com/neural-net-from-scratch-using-numpy-71a31f6e3675
- https://medium.com/analytics-vidhya/neural-network-mnist-classifier-from-scratch-using-numpy-library-94bbcfed7eae
- https://www.geeksforgeeks.org/implementation-of-neural-network-from-scratch-using-numpy/
- https://machinelearninggeek.com/multi-layer-perceptron-neural-network-using-python/
- https://www.youtube.com/watch?v=0oWnheK-gGk
- https://machinelearningmastery.com/k-fold-cross-validation/
- Peers: Alex Kiefer
- Dr. Tang's comments