# Assignment 6: Multi-layer Perceptron Classifier
For this assignment we used the Pokemon data set from the Kaggle repository:

https://www.kaggle.com/abcsds/pokemon

Our data set contains 13 features consisting of 8 quantitative features:

Encyclopedia number
Sum of all Stats
Hit Points
Attack
Defense
Special Attack
Special Defense
Speed
and 5 categorical features:

Name
Type 1
Type 2
Generation
Legendary

For the purposes of this assignment we only used four of the quantitative features: HP as the target, and Attack, Defense and Speed as the inputs.

In [11]:
import matplotlib.pyplot as plt #graphing
import numpy as np
import pandas as pd #data analysis library
import seaborn as sns #graphing
import sklearn
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the Pokemon table from a CSV file expected in the notebook's working directory.
df = pd.read_csv("Pokemon.csv") #read in data
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


## Data Preprocessing
For the purposes of this assignment, we eliminate the following features:

   * #
   * Name
   * Type 1
   * Type 2
   * Total
   * Sp. Atk
   * Sp. Def
   * Generation
   * Legendary

In [3]:
# Columns kept for modelling, in the order the later positional (iloc) slices
# rely on: HP first (the target), then the three input stats.
poke_stats = ['HP', "Attack", "Defense", "Speed"]
# Select the kept columns by name rather than dropping the others. The result
# is the same four-column frame, but the cell is now safe to re-run: a second
# drop() of already-removed columns would raise a KeyError.
df = df[poke_stats]
df.head()

Unnamed: 0,HP,Attack,Defense,Speed
0,45,49,49,45
1,60,62,63,60
2,80,82,83,80
3,80,100,123,80
4,39,52,43,65


In [4]:
#set independent and dependent variables
# Input matrix: columns 1-3 of the trimmed frame (Attack, Defense, Speed).
# The stop index now matches the frame width; the previous stop of 9 only
# worked because iloc silently clips out-of-range slice bounds.
x = df.iloc[:, 1:4]
# Individual input columns by position (Attack, Defense, Speed respectively).
# NOTE(review): these three names are not referenced anywhere else in the
# visible notebook; kept so that any cell relying on them still runs.
leng = df.iloc[:, 1]
dim = df.iloc[:, 2]
rin = df.iloc[:, 3]
x

Unnamed: 0,Attack,Defense,Speed
0,49,49,45
1,62,63,60
2,82,83,80
3,100,123,80
4,52,43,65
...,...,...,...
795,100,150,50
796,160,110,110
797,110,60,70
798,160,60,80


In [5]:
# Target vector: column 0 of the trimmed frame, which is HP.
y = df.iloc[:,0]
print(y)

0      45
1      60
2      80
3      80
4      39
       ..
795    50
796    50
797    80
798    80
799    80
Name: HP, Length: 800, dtype: int64


In [6]:
#train test split
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Standardise the inputs. The scaler is fitted on the training rows only and
# the same transformation is then applied to the test rows, so no test-set
# statistics leak into training. (StandardScaler is imported in the notebook's
# import cell rather than mid-notebook.)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)  # apply same transformation to test data

## Training with MLP Classifier

In [7]:
#Train and fit data
# Baseline model: library defaults apart from the seed and the iteration cap.
mlp_clf = MLPClassifier(random_state=1, max_iter=300)
mlp_clf.fit(x_train, y_train)

# Report every setting from the fitted estimator itself instead of repeating
# literals, so the printout cannot drift from the configuration actually used.
# "Number of Neurons" now shows the full hidden_layer_sizes value (one entry
# per hidden layer) rather than a hand-typed count.
print("Parameters: ")
print("Iterations: ", mlp_clf.n_iter_)
print("Learning Rate: " + repr(mlp_clf.learning_rate))
print("alpha: ", mlp_clf.alpha)
print ("Learning Rate Iniitialization: ", mlp_clf.learning_rate_init)
print ("Tolerance: ", mlp_clf.tol)
print("Activation Function: " + repr(mlp_clf.activation))
# n_layers_ counts the input and output layers too, hence the "- 2".
print("Number of Hidden Layers: ", mlp_clf.n_layers_ - 2)
print("Number of Neurons: ", mlp_clf.hidden_layer_sizes)

#probability estimates
mlp_clf.predict_proba(x_train)

Parameters: 
Iterations:  300
Learning Rate: 'constant'
alpha:  0.0001
Learning Rate Iniitialization:  0.001
Tolerance:  0.0001
Activation Function: 'relu'
Number of Hidden Layers:  1
Number of Neurons:  100




array([[1.80454469e-03, 1.53205839e-03, 1.60626349e-03, ...,
        2.85740747e-04, 5.02183967e-03, 1.92181759e-04],
       [4.02559726e-02, 5.86126392e-05, 1.08819592e-05, ...,
        5.96435884e-02, 7.45649823e-05, 2.41567678e-05],
       [5.57619600e-06, 1.35927944e-08, 2.16242294e-08, ...,
        4.20348962e-06, 1.60309703e-07, 2.32439983e-08],
       ...,
       [2.80157333e-05, 1.94586708e-02, 6.29614332e-03, ...,
        1.05845737e-05, 3.09333958e-03, 1.27530039e-03],
       [1.78387309e-03, 1.97755372e-07, 3.61655221e-08, ...,
        1.39486045e-03, 6.76748936e-07, 7.79922048e-08],
       [5.25636458e-04, 1.72165319e-07, 2.01918189e-08, ...,
        8.62575173e-05, 4.20604456e-07, 6.95568050e-08]])

In [8]:
# Predict on the same rows the model was fitted on (training-set predictions).
ypred = mlp_clf.predict(x_train)
ypred

array([ 50,  60,  80,  60,  70,  65,  60,  50,  65,  65,  50,  60, 105,
        65,  80, 100,  80,  60,  50,  60,  40,  50,  45,  50,  50, 100,
        60,  70, 100,  50,  80,  45,  75,  60,  50,  60, 106,  60,  45,
        60,  50,  45,  91, 100,  90,  60,  40,  80,  65,  60,  45,  40,
       100,  50,  40,  50,  65,  40,  45,  60,  60,  80,  45,  45,  65,
        60,  90,  45, 105,  45,  50,  90,  45,  60,  50,  80,  60,  70,
        45,  40,  65, 100,  90,  65,  60,  45,  65,  60,  45,  50,  80,
        40,  45,  70,  70,  65,  75,  80,  60,  65, 100,  45,  45, 100,
       105,  50,  90, 160,  70,  45, 100,  45,  60,  45,  65, 100,  80,
        65,  60,  60,  60, 100,  70,  50,  50, 100,  60,  60,  75,  60,
        45,  80,  40,  50,  45,  75,  70,  90,  45,  60,  60,  60,  60,
        45,  60,  45,  65,  50,  45,  60,  60,  50,  50,  80,  50, 100,
        80,  65,  60,  40, 105,  70,  50,  50,  45,  50,  90,  80,  60,
        20,  60,  45,  38,  45,  45,  75,  45, 100, 100,  40,  6

## Training Set Performance Metrics

In [9]:
# Score the fitted model on the training rows; for scikit-learn classifiers
# `score` is the mean accuracy of the predictions against the given labels.
mlp_score = mlp_clf.score(x_train,y_train)
mlp_score

0.19375

ValueError: too many values to unpack (expected 4)

In [None]:
print("Evaluation Metrics for Training Set: ")
# Per-class precision / recall / F1 for the training-set predictions.
cr = classification_report(y_train, ypred)
print(cr)
# HP is a multi-class target, so there is no single tp/tn/fp/fn to unpack
# (those names were never defined and raised a NameError). The
# misclassification rate is simply the fraction of rows whose prediction
# differs from the true label, i.e. 1 - accuracy. y_train is converted to a
# plain array so the comparison is positional rather than index-aligned.
print("Misclassification Rate = ", np.mean(ypred != np.asarray(y_train)))

## Test Set Performance Metrics

In [None]:
# Predict on the held-out test rows (already scaled with the training-set scaler).
y_test_pred = mlp_clf.predict(x_test)
y_test_pred

In [None]:
# Score the fitted model on the held-out test rows; for scikit-learn
# classifiers `score` is the mean accuracy against the given labels.
mlp_test_score = mlp_clf.score(x_test,y_test)
mlp_test_score

In [None]:
print("Evaluation Metrics for Test Set: ")
# Mirror the training-set evaluation cell: the header used to be printed with
# nothing under it. Uses the test-set predictions from the preceding cell.
print(classification_report(y_test, y_test_pred))
# Fraction of test rows whose prediction differs from the true label
# (1 - accuracy); y_test is converted to an array for a positional comparison.
print("Misclassification Rate = ", np.mean(y_test_pred != np.asarray(y_test)))

# MLP Classifier Alternate Parameters #1

In [None]:

#Train and fit data
# Alternate configuration #1: two hidden layers (5000 then 5 units), tanh
# activation, a smaller alpha and the 'adaptive' learning-rate setting.
# NOTE(review): per the scikit-learn docs `learning_rate` is only used when
# solver='sgd'; the solver is left at its default here, so 'adaptive' may have
# no effect — confirm this is intended.
mlp_clf = MLPClassifier(hidden_layer_sizes=(5000,5), activation='tanh',
                        alpha=0.00001, learning_rate='adaptive', learning_rate_init=0.001,
                        max_iter=500, random_state=1, tol=0.0001)
mlp_clf.fit(x_train, y_train)

# Report the settings from the fitted estimator so the printout cannot drift
# from the configuration above. "Number of Neurons" now shows both layer
# sizes instead of only the first one.
print("Parameters: ")
print("Iterations: ", mlp_clf.n_iter_)
print("Learning Rate: " + repr(mlp_clf.learning_rate))
print("alpha: ", mlp_clf.alpha)
print ("Learning Rate Iniitialization: ", mlp_clf.learning_rate_init)
print ("Tolerance: ", mlp_clf.tol)
print("Activation Function: " + repr(mlp_clf.activation))
# n_layers_ counts the input and output layers too, hence the "- 2".
print("Number of Hidden Layers: ", mlp_clf.n_layers_ - 2)
print("Number of Neurons: ", mlp_clf.hidden_layer_sizes)

# The two predict_proba() calls that used to sit here were removed: their
# results were neither stored nor displayed, so they only cost compute time.
ypred = mlp_clf.predict(x_train)
print("Training Set Performance Metrics")
print(mlp_clf.score(x_train, y_train))

print("Evaluation Metrics for Training Set: ")
print(classification_report(y_train, ypred))
# Fraction of training rows predicted incorrectly (1 - accuracy).
print("Misclassification Rate = ", np.mean(ypred != np.asarray(y_train)))

print("Test Set Performance Metrics")
print(mlp_clf.score(x_test, y_test))




# MLP Classifier Alternate Parameters #2

In [None]:

#Train and fit data
# Alternate configuration #2: one hidden layer of 500 units, logistic
# activation, larger alpha, smaller initial learning rate, looser tolerance.
# NOTE(review): per the scikit-learn docs `learning_rate` is only used when
# solver='sgd'; the solver is left at its default here, so 'invscaling' may
# have no effect — confirm this is intended.
mlp_clf = MLPClassifier(hidden_layer_sizes=(500,), activation='logistic',
                        alpha=0.001, learning_rate='invscaling', learning_rate_init=0.0001,
                        max_iter=500, random_state=1, tol=0.001)
mlp_clf.fit(x_train, y_train)

# Report the settings from the fitted estimator so the printout cannot drift
# from the configuration above.
print("Parameters: ")
print("Iterations: ", mlp_clf.n_iter_)
print("Learning Rate: " + repr(mlp_clf.learning_rate))
print("alpha: ", mlp_clf.alpha)
print ("Learning Rate Iniitialization: ", mlp_clf.learning_rate_init)
print ("Tolerance: ", mlp_clf.tol)
print("Activation Function: " + repr(mlp_clf.activation))
# n_layers_ counts the input and output layers too, hence the "- 2".
print("Number of Hidden Layers: ", mlp_clf.n_layers_ - 2)
print("Number of Neurons: ", mlp_clf.hidden_layer_sizes)

# The two predict_proba() calls that used to sit here were removed: their
# results were neither stored nor displayed.
ypred = mlp_clf.predict(x_train)
print("Training Set Performance Metrics")
print(mlp_clf.score(x_train, y_train))

# `cm` was printed without ever being defined (NameError). Build the
# confusion matrix from the training labels and predictions first.
cm = confusion_matrix(y_train, ypred)
print("MLP Classifer: ")
print(cm)

print("Evaluation Metrics for Training Set: ")
print(classification_report(y_train, ypred))
# Fraction of training rows predicted incorrectly (1 - accuracy).
print("Misclassification Rate = ", np.mean(ypred != np.asarray(y_train)))

print("Test Set Performance Metrics")
print(mlp_clf.score(x_test, y_test))


# MLP Classifier Alternate Parameters #3


In [None]:
#Train and fit data
# Alternate configuration #3: two hidden layers (50 then 5 units), identity
# activation, larger alpha and a larger constant learning rate.
mlp_clf = MLPClassifier(hidden_layer_sizes=(50,5), activation='identity',
                        alpha=0.01, learning_rate='constant', learning_rate_init=0.01,
                        max_iter=500, random_state=1, tol=0.001)
mlp_clf.fit(x_train, y_train)

# Report the settings from the fitted estimator so the printout cannot drift
# from the configuration above. "Number of Neurons" now shows both layer
# sizes instead of only the first one.
print("Parameters: ")
print("Iterations: ", mlp_clf.n_iter_)
print("Learning Rate: " + repr(mlp_clf.learning_rate))
print("alpha: ", mlp_clf.alpha)
print ("Learning Rate Iniitialization: ", mlp_clf.learning_rate_init)
print ("Tolerance: ", mlp_clf.tol)
print("Activation Function: " + repr(mlp_clf.activation))
# n_layers_ counts the input and output layers too, hence the "- 2".
print("Number of Hidden Layers: ", mlp_clf.n_layers_ - 2)
print("Number of Neurons: ", mlp_clf.hidden_layer_sizes)

# The two predict_proba() calls that used to sit here were removed: their
# results were neither stored nor displayed.
ypred = mlp_clf.predict(x_train)
print("Training Set Performance Metrics")
print(mlp_clf.score(x_train, y_train))

print("Evaluation Metrics for Training Set: ")
print(classification_report(y_train, ypred))
# Fraction of training rows predicted incorrectly (1 - accuracy).
print("Misclassification Rate = ", np.mean(ypred != np.asarray(y_train)))

print("Test Set Performance Metrics")
print(mlp_clf.score(x_test, y_test))

print("Evaluation Metrics for Test Set: ")
# Predictions for this configuration are kept in a cell-local name so the
# baseline's y_test_pred is not overwritten.
alt_test_pred = mlp_clf.predict(x_test)
print(classification_report(y_test, alt_test_pred))
print("Misclassification Rate = ", np.mean(alt_test_pred != np.asarray(y_test)))