# Machine Learning Classification

In [19]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn import svm

## Dataset preparation

In [3]:
datas = pd.read_csv('biometria.csv')

In [4]:
labelencoder = LabelEncoder()
datas["Esporte"] = labelencoder.fit_transform(datas["Esporte"])
#datas['Esporte']

In [5]:
# data["Species"]
# Construct a dataframe from a dictionary
sports = pd.DataFrame({'Esporte': ['V', 'F']})
#sports
#datas.head(n=30)

In [40]:
X = datas[['Altura (m)', 'Peso (kg)']] # Features
y = datas['Esporte'].values # Labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = True)
#X_train

# Mean Normalization 
#(Standarize the features to follow the normal distribution, to obtain a faster & better classifier)
sc = StandardScaler() 
X_train_array = sc.fit_transform(X_train.values) #calculate μ & σ(fit) and apply the transformation(transform)
#X_test_array = sc.fit_transform(X_test.values) 

# Assign the scaled data to a DataFrame & use the index and columns arguments to keep your original indices and column names:
X_train = pd.DataFrame(X_train_array, index=X_train.index, columns=X_train.columns)
#X_train

## Multi Layer Perceptron

<img src="mlp.png">

In [72]:
mlp = MLPClassifier(hidden_layer_sizes=(20), activation='identity', solver='sgd',learning_rate_init=0.1,max_iter=500)
#‘sgd’ refers to stochastic gradient descent.
# activation : {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}, default ‘relu’

In [73]:
mlp.fit(X_train, y_train)

MLPClassifier(activation='identity', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=20, learning_rate='constant',
       learning_rate_init=0.1, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='sgd', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [74]:

mlp_pred = mlp.predict(X_test)

# Score on train data in order to compare it with validation score and check for bias or variance error
#mlp.score(X_train, y_train)

In [75]:
# But it is not going to generalize into new data

#mlp.score(mlp_pred,y_test)

print (metrics.accuracy_score(y_test, mlp_pred))

#print (mlp_pred[0:5])
#print (y_test[0:5])

0.5555555555555556


# K-Nearest Neighbor

<img src="knn.png">

In [85]:
knn = KNeighborsClassifier(n_neighbors=7)

In [86]:
# Train the model using the training sets
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=7, p=2,
           weights='uniform')

In [91]:
knn_pred = knn.predict(X_test)

#knn.score(X_train, y_train)

#knn.score(X_test,y_test)
print (metrics.accuracy_score(y_test, knn_pred))

0.4444444444444444


# Support Vector Machine

<img src="svm_1.png">

In [15]:
svm_model = svm.SVC(kernel = 'rbf', C = 100, gamma = 'auto', probability=False)
svm_model.fit(X_train, y_train)

SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
svm_model.score(X_train, y_train)

0.9166666666666666

In [90]:
#svm_model.score(X_test,y_test)

svm_pred = knn.predict(X_test)
print (metrics.accuracy_score(y_test, svm_pred))

0.4444444444444444


# Conclusion

the results obtained were less than 60% accuracy.

# References

- https://github.com/annisap/irs-mlp/blob/master/MLP-LearningCurves.ipynb
- https://www.python-course.eu/neural_networks.php
- https://www.datacamp.com/community/tutorials/k-nearest-neighbor-classification-scikit-learn
- https://medium.com/@adi.bronshtein/a-quick-introduction-to-k-nearest-neighbors-algorithm-62214cea29c7