In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix

from sklearn.neural_network import MLPClassifier

In [2]:
df = pd.read_csv('../datas/emprego.csv')
df.head()

Unnamed: 0,sl_no,gender,ssc_p,ssc_b,hsc_p,hsc_b,hsc_s,degree_p,degree_t,workex,etest_p,specialisation,mba_p,status,salary
0,1,M,67.0,Others,91.0,Others,Commerce,58.0,Sci&Tech,No,55.0,Mkt&HR,58.8,Placed,270000.0
1,2,M,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,Placed,200000.0
2,3,M,65.0,Central,68.0,Central,Arts,64.0,Comm&Mgmt,No,75.0,Mkt&Fin,57.8,Placed,250000.0
3,4,M,56.0,Central,52.0,Central,Science,52.0,Sci&Tech,No,66.0,Mkt&HR,59.43,Not Placed,
4,5,M,85.8,Central,73.6,Central,Commerce,73.3,Comm&Mgmt,No,96.8,Mkt&Fin,55.5,Placed,425000.0


In [3]:
x = df[['ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p']]
y = df['status']

Normalização

In [4]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler = scaler.fit(x)
x = scaler.transform(x)

In [5]:
print(x)

[[0.53823954 0.88962109 0.19512195 0.10416667 0.28448276]
 [0.79241394 0.68088962 0.6702439  0.76041667 0.56484258]
 [0.49701093 0.5107084  0.34146341 0.52083333 0.2470015 ]
 ...
 [0.53823954 0.49423394 0.56097561 0.1875     0.69377811]
 [0.68253968 0.47775947 0.19512195 0.41666667 0.33808096]
 [0.43516801 0.34596376 0.07317073 0.8125     0.33770615]]


In [6]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=18)

In [7]:
modelo = MLPClassifier(
    hidden_layer_sizes=(20, 10),
    activation='relu',
    solver='adam',
    max_iter=3000,
    shuffle=True,
    random_state=20,
    validation_fraction=0.2,
    verbose=False)

modelo.fit(X_train, y_train)

In [8]:
predicao = modelo.predict(X_test)

print(confusion_matrix(y_test,predicao))
print(classification_report(y_test,predicao))

[[14  9]
 [ 4 38]]
              precision    recall  f1-score   support

  Not Placed       0.78      0.61      0.68        23
      Placed       0.81      0.90      0.85        42

    accuracy                           0.80        65
   macro avg       0.79      0.76      0.77        65
weighted avg       0.80      0.80      0.79        65



Grid search

In [9]:
from sklearn.model_selection import GridSearchCV

parametros = {'activation': ['logistic', 'tanh', 'relu'], 'hidden_layer_sizes':[(20,10), (20), (5, 5, 5)]}

modelo = MLPClassifier(
    hidden_layer_sizes=(20, 10),
    solver='adam',
    max_iter=3000,
    shuffle=True,
    random_state=20,
    validation_fraction=0.2,
    verbose=False)

grid = GridSearchCV(modelo, parametros)

grid.fit(X_train, y_train)

In [10]:
print( grid.best_params_ )

{'activation': 'relu', 'hidden_layer_sizes': (5, 5, 5)}


In [11]:
modelo = grid.best_estimator_

predicao = modelo.predict(X_test)

print(confusion_matrix(y_test,predicao))
print(classification_report(y_test,predicao))

[[17  6]
 [ 5 37]]
              precision    recall  f1-score   support

  Not Placed       0.77      0.74      0.76        23
      Placed       0.86      0.88      0.87        42

    accuracy                           0.83        65
   macro avg       0.82      0.81      0.81        65
weighted avg       0.83      0.83      0.83        65

