In [31]:
import pandas as pd
import numpy as np
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import  accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix

In [32]:
fertilidade_df = pd.read_csv('fertility.csv')

In [33]:
fertilidade_df.head(5)

Unnamed: 0,season,age,childish_diseases,accident,surgical,high_fevers,frequency_alcohol,smoking_habit,hours_sitting,diagnosis
0,-0.33,0.69,0,1,1,0,0.8,0,0.88,N
1,-0.33,0.94,1,0,1,0,0.8,1,0.31,O
2,-0.33,0.5,1,0,0,0,1.0,-1,0.5,N
3,-0.33,0.75,0,1,1,0,1.0,-1,0.38,N
4,-0.33,0.67,1,1,0,0,0.8,-1,0.5,O


In [34]:
fertilidade_df['diagnosis'] = fertilidade_df['diagnosis'].map( {'N' : 1, 'O': 0}).astype(int)

In [35]:
fertilidade_df.head(5)

Unnamed: 0,season,age,childish_diseases,accident,surgical,high_fevers,frequency_alcohol,smoking_habit,hours_sitting,diagnosis
0,-0.33,0.69,0,1,1,0,0.8,0,0.88,1
1,-0.33,0.94,1,0,1,0,0.8,1,0.31,0
2,-0.33,0.5,1,0,0,0,1.0,-1,0.5,1
3,-0.33,0.75,0,1,1,0,1.0,-1,0.38,1
4,-0.33,0.67,1,1,0,0,0.8,-1,0.5,0


In [36]:
X_df = fertilidade_df[['season','age','childish_diseases','accident','surgical','high_fevers','frequency_alcohol','smoking_habit','hours_sitting']]

In [37]:
Y_df = fertilidade_df['diagnosis']

In [38]:
Xdummies_df = pd.get_dummies(X_df)

In [39]:
X = Xdummies_df.values

In [40]:
Y = Y_df.values

In [41]:
porcentagem_treino = 0.90
tamanho_treino = int(porcentagem_treino * len(Y))
tamanho_validacao = len(Y) - tamanho_treino
treino_dados = X[:tamanho_treino]
treino_marcacoes = Y[:tamanho_treino]
dados_teste = X[-tamanho_validacao:]
marcacoes_teste = Y[-tamanho_validacao:]

In [42]:
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes,dados_teste,marcacoes_teste):
    k = 10
    scores = cross_val_score(modelo,treino_dados,treino_marcacoes,cv = k)
    taxa_de_acerto = np.mean(scores)
    
    modelo.fit(treino_dados, treino_marcacoes)
    previsoes = modelo.predict(dados_teste)
    
    accuracyScore = accuracy_score(marcacoes_teste, previsoes)
    precisionScore = average_precision_score(marcacoes_teste, scores)
    recallScoreWeighted = recall_score(marcacoes_teste, previsoes, average='weighted')
    recallScoreMacro = recall_score(marcacoes_teste, previsoes, average='macro')
    recallScoreMicro = recall_score(marcacoes_teste, previsoes, average='micro')
    meanSquaredError = mean_squared_error(marcacoes_teste, previsoes) 
    confunsionMatrix = confusion_matrix(marcacoes_teste, previsoes)
    
    print(confunsionMatrix)
    print('Acerto %s: %.2f' % (nome, (taxa_de_acerto * 100)))
    print('Acuracia %s: %.2f' % (nome, (accuracyScore * 100)))
    print('Precisão %s: %.2f' % (nome, (precisionScore * 100)))
    print('Recall (weighted)  %s: %.2f' % (nome, (recallScoreWeighted * 100)))
    print('Recall (macro)  %s: %.2f' % (nome, (recallScoreMacro * 100)))
    print('Recall (micro)  %s: %.2f' % (nome, (recallScoreMicro * 100)))
    print('Mean Square Error  %s: %.2f' % (nome, (meanSquaredError * 100)))

In [43]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
modeloOneVsRest = OneVsRestClassifier(LinearSVC(random_state = 0))
resultadoOneVsRest = fit_and_predict("OneVsRest", modeloOneVsRest,treino_dados, treino_marcacoes,dados_teste,marcacoes_teste)

[[0 1]
 [0 9]]
Acerto OneVsRest: 87.86
Acuracia OneVsRest: 90.00
Precisão OneVsRest: 87.93
Recall (weighted)  OneVsRest: 90.00
Recall (macro)  OneVsRest: 50.00
Recall (micro)  OneVsRest: 90.00
Mean Square Error  OneVsRest: 10.00


In [44]:
from sklearn.multiclass import OneVsOneClassifier
modeloOneVsOne = OneVsOneClassifier(LinearSVC(random_state = 0))
resultadoOneVsOne = fit_and_predict("OneVsOne", modeloOneVsOne,treino_dados, treino_marcacoes,dados_teste,marcacoes_teste)

[[0 1]
 [0 9]]
Acerto OneVsOne: 87.86
Acuracia OneVsOne: 90.00
Precisão OneVsOne: 87.93
Recall (weighted)  OneVsOne: 90.00
Recall (macro)  OneVsOne: 50.00
Recall (micro)  OneVsOne: 90.00
Mean Square Error  OneVsOne: 10.00


In [45]:
from sklearn.tree import DecisionTreeClassifier
modeloDecisionTree = DecisionTreeClassifier(criterion = "entropy", random_state = 100)
resultadoOneVsOne = fit_and_predict("DecisionTree", modeloDecisionTree,treino_dados, treino_marcacoes,dados_teste,marcacoes_teste)

[[0 1]
 [2 7]]
Acerto DecisionTree: 75.61
Acuracia DecisionTree: 70.00
Precisão DecisionTree: 85.56
Recall (weighted)  DecisionTree: 70.00
Recall (macro)  DecisionTree: 38.89
Recall (micro)  DecisionTree: 70.00
Mean Square Error  DecisionTree: 30.00
