In [1]:
import pandas as pd
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [2]:
# Read the semicolon-delimited dataset from the working directory.
DATA_FILE = 'divorce.csv'
data = pd.read_csv(DATA_FILE, delimiter=';')
# Preview the first five rows (the default row count for head()).
data.head(n=5)

Unnamed: 0,Atr1,Atr2,Atr3,Atr4,Atr5,Atr6,Atr7,Atr8,Atr9,Atr10,...,Atr46,Atr47,Atr48,Atr49,Atr50,Atr51,Atr52,Atr53,Atr54,Class
0,2,2,4,1,0,0,0,0,0,0,...,2,1,3,3,3,2,3,2,1,1
1,4,4,4,4,4,0,0,4,4,4,...,2,2,3,4,4,4,4,2,2,1
2,2,2,2,2,1,3,2,1,1,2,...,3,2,3,1,1,1,2,2,2,1
3,3,2,3,2,3,3,3,3,3,3,...,2,2,3,3,3,3,2,2,2,1
4,2,2,1,1,1,1,0,0,0,0,...,2,1,2,3,2,2,2,1,0,1


In [3]:
# Features: every column from 'Atr1' through 'Atr54' (label slices in .loc
# include both endpoints).
feature_columns = slice('Atr1', 'Atr54')
X = data.loc[:, feature_columns]
# Target: the 'Class' column.
Y = data['Class']

In [4]:
from sklearn.model_selection import cross_validate

In [5]:
# Shared splitter for all model evaluations below: stratified folds,
# without shuffling (the default), so the splits are deterministic.
N_SPLITS = 3
cv = StratifiedKFold(n_splits=N_SPLITS, shuffle=False)

In [6]:
# Cross-validate a 100-tree random forest on (X, Y) with the shared `cv` splitter.
# random_state is pinned because forest training is randomized; without a fixed
# seed the reported scores change from run to run.
random_forest_cv = cross_validate(
    RandomForestClassifier(n_estimators=100, random_state=42),
    X, Y,
    scoring=['f1', 'recall', 'precision', 'accuracy'],
    cv=cv,
)
# Keep only the per-metric test scores, averaged across folds and formatted to
# three decimals; any other entries returned by cross_validate (e.g. timings)
# are dropped.
random_forest = {
    key: '{:.3f}'.format(np.mean(random_forest_cv[key]))
    for key in ['test_f1', 'test_recall', 'test_precision', 'test_accuracy']
}
print(random_forest)

{'test_f1': '0.974', 'test_recall': '0.952', 'test_precision': '1.000', 'test_accuracy': '0.977'}


In [7]:
# Cross-validate logistic regression (liblinear solver) on (X, Y) with the
# shared `cv` splitter, scoring four classification metrics.
logist_regression_metrics = ['f1', 'recall', 'precision', 'accuracy']
logist_regression_cv = cross_validate(
    LogisticRegression(solver='liblinear'),
    X,
    Y,
    scoring=logist_regression_metrics,
    cv=cv,
)
# Summarise: mean test score per metric across folds, as a 3-decimal string.
logist_regression = {
    'test_' + metric: '{:.3f}'.format(np.mean(logist_regression_cv['test_' + metric]))
    for metric in logist_regression_metrics
}
print(logist_regression)

{'test_f1': '0.969', 'test_recall': '0.952', 'test_precision': '0.989', 'test_accuracy': '0.971'}


In [8]:
# Cross-validate a k-nearest-neighbours classifier (default settings) on (X, Y)
# with the shared `cv` splitter.
k_neighbors_cv = cross_validate(
    estimator=KNeighborsClassifier(),
    X=X,
    y=Y,
    scoring=['f1', 'recall', 'precision', 'accuracy'],
    cv=cv,
)
# Reduce each test-score array to its mean over the folds, formatted to three
# decimals; entries other than the four test metrics are left out.
reported_keys = ('test_f1', 'test_recall', 'test_precision', 'test_accuracy')
k_neighbors = {
    name: '{:.3f}'.format(np.mean(k_neighbors_cv[name]))
    for name in reported_keys
}
print(k_neighbors)

{'test_f1': '0.974', 'test_recall': '0.952', 'test_precision': '1.000', 'test_accuracy': '0.977'}
