In [16]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')



In [17]:
dados = pd.read_csv('featuresTraining.csv')
labels = pd.read_csv('labelsTraining.csv')

In [18]:
dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 51 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  15 non-null     int64  
 1   0           15 non-null     float64
 2   1           15 non-null     float64
 3   2           15 non-null     float64
 4   3           15 non-null     float64
 5   4           15 non-null     float64
 6   5           15 non-null     float64
 7   6           15 non-null     float64
 8   7           15 non-null     float64
 9   8           15 non-null     float64
 10  9           15 non-null     float64
 11  10          15 non-null     float64
 12  11          15 non-null     float64
 13  12          15 non-null     float64
 14  13          15 non-null     float64
 15  14          15 non-null     float64
 16  15          15 non-null     float64
 17  16          15 non-null     float64
 18  17          15 non-null     float64
 19  18          15 non-null     flo

In [19]:
X = dados
y = labels
print(y)

      0
0   0.0
1   0.0
2   0.0
3   1.0
4   1.0
5   1.0
6   2.0
7   2.0
8   2.0
9   0.0
10  0.0
11  1.0
12  1.0
13  2.0
14  2.0


In [20]:
len(labels)

15

In [21]:
len(dados)

15

In [22]:
## Metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc

## Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42,shuffle=True,stratify=y)

In [26]:
knn = KNeighborsClassifier(n_neighbors=5,p=1)

knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)


print("Resultado de classificação usando KNN: \n", classification_report(y_test, y_pred))
print("Matriz de confusão: \n", confusion_matrix(y_test,y_pred))


Resultado de classificação usando KNN: 
               precision    recall  f1-score   support

         0.0       0.50      1.00      0.67         1
         1.0       0.00      0.00      0.00         1
         2.0       1.00      1.00      1.00         1

    accuracy                           0.67         3
   macro avg       0.50      0.67      0.56         3
weighted avg       0.50      0.67      0.56         3

Matriz de confusão: 
 [[1 0 0]
 [1 0 0]
 [0 0 1]]


In [28]:
# %% Random Forest Classifier

rf_clf = RandomForestClassifier(n_estimators=100,max_depth=8,random_state=42,
                                verbose=1,class_weight="balanced")

rf_clf.fit(X_train,y_train)
y_pred = rf_clf.predict(X_test)

print("Resultado de classificação usando Random Forest: \n", classification_report(y_test, y_pred))
print("Matriz de confusão: \n", confusion_matrix(y_test,y_pred))

Resultado de classificação usando Random Forest: 
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         1
         1.0       1.00      1.00      1.00         1
         2.0       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.50      0.67      0.56         3
weighted avg       0.50      0.67      0.56         3

Matriz de confusão: 
 [[0 0 1]
 [0 1 0]
 [0 0 1]]


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [29]:
from sklearn.ensemble import (AdaBoostClassifier, 
                              GradientBoostingClassifier, 
                              ExtraTreesClassifier,
                              RandomForestClassifier)

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier

In [30]:
class ModeloAuxiliar(object):
    def __init__(self, clf, seed=123, params=None):
        if params:
            params['random_state'] = seed
            self.clf = clf(**params)
        else:
            self.clf = clf()
    def predict(self, x):
        return self.clf.predict(x)
    
    def fit(self,x,y):
        return self.clf.fit(x,y)
    
    def feature_importances(self,x,y):
        return self.clf.fit(x,y).feature_importances_
    
    def score(self,x,y):
        return self.clf.score(x,y)

In [31]:
modelos = [{'nome': 'logreg',
            'modelo': LogisticRegression},
           {'nome': 'etree',
            'modelo': ExtraTreesClassifier},
           {'nome': 'gradboost',
             'modelo': GradientBoostingClassifier}, 
           {'nome': 'adaboost',
             'modelo': AdaBoostClassifier},
           {'nome': 'SVC',
            'modelo': SVC},
           {'nome': 'KNN',
            'modelo': KNeighborsClassifier},
           {'nome': 'GaussianNB',
            'modelo': GaussianNB},
           {'nome': 'Perceptron',
            'modelo': Perceptron},
           {'nome': 'LinearSVC',
            'modelo': LinearSVC},
           {'nome': 'SGD',
            'modelo': SGDClassifier},
           {'nome': 'Dtree',
            'modelo': DecisionTreeClassifier},
           {'nome': 'RForest',
            'modelo': RandomForestClassifier}
          ]

In [32]:
resultados = []
for model in modelos:
    x = ModeloAuxiliar(clf=model['modelo'])
    # treinar o modelo
    x.fit(X_train, y_train)
    
    # gerar predicao
    x_pred = x.predict(X_test)
    
    # gerar score
    acuracidade = round(x.score(X_test,y_test)*100,2)
    
    resultados.append({'nome': model['nome'],
                       'score': acuracidade,
                       'pred': x_pred })

In [33]:
models = pd.DataFrame(resultados)

In [34]:
models[['nome','score']].sort_values(by='score', ascending=False)

Unnamed: 0,nome,score
3,adaboost,100.0
6,GaussianNB,100.0
9,SGD,100.0
0,logreg,66.67
1,etree,66.67
2,gradboost,66.67
4,SVC,66.67
5,KNN,66.67
7,Perceptron,66.67
8,LinearSVC,66.67


In [35]:
from sklearn.model_selection import KFold 
from sklearn.model_selection import cross_val_score 
from sklearn.model_selection import cross_val_predict 

In [37]:
kfold = KFold(n_splits=10, random_state=None)

In [38]:
ada = AdaBoostClassifier(n_estimators=200, random_state=123, learning_rate=0.1)

In [39]:
resultado = cross_val_score(ada, X_train, y_train, cv=kfold, scoring='accuracy')

print('O score cross-validado do AdaBoost é:', resultado.mean())

O score cross-validado do AdaBoost é: 0.5


In [40]:
resultado

array([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.])