# Trabalho 1 - Machine Learning II 
Prof: Carlos Padilha

#### Alunos:  

Roberto A. Coutinho  
Thais Galho


## Sistemas com Multi-classificadores ou Ensembles

#### Este trabalho visa avaliar o entendimento em relaçãao á construção de sistemas com multi-classificadores ou ensembles. Para tal, os alunos deverão fazer o seguinte:


* Implementar o algoritmo AdaBoost (nos mesmos moldes que fizemos com o algoritmo Bagging).
    – Podem escolher qualquer tipo de classificador (MLP, SVM, etc).
* Processar os dados presente no arquivo sonar.all-data.
* Realizar treinamento e teste usando validação cruzada com 10 folds.
* Avaliar os resultados em termos de acurácia, recall e precisão.

Obs: O trabalho pode ser feito em dupla e deve ser enviado por email (carlos.engcomp@gmail.com).

In [1]:
import numpy as np
import pandas as pd

# Modelos
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# K-fold CrossValidation
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

# Métricas
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [2]:
imported_data = pd.read_csv('sonar.all-data.csv', header=None)
imported_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [3]:
# Separação entre dados e labels

labels = imported_data.iloc[:,-1]
data = imported_data.iloc[:,:-1]

<h2>Treinamento</h2>

In [4]:

# utiliza 25% do dataset para teste
X_train, X_test, Y_train, Y_test = train_test_split(data, labels, train_size=0.75, test_size=0.25, stratify=labels)

In [5]:
n_iterations = 10

n_train, n_test = len(X_train), len(X_test)

# Initialize weights
w = np.ones(n_train) / n_train

pred_train, pred_test = [np.zeros(n_train), np.zeros(n_test)]

In [6]:
len(X_train), len(w)

(156, 156)

In [11]:

# Fit a simple decision tree first
clf_tree = DecisionTreeClassifier(max_depth = 1, random_state = 1)
    
    
for i in range(n_iterations):
    # Fit a classifier with the specific weights
    clf_tree.fit(X_train, Y_train, sample_weight = w)
    
    pred_train_i = clf_tree.predict(X_train)
    #print(pred_train_i)
    
    pred_test_i = clf_tree.predict(X_test)
    
    # Indicator function
    miss = [int(x) for x in (pred_train_i != Y_train)]
    print("Miss : "+str(miss))
    
    # Equivalent with 1/-1 to update weights
    miss2 = [x if x==1 else -1 for x in miss]
    
    # Error
    err_m = np.dot(w,miss) / sum(w)
    print("Error : "+str(err_m))
    
    # Alpha
    alpha_m = 0.5 * np.log( (1 - err_m) / float(err_m))
    print("Alpha : "+str(alpha_m))
    
    
    # New weights
    w = np.multiply(w, np.exp([float(x) * alpha_m for x in miss2]))
    print("New weights : ")
    print(w)
    
    print()
    
    # Add to prediction
    #pred_train = [sum(x) for x in zip(pred_train, [x * alpha_m for x in pred_train_i])]
    #pred_test = [sum(x) for x in zip(pred_test, [x * alpha_m for x in pred_test_i])]

Miss : [0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0]
Error : 0.33839899021442155
Alpha : 0.3352185131293591
New weights : 
[1.02963930e-03 5.34463881e-03 3.99860648e-04 5.78312587e-04
 8.30660814e-04 2.21389643e-03 1.69312012e-03 1.33009376e-03
 2.62467597e-03 1.36045635e-03 2.13729512e-04 2.57322868e-03
 5.33919442e-04 9.61031628e-04 2.25893438e-03 6.55566636e-03
 1.80331553e-04 3.44082561e-04 1.96309074e-04 8.11361902e-04
 4.70894210e-03 2.67659049e-04 1.96309074e-04 4.17205854e-04
 1.94722277e-04 1.28895215e-03 1.81262008e-03 9.62390126e-05
 3.28277454e-03 1.40027526e

In [49]:
type(alpha_m)

numpy.float64

RangeIndex(start=0, stop=156, step=1)

In [74]:
zip(pred_train, [x * alpha_m for x in pred_train_i])

AttributeError: 'numpy.ndarray' object has no attribute 'index'