In [69]:
from data.GetGeneralData import GetGeneralData
from data.GetAnimalType import GetAnimalType

In [71]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [13]:
X_train, X_test, y_train, y_test = GetGeneralData()

In [67]:
def getLabeledResult(predictions):
    animalTypeNames = []
    for result in predictions: 
        animalTypeName = GetAnimalType(result)
        animalTypeNames.append(animalTypeName)
    return animalTypeNames

In [20]:
from sklearn.ensemble import RandomForestClassifier

mejor_score = 0
mejor_n_estimators = None
mejor_modelo_rf = None
rf_error_train = []
rf_error_val = []

n_estimators_grid = np.linspace(2,80,40).astype(int)

for n_estimators in n_estimators_grid: 
    modelo_rf = RandomForestClassifier(n_estimators=n_estimators, random_state=0)
    modelo_rf.fit(X_train, y_train)
    
    score_train = modelo_rf.score(X_train, y_train)
    rf_error_train.append(1 - score_train)
    
    score_val = modelo_rf.score(X_test, y_test)
    rf_error_val.append(1 - score_val)
    
    if score_val > mejor_score: 
        mejor_score = score_val
        mejor_n_estimators = n_estimators
        mejor_modelo_rf = modelo_rf

modelo_rf = mejor_modelo_rf
modelo_rf.fit(X_train, y_train)

print ("Mejor valor de n_estimators :", mejor_n_estimators)
print ("Exactitud de RandomForest en conjunto de entrenamiento :", modelo_rf.score(X_train, y_train))
print ("Exactitud de RandomForest en conjunto de validación    :", modelo_rf.score(X_test, y_test))

Mejor valor de n_estimators : 2
Exactitud de RandomForest en conjunto de entrenamiento : 1.0
Exactitud de RandomForest en conjunto de validación    : 1.0


In [72]:
predictions = modelo_rf.predict(X_test)
predictedNames = getLabeledResult(predictions)
testNames = getLabeledResult(Y_test)
print(confusion_matrix(testNames, predictedNames))
print(classification_report(testNames, predictedNames))

[[ 8  0  0  0  0  0  0]
 [ 0 10  0  0  0  0  0]
 [ 0  0  9  0  0  0  0]
 [ 0  0  0  9  0  0  0]
 [ 0  0  0  0  9  0  0]
 [ 0  0  0  0  0  7  0]
 [ 0  0  0  0  0  0  6]]
             precision    recall  f1-score   support

  Amphibian       1.00      1.00      1.00         8
       Bird       1.00      1.00      1.00        10
        Bug       1.00      1.00      1.00         9
       Fish       1.00      1.00      1.00         9
Invertebrat       1.00      1.00      1.00         9
     Mammal       1.00      1.00      1.00         7
    Reptile       1.00      1.00      1.00         6

avg / total       1.00      1.00      1.00        58



In [77]:
n_estimators=200
min_samples_leaf_grid = np.linspace(1,10,10).astype(int) 

mejor_score = 0
mejor_min_samples_leaf = None

for min_samples_leaf in min_samples_leaf_grid: 
    base_estimator = DecisionTreeClassifier(min_samples_leaf=min_samples_leaf, random_state=0)
    modelo_ab = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=n_estimators, random_state=0, algorithm="SAMME")
    modelo_ab.fit(X_train, y_train)
    
    score_val = modelo_ab.score(X_test, y_test)
    
    if score_val > mejor_score: 
        mejor_score = score_val
        mejor_min_samples_leaf = min_samples_leaf
        mejor_modelo_ab = modelo_ab

modelo_ab = mejor_modelo_ab
modelo_ab.fit(X_train, y_train)

print ("Mejor valor de min_samples_leaf :", mejor_min_samples_leaf)
print ("Exactitud de AdaBoost en conjunto de entrenamiento :", modelo_ab.score(X_train, y_train))
print ("Exactitud de AdaBoost en conjunto de validación    :", mejor_score)

Mejor valor de min_samples_leaf : 1
Exactitud de AdaBoost en conjunto de entrenamiento : 1.0
Exactitud de AdaBoost en conjunto de validación    : 1.0


In [78]:
predictions = modelo_ab.predict(X_test)
predictedNames = getLabeledResult(predictions)
testNames = getLabeledResult(Y_test)
print(confusion_matrix(testNames, predictedNames))
print(classification_report(testNames, predictedNames))

[[ 8  0  0  0  0  0  0]
 [ 0 10  0  0  0  0  0]
 [ 0  0  9  0  0  0  0]
 [ 0  0  0  9  0  0  0]
 [ 0  0  0  0  9  0  0]
 [ 0  0  0  0  0  7  0]
 [ 0  0  0  0  0  0  6]]
             precision    recall  f1-score   support

  Amphibian       1.00      1.00      1.00         8
       Bird       1.00      1.00      1.00        10
        Bug       1.00      1.00      1.00         9
       Fish       1.00      1.00      1.00         9
Invertebrat       1.00      1.00      1.00         9
     Mammal       1.00      1.00      1.00         7
    Reptile       1.00      1.00      1.00         6

avg / total       1.00      1.00      1.00        58



In [92]:
from sklearn.grid_search import GridSearchCV

param_grid = {"base_estimator__criterion" : ["gini", "entropy"],
              "base_estimator__splitter" :   ["best", "random"],
              "n_estimators": [1, 2, 50, 150, 200]
             }


DTC = DecisionTreeClassifier(random_state = 11, max_features = "auto", class_weight = "auto",max_depth = None)

ABC = AdaBoostClassifier(base_estimator = DTC)

# run grid search
grid_search_ABC = GridSearchCV(ABC, param_grid=param_grid, scoring = 'roc_auc')

In [100]:
m=grid_search_ABC.estimator
m

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight='auto', criterion='gini', max_depth=None,
            max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=11, splitter='best'),
          learning_rate=1.0, n_estimators=50, random_state=None)

In [98]:
model = m.fit(X_train, y_train)



In [99]:
predictions = model.predict(X_test)
predictedNames = getLabeledResult(predictions)
testNames = getLabeledResult(Y_test)
print(confusion_matrix(testNames, predictedNames))
print(classification_report(testNames, predictedNames))

[[ 8  0  0  0  0  0  0]
 [ 0 10  0  0  0  0  0]
 [ 0  0  9  0  0  0  0]
 [ 0  0  0  9  0  0  0]
 [ 0  0  0  0  9  0  0]
 [ 0  0  0  0  0  7  0]
 [ 0  0  0  0  0  0  6]]
             precision    recall  f1-score   support

  Amphibian       1.00      1.00      1.00         8
       Bird       1.00      1.00      1.00        10
        Bug       1.00      1.00      1.00         9
       Fish       1.00      1.00      1.00         9
Invertebrat       1.00      1.00      1.00         9
     Mammal       1.00      1.00      1.00         7
    Reptile       1.00      1.00      1.00         6

avg / total       1.00      1.00      1.00        58

