In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

featuresDf = pd.read_pickle('./dataset/featuresDF.pkl')

In [2]:
featuresDf['fourier_mfcc'] = [np.concatenate([featuresDf.fourier[i],
                              featuresDf.mfcc[i]]) for i in range(len(featuresDf))]
                     
X = np.array(featuresDf['fourier_mfcc'].tolist())
y = np.array(featuresDf['class'].tolist())
X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.2)

# Model testing:
models = {'LogisticRegression': LogisticRegression(solver='lbfgs',multi_class='auto',max_iter=1000),
          'SVC': SVC(gamma='auto'),
          'KNeighborsClassifier': KNeighborsClassifier(3),
          'RandomForestClassifier': RandomForestClassifier(n_estimators=100),
          'DecisionTreeClassifier': DecisionTreeClassifier(),
          'GradientBoostingClassifier': GradientBoostingClassifier()}

# Scaled:
metrics = {}
for modelName, model in models.items():
    clf = model.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    metrics[modelName] = {'accuracy': round(accuracy_score(y_test, y_pred),2),
                          'precision': round(precision_score(y_test, y_pred, average='weighted'),2),
                          'recall': round(recall_score(y_test, y_pred, average='weighted'),2)}

  'precision', 'predicted', average, warn_for)


In [3]:
metrics

{'LogisticRegression': {'accuracy': 0.58, 'precision': 0.56, 'recall': 0.58},
 'SVC': {'accuracy': 0.51, 'precision': 0.26, 'recall': 0.51},
 'KNeighborsClassifier': {'accuracy': 0.6, 'precision': 0.6, 'recall': 0.6},
 'RandomForestClassifier': {'accuracy': 0.86,
  'precision': 0.88,
  'recall': 0.86},
 'DecisionTreeClassifier': {'accuracy': 0.78,
  'precision': 0.78,
  'recall': 0.78},
 'GradientBoostingClassifier': {'accuracy': 0.91,
  'precision': 0.91,
  'recall': 0.91}}

In [None]:
parameters = { 
    'n_estimators': [500, 1000],
    'learning_rate' :[0.3]}
gbc = GradientBoostingClassifier() 
clf = GridSearchCV(gbc, parameters, cv=3, scoring='accuracy', verbose=10, n_jobs= -1)
clf.fit(X, y)
print(clf.best_estimator_)
print(clf.best_score_) 
print(clf.best_params_)

In [None]:
import h2o
from h2o.automl import H2OAutoML
# H2O AutoML:
h2o.init(nthreads = -1, max_mem_size = 6)

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM 18.9 (build 11.0.5+10-LTS, mixed mode)
  Starting server from C:\ProgramData\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\ECOLOG~1\AppData\Local\Temp\tmp6uhfyosb
  JVM stdout: C:\Users\ECOLOG~1\AppData\Local\Temp\tmp6uhfyosb\h2o_Ecología_started_from_python.out
  JVM stderr: C:\Users\ECOLOG~1\AppData\Local\Temp\tmp6uhfyosb\h2o_Ecología_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 .......

In [None]:
hf = h2o.H2OFrame(featuresDf)