#### Exercise 9 - Test RandomForestClassifier

In [8]:
from si.io.csv_file import read_csv
from si.model_selection.split import train_test_split
from si.models.random_forest_classifier import RandomForestClassifier

data = read_csv('../datasets/iris/iris.csv', sep=',', features=True, label=True)
train, test = train_test_split(data, test_size=0.33, random_state=42)
model = RandomForestClassifier(min_sample_split=3, max_depth=3, mode='gini', n_estimators=5)
model.fit(train)
print('SI implementation:', model.score(test))


from sklearn.ensemble import RandomForestClassifier as RFC

model = RFC(n_estimators=5, max_depth=3, min_samples_split=3)
model.fit(train.X, train.y)
print('sklearn implementation:', model.score(test.X, test.y))

SI implementation: 0.9795918367346939
sklearn implementation: 0.9795918367346939


#### Exercise 10 - Test Stacking Classifier ensemble

In [11]:
from si.io.csv_file import read_csv
from si.model_selection.split import stratified_train_test_split
from si.models.knn_classifier import KNNClassifier
from si.models.logistic_regression import LogisticRegression
from si.models.decision_tree_classifier import DecisionTreeClassifier
from si.ensemble.stacking_classifier import StackingClassifier
from si.metrics.accuracy import accuracy
from si.data.dataset import Dataset


data = read_csv('../datasets/breast_bin/breast-bin.csv', sep=",",features=True,label=True)
train, test = stratified_train_test_split(data, test_size=0.20, random_state=42)

#knnregressor
knn = KNNClassifier(k=5)

#logistic regression
lr=LogisticRegression(l2_penalty=0.1, alpha=0.1, max_iter=1000)

#decisiontreee
dt=DecisionTreeClassifier(min_sample_split=2, max_depth=10, mode='gini')

#final model
final_model=KNNClassifier(k=5)
modelos=[knn,lr,dt]
exercise=StackingClassifier(modelos,final_model)
exercise.fit(train)
print('SI implementation:', exercise.score(test))

#sklearn
from sklearn.ensemble import StackingClassifier as StackingClassifier_sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

#knnregressor
knn = KNeighborsClassifier(n_neighbors=5)

#logistic regression
lr=LogisticRegression(penalty='l2', C=0.1, max_iter=1000)

#decisiontreee
dt=DecisionTreeClassifier(min_samples_split=2, max_depth=10, criterion='gini')

#final model
final_model=KNeighborsClassifier(n_neighbors=5)
models=[('knn',knn),('lr',lr),('dt',dt)]
exercise=StackingClassifier_sklearn(estimators=models,final_estimator=final_model)
exercise.fit(train.X, train.y)
print('sklearn implementation:', accuracy(test.y, exercise.predict(test.X)))

SI implementation: 0.9784172661870504
sklearn implementation: 0.9856115107913669


#### Exercise11 - Test randomized_search_cv function.

In [4]:
from si.models.logistic_regression import LogisticRegression
# from si.model_selection.grid_search import grid_search_cv
from si.io.csv_file import read_csv
from si.model_selection.randomized_search import randomized_search_cv
import numpy as np 


# load the dataset
dataset = read_csv('../datasets/breast_bin/breast-bin.csv', sep=",",features=True,label=True)

# define the model
model = LogisticRegression()

# define the hyperparameter grid
hyperparameter_grid = {'l2_penalty': np.linspace(1, 10, 10),
                        'alpha': np.linspace(0.001, 0.0001, 100),
                        'max_iter': np.linspace(1000, 2000, 200),
                        }
# print(hyperparameter_grid)

# perform grid search cross validation
results = randomized_search_cv(model=model, dataset=dataset, hyperparameter_grid=hyperparameter_grid, cv=5, n_iter=10)

# print the results
print('Grid search results:\n')
print(f'Best score:\n {results["best_score"]}')
print()
print(f'Best hyperparameters:\n {results["best_hyperparameters"]}')
print()
print(f'All scores:\n {results["scores"]}')
print()
print(f'All hyperparameters:\n {results["hyperparameters"]}')

cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
cv: 5
Grid search results:

Best score:
 0.9683453237410072

Best hyperparameters:
 {'l2_penalty': 9.0, 'alpha': 0.0004909090909090909, 'max_iter': 1296.4824120603016}

All scores:
 [[0.9784172661870504, 0.9424460431654677, 0.9640287769784173, 0.9856115107913669, 0.9640287769784173], [0.9640287769784173, 0.9784172661870504, 0.9640287769784173, 0.9568345323741008, 0.9712230215827338], [0.9568345323741008, 0.9640287769784173, 0.9712230215827338, 0.9640287769784173, 0.9784172661870504], [0.9640287769784173, 0.9640287769784173, 0.9856115107913669, 0.9568345323741008, 0.9640287769784173], [0.9784172661870504, 0.9712230215827338, 0.9640287769784173, 0.9568345323741008, 0.9640287769784173], [0.9784172661870504, 0.9568345323741008, 0.9640287769784173, 0.9568345323741008, 0.9784172661870504], [0.9712230215827338, 0.9712230215827338, 0.9568345323741008, 0.9928057553956835, 0.94964028