### Exercise 9 - Random Forest Classifier

In [6]:
from si.io.csv_file import read_csv
from si.model_selection.split import train_test_split
from si.models.random_forest_classifier import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier
from si.metrics.accuracy import accuracy


In [7]:
data = read_csv('/home/pauloseixal/Github/si/datasets/iris/iris.csv', sep=',', features=True, label=True)
train, test = train_test_split(data, test_size=0.33, random_state=42)

In [8]:
model = RandomForestClassifier(n_estimators=5, min_sample_split=3, max_depth=3, mode='gini')
model.fit(train)
# print(model.predict(test))
print(model.score(test))


0.9795918367346939


Sklearn

In [9]:
model2 = SklearnRandomForestClassifier(n_estimators=5, min_samples_split=3, max_depth=3)
model2.fit(train.X, train.y)
print(accuracy(test.y, model2.predict(test.X)))

0.9795918367346939


### Exercise 10 - Stacking Classifier

In [10]:
from si.io.csv_file import read_csv
from si.model_selection.split import stratified_train_test_split
from si.models.knn_classifier import KNNClassifier
from si.models.logistic_regression import LogisticRegression
from si.models.decision_tree_classifier import DecisionTreeClassifier
from si.ensemble.stacking_classifier import StackingClassifier

In [11]:
data = read_csv('/home/pauloseixal/Github/si/datasets/breast_bin/breast-bin.csv', sep=",",features=True,label=True)
train, test = stratified_train_test_split(data, test_size=0.20, random_state=42)


In [12]:
#knnregressor
knn = KNNClassifier(k=5)

#logistic regression
lr=LogisticRegression(l2_penalty=0.1, alpha=0.1, max_iter=1000)

#decisiontreee
dt=DecisionTreeClassifier(min_sample_split=2, max_depth=10, mode='gini')

#final model
final_model=KNNClassifier(k=5)
modelos=[knn,lr,dt]
exercise=StackingClassifier(modelos,final_model)
exercise.fit(train)
print(exercise.score(test))

0.9784172661870504


Sklearn

In [13]:
#sklearn
from sklearn.ensemble import StackingClassifier as StackingClassifier_sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [14]:
#knnregressor
knn = KNeighborsClassifier(n_neighbors=5)

#logistic regression
lr=LogisticRegression(penalty='l2', C=0.1, max_iter=1000)

#decisiontreee
dt=DecisionTreeClassifier(min_samples_split=2, max_depth=10, criterion='gini')

#final model
final_model=KNeighborsClassifier(n_neighbors=5)
models=[('knn',knn),('lr',lr),('dt',dt)]
exercise=StackingClassifier_sklearn(estimators=models,final_estimator=final_model)
exercise.fit(train.X, train.y)
print(accuracy(test.y, exercise.predict(test.X)))


0.9856115107913669


### Exercise 11 - randomized_search_cv

In [15]:
from si.models.logistic_regression import LogisticRegression
from si.model_selection.randomized_search import randomized_search_cv
from si.model_selection.grid_search import grid_search_cv
from si.io.csv_file import read_csv
import numpy as np


# load the dataset
dataset = read_csv('/home/pauloseixal/Github/si/datasets/breast_bin/breast-bin.csv', sep=",",features=True,label=True)

# define the model
model = LogisticRegression()

# define the hyperparameter grid
hyperparameter_grid = {'l2_penalty': np.linspace(1, 10, 10),
                        'alpha': np.linspace(0.001, 0.0001, 100),
                        'max_iter': np.linspace(1000, 2000, 200),
                        }
# print(hyperparameter_grid)

# perform grid search cross validation
results = randomized_search_cv(model=model, dataset=dataset, hyperparameter_grid=hyperparameter_grid, cv=3, n_ite=10)

# print the results
print('Grid search results:\n')

print(f'Best avg score:\n {results["best_scores"]}')
print()
print(f'Best hyperparameters:\n {results["best_hyperparameters"]}')
print()
print(f'All scores:\n {results["scores"]}')
print()
print(f'All hyperparameters:\n {results["hyperparameters"]}')

Grid search results:

Best avg score:
 0.9669540229885057

Best hyperparameters:
 {'l2_penalty': 9.0, 'alpha': 0.0007090909090909091, 'max_iter': 1783.9195979899498}

All scores:
 [[0.9698275862068966, 0.9568965517241379, 0.9741379310344828], [0.9612068965517241, 0.9698275862068966, 0.9698275862068966], [0.9827586206896551, 0.9439655172413793, 0.9741379310344828], [0.9655172413793104, 0.9827586206896551, 0.9525862068965517], [0.9568965517241379, 0.9741379310344828, 0.9698275862068966], [0.9525862068965517, 0.9698275862068966, 0.978448275862069], [0.9655172413793104, 0.9655172413793104, 0.9698275862068966], [0.9741379310344828, 0.9439655172413793, 0.9827586206896551], [0.9741379310344828, 0.978448275862069, 0.9482758620689655], [0.9698275862068966, 0.9741379310344828, 0.9568965517241379]]

All hyperparameters:
 [{'l2_penalty': 9.0, 'alpha': 0.0007090909090909091, 'max_iter': 1783.9195979899498}, {'l2_penalty': 5.0, 'alpha': 0.0009454545454545454, 'max_iter': 1683.4170854271356}, {'l2_pe