## Function to show the results

In [None]:
import matplotlib.pyplot as plt

def show_results(test, pred):
    """Plot ground-truth values and predictions as two overlaid line charts.

    Both series are drawn against the sample index ``0 .. len(test) - 1`` and
    the figure is displayed immediately.

    Args:
        test: Sequence of true target values.
        pred: Sequence of predicted values, plotted on the same index as
            ``test``.
    """
    sample_idx = range(len(test))

    # (values, line width, legend label) for each curve, drawn in this order.
    curves = (
        (test, 1, "original"),
        (pred, 1.1, "predicted"),
    )
    for values, width, name in curves:
        plt.plot(sample_idx, values, linewidth=width, label=name)

    plt.title("y-test and y-predicted data")
    plt.xlabel('X-axis')
    plt.ylabel('Y-axis')
    plt.grid(True)
    plt.legend(loc='best', fancybox=True, shadow=True)
    plt.show()

## Get the data 

In [None]:
from Preprocessing import getData
# Load the pre-split train / validation / test sets from the project's
# Preprocessing module.
x_train, x_val, x_test, y_train, y_val, y_test = getData()

# Drop singleton dimensions from every array that is used further down.
# y_train is squeezed like y_test so that the labels passed to fit() have the
# same layout as the labels used for scoring.
# The validation split (x_val, y_val) is left untouched.
x_train = x_train.squeeze()
y_train = y_train.squeeze()
x_test = x_test.squeeze()
y_test = y_test.squeeze()

## Raw (baseline) version

In [None]:
from sklearn import tree
from sklearn.metrics import accuracy_score

# Baseline model: a depth-limited decision tree with a fixed seed.
dt = tree.DecisionTreeClassifier(random_state=0, max_depth=5).fit(x_train, y_train)

# Mean accuracy measured on the same data the tree was fitted on.
score = dt.score(x_train, y_train)
print('score:', score)

# Accuracy on the test split, followed by a visual comparison.
pred = dt.predict(x_test)
accuracy = accuracy_score(y_test, pred)
print('accuracy:', accuracy)

show_results(y_test, pred)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier

# Single-step pipeline wrapping a gradient-boosting classifier.
# The step is named 'dec_tree' even though it holds a gradient-boosting model:
# the grid keys below and the cell that reads the search results both address
# it as `dec_tree__<param>`, so the name must stay in sync with them.
pipe = Pipeline(steps=[
    # Fixed seed (same value as the baseline tree) so that repeated searches
    # give the same result.
    ('dec_tree', GradientBoostingClassifier(random_state=0)),
])

# Hyper-parameter grid for GridSearchCV: 2 split criteria x depths 1..19.
params = dict(
    dec_tree__criterion=['friedman_mse', 'squared_error'],
    dec_tree__max_depth=list(range(1, 20)),
)


In [None]:
# Cross-validated search over every combination in `params`.
gsc = GridSearchCV(pipe, params).fit(x_train, y_train)

# Read the winning settings back from the refitted best pipeline.
best_settings = gsc.best_estimator_.get_params()
dec_tree__criterion = best_settings['dec_tree__criterion']
dec_tree__max_depth = best_settings['dec_tree__max_depth']

print('Best Criterion:', dec_tree__criterion)
print('Best max_depth:', dec_tree__max_depth)

Shortcut: set the hyperparameters by hand instead of taking them from the grid search above.

In [None]:
# Hand-picked hyper-parameters. Running this cell overwrites whatever the
# grid-search cell stored under the same two names.
dec_tree__criterion, dec_tree__max_depth = 'gini', 3

In [None]:
# Fit a plain decision tree with the hyper-parameters currently stored in
# dec_tree__criterion / dec_tree__max_depth. The seed matches the baseline
# tree so that re-running the cell yields the same model.
dt = tree.DecisionTreeClassifier(
    criterion=dec_tree__criterion,
    max_depth=dec_tree__max_depth,
    random_state=0,
)
dt.fit(x_train, y_train)

# Mean accuracy on the training data itself.
print('score:', dt.score(x_train, y_train))

In [None]:
from sklearn.metrics import accuracy_score

# Gradient boosting with library-default hyper-parameters, seeded so the
# comparison below is reproducible.
dt2 = GradientBoostingClassifier(random_state=0)
dt2.fit(x_train, y_train)

# Predictions from the default model and from the grid-search winner.
# NOTE: `gsc` must already have been fitted by the grid-search cell; if that
# cell was skipped, this line raises NameError.
y_pred = dt2.predict(x_test)
y_pred2 = gsc.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
accuracy2 = accuracy_score(y_test, y_pred2)

# 'dt' here labels the default gradient-boosting model (dt2), 'gs' the
# estimator selected by the grid search.
print('accuracy dt:', accuracy)
show_results(y_test, y_pred)
print('accuracy gs:', accuracy2)
show_results(y_test, y_pred2)