In [1]:
from sklearn.model_selection import KFold
from data_pre_processing.fetch_data import *
from sklearn.tree import DecisionTreeRegressor
from data_pre_processing.fill_missing_values import fill_ratings_with_mean_per_user
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

In [2]:
# Load the raw training data from the project helper.
features_raw, ratings_raw = get_X_Y_train()

# Fill missing ratings (presumably with each user's mean rating, going by the
# helper's name -- TODO confirm); the second return value is not needed here.
features_filled, _ = fill_ratings_with_mean_per_user(features_raw)

# Keep index 1 of the second axis only.
X = features_filled[:, 1, :]  # only ratings

# Binarise the target: +1 where the rating is above 3, -1 where it is <= 3.
is_above_three = (ratings_raw > 3).astype(int)
is_at_most_three = (ratings_raw <= 3).astype(int)
y = is_above_three - is_at_most_three

In [3]:
# Split into a training part and a validation part via the project helper
# (training_fraction=0.8 presumably sends 80% of the samples to training --
# verify against random_partition).
(X_train, y_train,
 X_validation, y_validation) = random_partition(X, y, training_fraction=0.8)

# Row counts of both parts; note that n_test_samples counts the rows of the
# *validation* split.
n_train_samples, n_test_samples = X_train.shape[0], X_validation.shape[0]

## AdaBoost

In [4]:
def misclassification_error(y_hat, y):
    """Return the fraction of entries whose predicted sign differs from the label sign.

    Parameters
    ----------
    y_hat : array-like
        Predicted scores or labels; only their sign is compared.
    y : array-like
        Reference labels; compared by sign as well.

    Returns
    -------
    Number of sign mismatches divided by ``len(y)``.
    """
    predicted_signs = np.sign(y_hat)
    label_signs = np.sign(y)
    n_mismatches = np.sum(np.not_equal(predicted_signs, label_signs))
    return n_mismatches / len(y)

# AdaBoost with depth-2 decision trees as weak learners.
# Labels are expected to be in {-1, +1} so that y_hat * y is +1 for a correct
# prediction and -1 for a wrong one.
n_rounds = 1000    # number of boosting rounds (one weak learner per round)
err_eps = 1e-10    # keeps the weighted error strictly inside (0, 1)

# Uniform starting weights; they are rescaled to sum to n_train_samples
# at the end of every round.
weights = np.ones(n_train_samples)

# Running alpha-weighted vote of all weak learners fitted so far.
boosted_train_predictions = np.zeros(n_train_samples)
boosted_validation_predictions = np.zeros(n_test_samples)

# Misclassification error of the boosted ensemble after each round.
train_misclassication_errors = []
validation_misclassication_errors = []

for t in range(n_rounds):
    tree = DecisionTreeClassifier(max_depth=2, min_samples_leaf=10)
    tree.fit(X_train, y_train, sample_weight=weights)
    y_hat_t = tree.predict(X_train)

    # Weighted share of training samples this round's tree gets wrong.
    misclassified = ~np.equal(np.sign(y_hat_t), np.sign(y_train))
    err_t = np.sum(weights[misclassified]) / np.sum(weights)
    # A weak learner with weighted error exactly 0 (or 1) would make the log
    # below infinite, which turns the sample weights into NaN and breaks the
    # next fit. Clipping keeps alpha_t finite and leaves every other value
    # of err_t untouched.
    err_t = np.clip(err_t, err_eps, 1 - err_eps)
    alpha_t = 0.5*np.log((1-err_t)/err_t)

    # Add this learner's vote, scaled by alpha_t, to the ensemble scores.
    boosted_train_predictions = boosted_train_predictions + alpha_t*y_hat_t
    boosted_validation_predictions = boosted_validation_predictions + alpha_t*tree.predict(X_validation)

    # Up-weight the samples the tree got wrong, down-weight the ones it got
    # right, then rescale so the weights sum to n_train_samples again.
    weights = np.multiply(weights, np.exp(-alpha_t*np.multiply(y_hat_t, y_train)))
    weights = (weights / np.sum(weights))*n_train_samples

    train_misclassication_errors.append(misclassification_error(boosted_train_predictions, y_train))
    validation_misclassication_errors.append(misclassification_error(boosted_validation_predictions, y_validation))

    print("Train: ", train_misclassication_errors[-1], "Test: ", validation_misclassication_errors[-1])
    

Train:  0.302875 Test:  0.305
Train:  0.302875 Test:  0.305
Train:  0.302875 Test:  0.305
Train:  0.29 Test:  0.2835
Train:  0.290875 Test:  0.287
Train:  0.2885 Test:  0.2835
Train:  0.289625 Test:  0.2845
Train:  0.287625 Test:  0.2825
Train:  0.287 Test:  0.2835
Train:  0.28725 Test:  0.2845
Train:  0.286625 Test:  0.2825
Train:  0.28925 Test:  0.2865
Train:  0.283125 Test:  0.281
Train:  0.286875 Test:  0.286
Train:  0.27975 Test:  0.276
Train:  0.283125 Test:  0.2805
Train:  0.27875 Test:  0.276
Train:  0.281625 Test:  0.2805
Train:  0.27775 Test:  0.275
Train:  0.274875 Test:  0.279
Train:  0.2775 Test:  0.2785
Train:  0.27525 Test:  0.2775
Train:  0.277625 Test:  0.2775
Train:  0.279125 Test:  0.279
Train:  0.27525 Test:  0.277
Train:  0.277375 Test:  0.277
Train:  0.275375 Test:  0.277
Train:  0.27775 Test:  0.279
Train:  0.27575 Test:  0.2765
Train:  0.273125 Test:  0.2735
Train:  0.2755 Test:  0.272
Train:  0.274125 Test:  0.274
Train:  0.27425 Test:  0.2705
Train:  0.275375 

Train:  0.246625 Test:  0.264
Train:  0.246625 Test:  0.267
Train:  0.245875 Test:  0.266
Train:  0.246 Test:  0.2665
Train:  0.246125 Test:  0.2655
Train:  0.245875 Test:  0.268
Train:  0.245375 Test:  0.265
Train:  0.246375 Test:  0.265
Train:  0.246 Test:  0.2665
Train:  0.245125 Test:  0.2655
Train:  0.246 Test:  0.265
Train:  0.2455 Test:  0.265
Train:  0.24625 Test:  0.2645
Train:  0.246 Test:  0.2665
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.24525 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.24525 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.24625 Test:  0.2655
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.245375 Test:  0.2655
Train:  0.2455 Test:  0.2655
Train:  0.246375 Test

Train:  0.23 Test:  0.2735
Train:  0.2295 Test:  0.274
Train:  0.23 Test:  0.2735
Train:  0.2295 Test:  0.274
Train:  0.23 Test:  0.2735
Train:  0.229375 Test:  0.274
Train:  0.229875 Test:  0.2735
Train:  0.22925 Test:  0.274
Train:  0.22975 Test:  0.2735
Train:  0.229625 Test:  0.274
Train:  0.22975 Test:  0.2735
Train:  0.229625 Test:  0.274
Train:  0.22975 Test:  0.2735
Train:  0.229625 Test:  0.274
Train:  0.229875 Test:  0.2735
Train:  0.2295 Test:  0.274
Train:  0.229875 Test:  0.2735
Train:  0.229625 Test:  0.274
Train:  0.229875 Test:  0.2735
Train:  0.229625 Test:  0.274
Train:  0.22975 Test:  0.2735
Train:  0.229625 Test:  0.274
Train:  0.2295 Test:  0.273
Train:  0.2295 Test:  0.2745
Train:  0.228875 Test:  0.274
Train:  0.22925 Test:  0.275
Train:  0.228375 Test:  0.2745
Train:  0.23 Test:  0.274
Train:  0.229125 Test:  0.274
Train:  0.2275 Test:  0.2735
Train:  0.227625 Test:  0.2745
Train:  0.228 Test:  0.2735
Train:  0.22775 Test:  0.2735
Train:  0.228 Test:  0.2735
Tra

Train:  0.218625 Test:  0.274
Train:  0.218375 Test:  0.275
Train:  0.21825 Test:  0.2745
Train:  0.2185 Test:  0.275
Train:  0.218875 Test:  0.2745
Train:  0.218875 Test:  0.275
Train:  0.2185 Test:  0.2735
Train:  0.218375 Test:  0.2755
Train:  0.218625 Test:  0.2735
Train:  0.218875 Test:  0.275
Train:  0.218625 Test:  0.2735
Train:  0.218625 Test:  0.275
Train:  0.218625 Test:  0.275
Train:  0.218 Test:  0.275
Train:  0.218375 Test:  0.2735
Train:  0.218625 Test:  0.274
Train:  0.217625 Test:  0.274
Train:  0.218875 Test:  0.274
Train:  0.218 Test:  0.274
Train:  0.21875 Test:  0.274
Train:  0.21725 Test:  0.2735
Train:  0.21825 Test:  0.274
Train:  0.21725 Test:  0.2735
Train:  0.217625 Test:  0.2745
Train:  0.21775 Test:  0.2735
Train:  0.217375 Test:  0.2745
Train:  0.218125 Test:  0.274
Train:  0.217875 Test:  0.277
Train:  0.2175 Test:  0.274
Train:  0.21825 Test:  0.2755
Train:  0.21725 Test:  0.2745
Train:  0.218 Test:  0.275
Train:  0.217875 Test:  0.275
Train:  0.217625 Te

In [None]:
# Keep this run's error curves under dedicated names. These are aliases of the
# lists filled by the boosting loop, not copies.
line_search_train_misclassication_errors = train_misclassication_errors
line_search_validation_misclassication_errors = validation_misclassication_errors

# Derive the x axis from the number of recorded errors instead of hard-coding
# the round count, so the plot keeps working if the number of boosting rounds
# changes (plt.plot requires x and y to have the same length).
iterations = list(range(len(line_search_train_misclassication_errors)))

plt.figure(figsize=(10,8))
plt.title("Train and Validation Misclassification Errors in each Iteration")
plt.plot(iterations, line_search_train_misclassication_errors)
plt.plot(iterations, line_search_validation_misclassication_errors)
# Legend entries follow the order of the two plot calls above.
plt.legend(['training misclassification error', 'validation misclassification error'], loc='lower left')
plt.xlabel("Iteration")
plt.ylabel("Misclassification Error")
plt.show()