In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import numpy as np
from data_utils import *
from tabulate import tabulate
from naive_regresor import NaiveRegresor, NaiveRegresor_WithNoDate, NaiveRegresor_WithDateSubstraction
from auto_ml_predictor import AutoMl_Regresor
from fill_missing_values import fill_with_mean, fill_with_zeros
from PCA_regresor import PCAOnlyRatingsRegresor

In [4]:
# Load the training arrays and the movie date metadata used by the predictors below.
# The three helpers are not imported by name; presumably they come from the
# star import of data_utils -- TODO confirm.
X,Y = get_X_Y_train()
movies_dates = get_movies_dates()
# NOTE: 'ouput_movie_date' (sic) is referenced under this exact spelling when
# NaiveRegresor_WithDateSubstraction is constructed, so renaming it means updating both places.
ouput_movie_date = get_output_movie_date()

In [5]:
def fill_with_none(X):
    """Identity "filling" strategy: return ``X`` unchanged (missing values are left as they are)."""
    return X
# Registry of missing-value strategies, keyed by the label used as the column
# header in the final results table. Insertion order is the column order.
filling_nan_methods = {
    'none': fill_with_none,
    'with_mean': fill_with_mean,
    'with_zeros': fill_with_zeros,
}

In [6]:
def get_filled_data(fill_method,X_train,X_test):
    """Apply ``fill_method`` to train and test stacked together, then split them back.

    Parameters
    ----------
    fill_method : callable
        Takes the stacked array and returns an array that can be sliced the
        same way (three axes, same number of rows along axis 0).
    X_train, X_test : arrays that ``np.concatenate`` can join along axis 0.

    Returns
    -------
    tuple
        ``(X_train_filled, X_test_filled)``: the first ``X_train.shape[0]`` rows
        of the filled array and the remaining rows, respectively.

    NOTE(review): ``fill_method`` sees train and test rows at the same time, so
    any statistic it derives from its input may draw on test data -- confirm
    this is intended.
    """
    n_train = X_train.shape[0]
    stacked = np.concatenate((X_train, X_test), axis=0)
    filled = fill_method(stacked)
    # Three explicit indices: the filled array must be 3-dimensional.
    return filled[:n_train, :, :], filled[n_train:, :, :]

In [7]:
def score_R2(y_predicted,y_actual):
    """Return the root-mean-squared error (RMSE) between predictions and targets.

    Despite the ``R2`` in its name, this is the square root of scikit-learn's
    ``mean_squared_error``, not a coefficient of determination: lower is
    better and 0.0 means a perfect fit.

    Parameters
    ----------
    y_predicted : predicted values.
    y_actual : ground-truth values.

    Returns
    -------
    float
    """
    from math import sqrt
    from sklearn.metrics import mean_squared_error

    mse = mean_squared_error(y_actual, y_predicted)
    return sqrt(mse)

In [8]:
# Maps a predictor name to its list of per-filling-method scores. Entries are
# numbers, or the string 'inf' for methods that were skipped.
scores = dict()
def get_min_score(k):
    """Return the smallest numeric score recorded for predictor ``k``.

    Non-numeric entries (the ``'inf'`` placeholder strings stored for skipped
    filling methods) are ignored. If no numeric entry exists at all,
    ``float('inf')`` is returned instead of letting ``min()`` raise
    ``ValueError`` on an empty sequence, so the function stays usable as a
    sort key and in the summary printout.

    Parameters
    ----------
    k : key into the module-level ``scores`` dict.

    Raises
    ------
    KeyError
        If ``k`` has no entry in ``scores``.
    """
    # The accepted types are unchanged from the original filter.
    numeric = (x for x in scores[k] if isinstance(x, (int, float, complex)))
    return min(numeric, default=float('inf'))

In [9]:
# Number of random train/test partitions averaged for each filling method.
number_of_tests = 10
def check_predictor(pred, pred_name, needFill=None):
    """Score ``pred`` on repeated random splits, once per NaN-filling method.

    For every entry of ``filling_nan_methods`` that is selected by
    ``needFill``, the predictor is fitted and evaluated ``number_of_tests``
    times on fresh ``random_partition`` splits of the global ``X``/``Y``; the
    mean of ``score_R2`` over those runs becomes that method's score. Methods
    that are not selected get the string ``'inf'`` instead. The list of
    results (in ``filling_nan_methods`` order) is stored in
    ``scores[pred_name]`` and the best numeric score is printed.

    Parameters
    ----------
    pred : object
        Must provide ``fit(X, Y)`` and ``predict(X, extra)``; ``extra`` is
        column 0 of the test targets, while column 1 is what the predictions
        are compared against.
    pred_name : str
        Key under which the results are stored in ``scores``.
    needFill : bool or None
        ``None`` evaluates every filling method, ``True`` only those whose key
        is not ``'none'``, ``False`` only ``'none'``.
    """
    per_method_scores = []
    for method_name, fill_method in filling_nan_methods.items():
        is_selected = needFill is None or (method_name != 'none') == needFill
        if not is_selected:
            # Placeholder for combinations that are deliberately not evaluated.
            per_method_scores.append('inf')
            continue
        total = 0
        for _ in range(number_of_tests):
            X_train, Y_train, X_test, Y_test = random_partition(X, Y)
            X_tr, X_te = get_filled_data(fill_method, X_train, X_test)
            pred.fit(X_tr, Y_train)
            Y_pred = pred.predict(X_te, Y_test[:, 0])
            total += score_R2(Y_pred, Y_test[:, 1])
        per_method_scores.append(total / number_of_tests)
    scores[pred_name] = per_method_scores
    print("min score: ", get_min_score(pred_name), " ", pred_name)

In [10]:
# Baseline: needFill=True evaluates only the filling methods other than 'none'.
check_predictor(NaiveRegresor(), 'NaiveRegresor', needFill=True)

min score:  0.7677645442350378   NaiveRegresor


In [11]:
# Same evaluation for the NaiveRegresor_WithNoDate variant (filled inputs only).
check_predictor(NaiveRegresor_WithNoDate(), 'NaiveRegresor_WithNoDate', needFill=True)

min score:  0.7693131941789958   NaiveRegresor_WithNoDate


In [12]:
# This variant is constructed with the movie date metadata loaded earlier;
# it is evaluated on filled inputs only (needFill=True).
predictor = NaiveRegresor_WithDateSubstraction(movies_dates, ouput_movie_date)
check_predictor(predictor, 'NaiveRegresor_WithDateSubstraction', needFill=True)

min score:  0.7667947638612205   NaiveRegresor_WithDateSubstraction


In [13]:
# needFill is left at None, so every filling method (including 'none') is evaluated.
# NOTE(review): this call emits a very long training log; consider %%capture
# for this cell or clearing its output before sharing the notebook.
check_predictor(AutoMl_Regresor(), 'AutoMl_Regresor')

Welcome to auto_ml! We're about to go through and make sense of your data using machine learning, and give you a production-ready pipeline to get predictions with.

If you have any issues, or new feature ideas, let us know at http://auto.ml
You are running on version 2.9.10
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
Running basic data cleaning
Fitting DataFrameVectorizer
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
[1] random_holdout_set_from_training_data's score is: -0.895
[2] random_holdout_set_from_training_data's score is: -0.877
[3] random_holdout_set_from_training_data's score is: -0.862
[4] random

[26] random_holdout_set_from_training_data's score is: -0.77
[27] random_holdout_set_from_training_data's score is: -0.769
[28] random_holdout_set_from_training_data's score is: -0.769
[29] random_holdout_set_from_training_data's score is: -0.768
[30] random_holdout_set_from_training_data's score is: -0.768
[31] random_holdout_set_from_training_data's score is: -0.767
[32] random_holdout_set_from_training_data's score is: -0.766
[33] random_holdout_set_from_training_data's score is: -0.766
[34] random_holdout_set_from_training_data's score is: -0.765
[35] random_holdout_set_from_training_data's score is: -0.764
[36] random_holdout_set_from_training_data's score is: -0.763
[37] random_holdout_set_from_training_data's score is: -0.763
[38] random_holdout_set_from_training_data's score is: -0.762
[39] random_holdout_set_from_training_data's score is: -0.762
[40] random_holdout_set_from_training_data's score is: -0.762
[41] random_holdout_set_from_training_data's score is: -0.761
[42] rand

[62] random_holdout_set_from_training_data's score is: -0.791
[64] random_holdout_set_from_training_data's score is: -0.79
[66] random_holdout_set_from_training_data's score is: -0.79
[68] random_holdout_set_from_training_data's score is: -0.79
[70] random_holdout_set_from_training_data's score is: -0.791
[72] random_holdout_set_from_training_data's score is: -0.791
[74] random_holdout_set_from_training_data's score is: -0.79
[76] random_holdout_set_from_training_data's score is: -0.79
[78] random_holdout_set_from_training_data's score is: -0.79
[80] random_holdout_set_from_training_data's score is: -0.791
[82] random_holdout_set_from_training_data's score is: -0.79
[84] random_holdout_set_from_training_data's score is: -0.791
[86] random_holdout_set_from_training_data's score is: -0.791
[88] random_holdout_set_from_training_data's score is: -0.791
[90] random_holdout_set_from_training_data's score is: -0.79
[92] random_holdout_set_from_training_data's score is: -0.791
[94] random_hold

[115] random_holdout_set_from_training_data's score is: -0.782
[118] random_holdout_set_from_training_data's score is: -0.782
[121] random_holdout_set_from_training_data's score is: -0.782
[124] random_holdout_set_from_training_data's score is: -0.782
[127] random_holdout_set_from_training_data's score is: -0.782
[130] random_holdout_set_from_training_data's score is: -0.782
[133] random_holdout_set_from_training_data's score is: -0.782
[136] random_holdout_set_from_training_data's score is: -0.782
[139] random_holdout_set_from_training_data's score is: -0.782
[142] random_holdout_set_from_training_data's score is: -0.782
[145] random_holdout_set_from_training_data's score is: -0.782
[148] random_holdout_set_from_training_data's score is: -0.783
[151] random_holdout_set_from_training_data's score is: -0.783
[154] random_holdout_set_from_training_data's score is: -0.783
[157] random_holdout_set_from_training_data's score is: -0.783
[160] random_holdout_set_from_training_data's score is:

[2] random_holdout_set_from_training_data's score is: -0.914
[3] random_holdout_set_from_training_data's score is: -0.897
[4] random_holdout_set_from_training_data's score is: -0.883
[5] random_holdout_set_from_training_data's score is: -0.87
[6] random_holdout_set_from_training_data's score is: -0.858
[7] random_holdout_set_from_training_data's score is: -0.849
[8] random_holdout_set_from_training_data's score is: -0.841
[9] random_holdout_set_from_training_data's score is: -0.834
[10] random_holdout_set_from_training_data's score is: -0.828
[11] random_holdout_set_from_training_data's score is: -0.823
[12] random_holdout_set_from_training_data's score is: -0.819
[13] random_holdout_set_from_training_data's score is: -0.814
[14] random_holdout_set_from_training_data's score is: -0.81
[15] random_holdout_set_from_training_data's score is: -0.807
[16] random_holdout_set_from_training_data's score is: -0.804
[17] random_holdout_set_from_training_data's score is: -0.801
[18] random_holdou

[40] random_holdout_set_from_training_data's score is: -0.744
[41] random_holdout_set_from_training_data's score is: -0.743
[42] random_holdout_set_from_training_data's score is: -0.743
[43] random_holdout_set_from_training_data's score is: -0.743
[44] random_holdout_set_from_training_data's score is: -0.742
[45] random_holdout_set_from_training_data's score is: -0.742
[46] random_holdout_set_from_training_data's score is: -0.742
[47] random_holdout_set_from_training_data's score is: -0.742
[48] random_holdout_set_from_training_data's score is: -0.742
[49] random_holdout_set_from_training_data's score is: -0.743
[50] random_holdout_set_from_training_data's score is: -0.742
[52] random_holdout_set_from_training_data's score is: -0.742
[54] random_holdout_set_from_training_data's score is: -0.743
[56] random_holdout_set_from_training_data's score is: -0.742
[58] random_holdout_set_from_training_data's score is: -0.742
[60] random_holdout_set_from_training_data's score is: -0.741
[62] ran

[118] random_holdout_set_from_training_data's score is: -0.761
[121] random_holdout_set_from_training_data's score is: -0.761
[124] random_holdout_set_from_training_data's score is: -0.762
The number of estimators that were the best for this training dataset: 76
The best score on the holdout set: -0.7611184686998553


Here are the results from our GradientBoostingRegressor
predicting OUT
Calculating feature responses, for advanced analytics.
Welcome to auto_ml! We're about to go through and make sense of your data using machine learning, and give you a production-ready pipeline to get predictions with.

If you have any issues, or new feature ideas, let us know at http://auto.ml
You are running on version 2.9.10
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
Running basic data cleaning
Fitting DataF

Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
[1] random_holdout_set_from_training_data's score is: -0.927
[2] random_holdout_set_from_training_data's score is: -0.907
[3] random_holdout_set_from_training_data's score is: -0.891
[4] random_holdout_set_from_training_data's score is: -0.876
[5] random_holdout_set_from_training_data's score is: -0.865
[6] random_holdout_set_from_training_data's score is: -0.855
[7] random_holdout_set_from_training_data's score is: -0.846
[8] random_holdout_set_from_training_data's score is: -0.836
[9] random_holdout_set_from_training_data's score is: -0.828
[10] random_holdout_set_from_training_data's score is: -0.821
[11] random_holdout_set_from_training_data's score is: -0.815
[12] random_holdout_set_from_training_data's score is: -0.81
[13] random_holdout_set_from

[30] random_holdout_set_from_training_data's score is: -0.77
[31] random_holdout_set_from_training_data's score is: -0.769
[32] random_holdout_set_from_training_data's score is: -0.769
[33] random_holdout_set_from_training_data's score is: -0.768
[34] random_holdout_set_from_training_data's score is: -0.767
[35] random_holdout_set_from_training_data's score is: -0.767
[36] random_holdout_set_from_training_data's score is: -0.767
[37] random_holdout_set_from_training_data's score is: -0.766
[38] random_holdout_set_from_training_data's score is: -0.765
[39] random_holdout_set_from_training_data's score is: -0.764
[40] random_holdout_set_from_training_data's score is: -0.763
[41] random_holdout_set_from_training_data's score is: -0.763
[42] random_holdout_set_from_training_data's score is: -0.762
[43] random_holdout_set_from_training_data's score is: -0.763
[44] random_holdout_set_from_training_data's score is: -0.763
[45] random_holdout_set_from_training_data's score is: -0.762
[46] rand

[52] random_holdout_set_from_training_data's score is: -0.769
[54] random_holdout_set_from_training_data's score is: -0.769
[56] random_holdout_set_from_training_data's score is: -0.769
[58] random_holdout_set_from_training_data's score is: -0.769
[60] random_holdout_set_from_training_data's score is: -0.769
[62] random_holdout_set_from_training_data's score is: -0.769
[64] random_holdout_set_from_training_data's score is: -0.769
[66] random_holdout_set_from_training_data's score is: -0.769
[68] random_holdout_set_from_training_data's score is: -0.769
[70] random_holdout_set_from_training_data's score is: -0.769
[72] random_holdout_set_from_training_data's score is: -0.769
[74] random_holdout_set_from_training_data's score is: -0.769
[76] random_holdout_set_from_training_data's score is: -0.767
[78] random_holdout_set_from_training_data's score is: -0.768
[80] random_holdout_set_from_training_data's score is: -0.768
[82] random_holdout_set_from_training_data's score is: -0.769
[84] ran

[127] random_holdout_set_from_training_data's score is: -0.761
[130] random_holdout_set_from_training_data's score is: -0.761
[133] random_holdout_set_from_training_data's score is: -0.761
[136] random_holdout_set_from_training_data's score is: -0.761
[139] random_holdout_set_from_training_data's score is: -0.76
[142] random_holdout_set_from_training_data's score is: -0.76
[145] random_holdout_set_from_training_data's score is: -0.761
[148] random_holdout_set_from_training_data's score is: -0.761
[151] random_holdout_set_from_training_data's score is: -0.761
[154] random_holdout_set_from_training_data's score is: -0.762
[157] random_holdout_set_from_training_data's score is: -0.761
[160] random_holdout_set_from_training_data's score is: -0.761
[163] random_holdout_set_from_training_data's score is: -0.76
[166] random_holdout_set_from_training_data's score is: -0.761
[169] random_holdout_set_from_training_data's score is: -0.761
[172] random_holdout_set_from_training_data's score is: -0

[98] random_holdout_set_from_training_data's score is: -0.789
[100] random_holdout_set_from_training_data's score is: -0.789
The number of estimators that were the best for this training dataset: 60
The best score on the holdout set: -0.7881941756493496


Here are the results from our GradientBoostingRegressor
predicting OUT
Calculating feature responses, for advanced analytics.
Welcome to auto_ml! We're about to go through and make sense of your data using machine learning, and give you a production-ready pipeline to get predictions with.

If you have any issues, or new feature ideas, let us know at http://auto.ml
You are running on version 2.9.10
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
Running basic data cleaning
Fitting DataFrameVectorizer
Now using the model training_params that you pass

[19] random_holdout_set_from_training_data's score is: -0.799
[20] random_holdout_set_from_training_data's score is: -0.797
[21] random_holdout_set_from_training_data's score is: -0.795
[22] random_holdout_set_from_training_data's score is: -0.793
[23] random_holdout_set_from_training_data's score is: -0.791
[24] random_holdout_set_from_training_data's score is: -0.79
[25] random_holdout_set_from_training_data's score is: -0.789
[26] random_holdout_set_from_training_data's score is: -0.787
[27] random_holdout_set_from_training_data's score is: -0.786
[28] random_holdout_set_from_training_data's score is: -0.785
[29] random_holdout_set_from_training_data's score is: -0.785
[30] random_holdout_set_from_training_data's score is: -0.784
[31] random_holdout_set_from_training_data's score is: -0.783
[32] random_holdout_set_from_training_data's score is: -0.782
[33] random_holdout_set_from_training_data's score is: -0.781
[34] random_holdout_set_from_training_data's score is: -0.781
[35] rand

[32] random_holdout_set_from_training_data's score is: -0.759
[33] random_holdout_set_from_training_data's score is: -0.758
[34] random_holdout_set_from_training_data's score is: -0.757
[35] random_holdout_set_from_training_data's score is: -0.756
[36] random_holdout_set_from_training_data's score is: -0.756
[37] random_holdout_set_from_training_data's score is: -0.755
[38] random_holdout_set_from_training_data's score is: -0.755
[39] random_holdout_set_from_training_data's score is: -0.755
[40] random_holdout_set_from_training_data's score is: -0.754
[41] random_holdout_set_from_training_data's score is: -0.754
[42] random_holdout_set_from_training_data's score is: -0.754
[43] random_holdout_set_from_training_data's score is: -0.753
[44] random_holdout_set_from_training_data's score is: -0.753
[45] random_holdout_set_from_training_data's score is: -0.753
[46] random_holdout_set_from_training_data's score is: -0.752
[47] random_holdout_set_from_training_data's score is: -0.752
[48] ran

[94] random_holdout_set_from_training_data's score is: -0.773
[96] random_holdout_set_from_training_data's score is: -0.773
[98] random_holdout_set_from_training_data's score is: -0.773
[100] random_holdout_set_from_training_data's score is: -0.773
[103] random_holdout_set_from_training_data's score is: -0.773
[106] random_holdout_set_from_training_data's score is: -0.773
[109] random_holdout_set_from_training_data's score is: -0.773
[112] random_holdout_set_from_training_data's score is: -0.773
[115] random_holdout_set_from_training_data's score is: -0.774
[118] random_holdout_set_from_training_data's score is: -0.774
[121] random_holdout_set_from_training_data's score is: -0.775
[124] random_holdout_set_from_training_data's score is: -0.774
[127] random_holdout_set_from_training_data's score is: -0.774
[130] random_holdout_set_from_training_data's score is: -0.774
[133] random_holdout_set_from_training_data's score is: -0.773
[136] random_holdout_set_from_training_data's score is: -0

[68] random_holdout_set_from_training_data's score is: -0.762
[70] random_holdout_set_from_training_data's score is: -0.761
[72] random_holdout_set_from_training_data's score is: -0.761
[74] random_holdout_set_from_training_data's score is: -0.76
[76] random_holdout_set_from_training_data's score is: -0.76
[78] random_holdout_set_from_training_data's score is: -0.76
[80] random_holdout_set_from_training_data's score is: -0.759
[82] random_holdout_set_from_training_data's score is: -0.759
[84] random_holdout_set_from_training_data's score is: -0.759
[86] random_holdout_set_from_training_data's score is: -0.759
[88] random_holdout_set_from_training_data's score is: -0.759
[90] random_holdout_set_from_training_data's score is: -0.758
[92] random_holdout_set_from_training_data's score is: -0.758
[94] random_holdout_set_from_training_data's score is: -0.758
[96] random_holdout_set_from_training_data's score is: -0.758
[98] random_holdout_set_from_training_data's score is: -0.758
[100] rando

[80] random_holdout_set_from_training_data's score is: -0.765
[82] random_holdout_set_from_training_data's score is: -0.765
[84] random_holdout_set_from_training_data's score is: -0.764
[86] random_holdout_set_from_training_data's score is: -0.765
[88] random_holdout_set_from_training_data's score is: -0.764
[90] random_holdout_set_from_training_data's score is: -0.764
[92] random_holdout_set_from_training_data's score is: -0.764
[94] random_holdout_set_from_training_data's score is: -0.765
[96] random_holdout_set_from_training_data's score is: -0.765
[98] random_holdout_set_from_training_data's score is: -0.765
[100] random_holdout_set_from_training_data's score is: -0.765
[103] random_holdout_set_from_training_data's score is: -0.764
[106] random_holdout_set_from_training_data's score is: -0.764
[109] random_holdout_set_from_training_data's score is: -0.765
[112] random_holdout_set_from_training_data's score is: -0.765
The number of estimators that were the best for this training dat

[6] random_holdout_set_from_training_data's score is: -0.875
[7] random_holdout_set_from_training_data's score is: -0.865
[8] random_holdout_set_from_training_data's score is: -0.854
[9] random_holdout_set_from_training_data's score is: -0.846
[10] random_holdout_set_from_training_data's score is: -0.841
[11] random_holdout_set_from_training_data's score is: -0.833
[12] random_holdout_set_from_training_data's score is: -0.826
[13] random_holdout_set_from_training_data's score is: -0.821
[14] random_holdout_set_from_training_data's score is: -0.816
[15] random_holdout_set_from_training_data's score is: -0.812
[16] random_holdout_set_from_training_data's score is: -0.808
[17] random_holdout_set_from_training_data's score is: -0.805
[18] random_holdout_set_from_training_data's score is: -0.802
[19] random_holdout_set_from_training_data's score is: -0.8
[20] random_holdout_set_from_training_data's score is: -0.798
[21] random_holdout_set_from_training_data's score is: -0.796
[22] random_ho

[24] random_holdout_set_from_training_data's score is: -0.8
[25] random_holdout_set_from_training_data's score is: -0.799
[26] random_holdout_set_from_training_data's score is: -0.798
[27] random_holdout_set_from_training_data's score is: -0.797
[28] random_holdout_set_from_training_data's score is: -0.796
[29] random_holdout_set_from_training_data's score is: -0.795
[30] random_holdout_set_from_training_data's score is: -0.794
[31] random_holdout_set_from_training_data's score is: -0.793
[32] random_holdout_set_from_training_data's score is: -0.793
[33] random_holdout_set_from_training_data's score is: -0.792
[34] random_holdout_set_from_training_data's score is: -0.792
[35] random_holdout_set_from_training_data's score is: -0.792
[36] random_holdout_set_from_training_data's score is: -0.792
[37] random_holdout_set_from_training_data's score is: -0.792
[38] random_holdout_set_from_training_data's score is: -0.791
[39] random_holdout_set_from_training_data's score is: -0.791
[40] rando

[52] random_holdout_set_from_training_data's score is: -0.771
[54] random_holdout_set_from_training_data's score is: -0.77
[56] random_holdout_set_from_training_data's score is: -0.77
[58] random_holdout_set_from_training_data's score is: -0.77
[60] random_holdout_set_from_training_data's score is: -0.77
[62] random_holdout_set_from_training_data's score is: -0.77
[64] random_holdout_set_from_training_data's score is: -0.77
[66] random_holdout_set_from_training_data's score is: -0.77
[68] random_holdout_set_from_training_data's score is: -0.769
[70] random_holdout_set_from_training_data's score is: -0.77
[72] random_holdout_set_from_training_data's score is: -0.769
[74] random_holdout_set_from_training_data's score is: -0.769
[76] random_holdout_set_from_training_data's score is: -0.769
[78] random_holdout_set_from_training_data's score is: -0.769
[80] random_holdout_set_from_training_data's score is: -0.77
[82] random_holdout_set_from_training_data's score is: -0.77
[84] random_holdou

Running basic data cleaning
Fitting DataFrameVectorizer
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
[1] random_holdout_set_from_training_data's score is: -0.921
[2] random_holdout_set_from_training_data's score is: -0.903
[3] random_holdout_set_from_training_data's score is: -0.886
[4] random_holdout_set_from_training_data's score is: -0.871
[5] random_holdout_set_from_training_data's score is: -0.858
[6] random_holdout_set_from_training_data's score is: -0.846
[7] random_holdout_set_from_training_data's score is: -0.835
[8] random_holdout_set_from_training_data's score is: -0.827
[9] random_holdout_set_from_training_data's score is: -0.818
[10] random_holdout_set_from_training_data's score is: -0.811
[11] random_holdout_set_from_training_data's score is: -0.806
[12] random_holdout_set_from_trai

Welcome to auto_ml! We're about to go through and make sense of your data using machine learning, and give you a production-ready pipeline to get predictions with.

If you have any issues, or new feature ideas, let us know at http://auto.ml
You are running on version 2.9.10
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
Running basic data cleaning
Fitting DataFrameVectorizer
Now using the model training_params that you passed in:
{}
After overwriting our defaults with your values, here are the final params that will be used to initialize the model:
{'presort': False, 'learning_rate': 0.1, 'warm_start': True}
[1] random_holdout_set_from_training_data's score is: -0.951
[2] random_holdout_set_from_training_data's score is: -0.932
[3] random_holdout_set_from_training_data's score is: -0.912
[4] random

[1] random_holdout_set_from_training_data's score is: -0.921
[2] random_holdout_set_from_training_data's score is: -0.901
[3] random_holdout_set_from_training_data's score is: -0.883
[4] random_holdout_set_from_training_data's score is: -0.868
[5] random_holdout_set_from_training_data's score is: -0.854
[6] random_holdout_set_from_training_data's score is: -0.842
[7] random_holdout_set_from_training_data's score is: -0.833
[8] random_holdout_set_from_training_data's score is: -0.824
[9] random_holdout_set_from_training_data's score is: -0.816
[10] random_holdout_set_from_training_data's score is: -0.809
[11] random_holdout_set_from_training_data's score is: -0.803
[12] random_holdout_set_from_training_data's score is: -0.798
[13] random_holdout_set_from_training_data's score is: -0.792
[14] random_holdout_set_from_training_data's score is: -0.787
[15] random_holdout_set_from_training_data's score is: -0.784
[16] random_holdout_set_from_training_data's score is: -0.78
[17] random_holdou

[25] random_holdout_set_from_training_data's score is: -0.791
[26] random_holdout_set_from_training_data's score is: -0.789
[27] random_holdout_set_from_training_data's score is: -0.788
[28] random_holdout_set_from_training_data's score is: -0.788
[29] random_holdout_set_from_training_data's score is: -0.786
[30] random_holdout_set_from_training_data's score is: -0.785
[31] random_holdout_set_from_training_data's score is: -0.784
[32] random_holdout_set_from_training_data's score is: -0.782
[33] random_holdout_set_from_training_data's score is: -0.782
[34] random_holdout_set_from_training_data's score is: -0.781
[35] random_holdout_set_from_training_data's score is: -0.78
[36] random_holdout_set_from_training_data's score is: -0.779
[37] random_holdout_set_from_training_data's score is: -0.779
[38] random_holdout_set_from_training_data's score is: -0.779
[39] random_holdout_set_from_training_data's score is: -0.778
[40] random_holdout_set_from_training_data's score is: -0.777
[41] rand

[68] random_holdout_set_from_training_data's score is: -0.787
[70] random_holdout_set_from_training_data's score is: -0.787
[72] random_holdout_set_from_training_data's score is: -0.787
[74] random_holdout_set_from_training_data's score is: -0.785
[76] random_holdout_set_from_training_data's score is: -0.784
[78] random_holdout_set_from_training_data's score is: -0.784
[80] random_holdout_set_from_training_data's score is: -0.784
[82] random_holdout_set_from_training_data's score is: -0.784
[84] random_holdout_set_from_training_data's score is: -0.784
[86] random_holdout_set_from_training_data's score is: -0.784
[88] random_holdout_set_from_training_data's score is: -0.784
[90] random_holdout_set_from_training_data's score is: -0.785
[92] random_holdout_set_from_training_data's score is: -0.785
[94] random_holdout_set_from_training_data's score is: -0.785
[96] random_holdout_set_from_training_data's score is: -0.785
[98] random_holdout_set_from_training_data's score is: -0.784
[100] ra

In [14]:
# NOTE(review): RandomizedLogisticRegression was removed from scikit-learn in
# 0.21 and is not used in this cell; with a newer scikit-learn this import
# fails. ARDRegression is imported but currently disabled below.
from sklearn.linear_model import (
    ARDRegression,
    BayesianRidge,
    ElasticNet,
    ElasticNetCV,
    HuberRegressor,
    LinearRegression,
    LogisticRegression,
    LogisticRegressionCV,
    PassiveAggressiveRegressor,
    RandomizedLogisticRegression,
    Ridge,
    TheilSenRegressor,
)

# Candidate estimators keyed by the name used to label their results.
# All are created with default hyper-parameters.
# NOTE(review): LogisticRegression and LogisticRegressionCV are classifiers,
# not regressors -- confirm the target is discrete before relying on them.
regresors = {
    # 'ARDRegression': ARDRegression(),  # disabled in the original notebook
    'BayesianRidge': BayesianRidge(),
    'ElasticNet': ElasticNet(),
    'ElasticNetCV': ElasticNetCV(),
    'HuberRegressor': HuberRegressor(),
    'LinearRegression': LinearRegression(),
    'LogisticRegression': LogisticRegression(),
    'LogisticRegressionCV': LogisticRegressionCV(),
    'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
    'Ridge': Ridge(),
    'TheilSenRegressor': TheilSenRegressor(),
}


In [15]:
# Values swept as the first argument of PCAOnlyRatingsRegresor
# (presumably the number of PCA components -- TODO confirm against PCA_regresor).
number_of_component = [1,2,4,8,10,16,24,32]

# Evaluate every (estimator, component count) pair on filled inputs only:
# the trailing positional True is check_predictor's needFill argument.
# Note that the same estimator instance `v` is reused for all component counts.
for k,v in regresors.items():
    for noc in number_of_component:
        check_predictor(PCAOnlyRatingsRegresor(noc, v), 'PCAOnlyRatingsRegresor_%s_%d'%(k,noc), True)

min score:  0.8494882469564073   PCAOnlyRatingsRegresor_BayesianRidge_1
min score:  0.8121626159601222   PCAOnlyRatingsRegresor_BayesianRidge_2
min score:  0.7954539877169253   PCAOnlyRatingsRegresor_BayesianRidge_4
min score:  0.7772594696313502   PCAOnlyRatingsRegresor_BayesianRidge_8
min score:  0.7846004382376315   PCAOnlyRatingsRegresor_BayesianRidge_10
min score:  0.7767135952835995   PCAOnlyRatingsRegresor_BayesianRidge_16
min score:  0.7752707737233668   PCAOnlyRatingsRegresor_BayesianRidge_24
min score:  0.7703669225815173   PCAOnlyRatingsRegresor_BayesianRidge_32
min score:  0.8619823396517561   PCAOnlyRatingsRegresor_ElasticNet_1
min score:  0.8508942564671204   PCAOnlyRatingsRegresor_ElasticNet_2
min score:  0.8539565017814607   PCAOnlyRatingsRegresor_ElasticNet_4
min score:  0.8564247875310278   PCAOnlyRatingsRegresor_ElasticNet_8
min score:  0.8532179687388005   PCAOnlyRatingsRegresor_ElasticNet_10
min score:  0.8507019621484208   PCAOnlyRatingsRegresor_ElasticNet_16
min 



min score:  1.713038580429508   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_1
min score:  1.2668035668692172   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_2
min score:  1.3282508523002676   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_4
min score:  1.189246555524724   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_8
min score:  1.1628207944912938   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_10
min score:  1.1811155751290139   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_16
min score:  1.1123273946082972   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_24
min score:  1.1165017669104362   PCAOnlyRatingsRegresor_PassiveAggressiveRegressor_32
min score:  0.8457358114825414   PCAOnlyRatingsRegresor_Ridge_1
min score:  0.8168360034871794   PCAOnlyRatingsRegresor_Ridge_2
min score:  0.7920161890164346   PCAOnlyRatingsRegresor_Ridge_4
min score:  0.781585377638214   PCAOnlyRatingsRegresor_Ridge_8
min score:  0.7829438462226717   PCAOnlyRatingsRegresor_Ridge_1

In [16]:
# Summary table: one row per predictor, ordered by its best (lowest) score,
# one column per filling method. The predictor-name column has no header of
# its own, so the headers list is one entry shorter than each row.
headers = filling_nan_methods.keys()
values = [
    [name, *scores[name]]
    for name in sorted(scores, key=get_min_score)
]
print(tabulate(values, headers=headers, tablefmt='orgtbl'))

|                                                      |      none |   with_mean |   with_zeros |
|------------------------------------------------------+-----------+-------------+--------------|
| AutoMl_Regresor                                      |   0.76833 |    0.774097 |     0.761458 |
| NaiveRegresor_WithDateSubstraction                   | inf       |    0.777165 |     0.766795 |
| PCAOnlyRatingsRegresor_Ridge_24                      | inf       |    0.767568 |     0.803483 |
| NaiveRegresor                                        | inf       |    0.773277 |     0.767765 |
| NaiveRegresor_WithNoDate                             | inf       |    0.773276 |     0.769313 |
| PCAOnlyRatingsRegresor_LinearRegression_24           | inf       |    0.770215 |     0.800179 |
| PCAOnlyRatingsRegresor_BayesianRidge_32              | inf       |    0.770367 |     0.797272 |
| PCAOnlyRatingsRegresor_ElasticNetCV_16               | inf       |    0.770461 |     0.804733 |
| PCAOnlyRatingsRegr