In [1]:
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import BayesianRidge
import gc

In [2]:
df = pd.read_pickle("df_completeMy.pkl")

In [3]:
df['date_block_num'].unique

<bound method Series.unique of 4488710     12
4488711     12
4488712     12
4488713     12
4488714     12
            ..
11056225    34
11056226    34
11056227    34
11056228    34
11056229    34
Name: date_block_num, Length: 6567520, dtype: int8>

In [3]:
X_train = df[~df.date_block_num.isin([34])]
y_train = X_train['item_cnt_month']
del X_train['item_cnt_month']

#X_val = df[df['date_block_num']==33]
#y_val = X_val['item_cnt_month']
#del X_val['item_cnt_month']

X_test = df[df['date_block_num']==34].drop(columns='item_cnt_month')
X_test = X_test.reset_index()
del X_test['index']

del df
gc.collect()


0

In [None]:
#untuned AdaBoost
abr = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(min_samples_leaf = 5), n_estimators=100, learning_rate=0.01 , random_state=0)
abr.fit(X_train, y_train)
submission = pd.read_csv('sample_submission.csv')
submission['item_cnt_month'] = abr.predict(X_test).clip(0,20)
submission[['ID', 'item_cnt_month']].to_csv('ada_tuning2_submission.csv', index=False)

In [None]:
#AdaBoost Hyperparameters Tuning
abr = AdaBoostRegressor(base_estimator=DecisionTreeRegressor())

parameters = {'base_estimator__max_depth':[2,6,10],#i for i in range(2,11,2)],
              'base_estimator__min_samples_leaf':[5,10],
              'n_estimators':[10,50,100,250],
              'learning_rate':[0.001,0.01,0.1]}

clf = GridSearchCV(abr, parameters,scoring='neg_root_mean_squared_error',n_jobs=-1, return_train_score = True)
clf.fit(X_train,y_train)
print('best_params:', clf.best_params_)

In [None]:
#AdaBoost Tuned
DTR = DecisionTreeRegressor(max_depth = clf.best_params_['base_estimator__max_depth'], min_samples_leaf = clf.best_params_['base_estimator__min_samples_leaf'])
abr = AdaBoostRegressor(base_estimator = DTR, n_estimators=clf.best_params_['n_estimators'], learning_rate= clf.best_params_['learning_rate'], random_state=0)
abr.fit(X_train, y_train)
submission = pd.read_csv('sample_submission.csv')
submission['item_cnt_month'] = abr.predict(X_test).clip(0,20)
submission[['ID', 'item_cnt_month']].to_csv('tuned_submission.csv', index=False)

In [5]:
#Untuned BayesianRidge
byr = BayesianRidge()
byr.fit(X_train,y_train)
y_pred_non_tuning = byr.predict(X_test)

byr = BayesianRidge()
byr.fit(X_train,y_train)
submission = pd.read_csv('sample_submission.csv')
submission['item_cnt_month'] = byr.predict(X_test).clip(0,20)
submission[['ID', 'item_cnt_month']].to_csv('nt_byr_submission.csv', index=False)


In [None]:
#BayesianRidge Hyperparameters Tuning
print("hyperparameter tuning")

bay_params = {'alpha_init':[1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.9], 'lambda_init': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-9]}
a = GridSearchCV(estimator=BayesianRidge(), param_grid = bay_params, cv = 3, n_jobs=-1) 
a.fit(X_train,y_train)

print(a.best_params_)

In [None]:
#BayesianRidge Hyperparameters Tuned
byr = BayesianRidge(alpha_init = a.best_params_['alpha_init'], lambda_init = a.best_params_['lambda_init'])
byr.fit(X_train,y_train)
y_pred_non_tuning = byr.predict(X_test)
submission['item_cnt_month'] = byr.predict(X_test).clip(0,20)
submission[['ID', 'item_cnt_month']].to_csv('byr_submission.csv', index=False)