In [None]:
import pandas as pd
# Load the preprocessed train and test tables in one pass; the tuple order
# matches the unpacking order on the left-hand side.
train, test = map(
    pd.read_csv,
    ('../input/preprocessed/train_v2.csv', '../input/preprocessed/test_v2.csv'),
)


In [None]:
from sklearn.model_selection import KFold
import numpy as np
from sklearn.ensemble import AdaBoostRegressor

# Keyword arguments forwarded to AdaBoostRegressor(**params) for every fold.
params = dict(
    n_estimators=250,
    learning_rate=0.01,
    loss='square',
)

def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error of y_pred against y_true.

    Computes sqrt(mean(((y_true - y_pred) / y_true) ** 2)).

    Parameters
    ----------
    y_true : array-like
        Ground-truth values (list, tuple, ndarray, or pandas Series).
    y_pred : array-like
        Predicted values, paired with y_true by position.

    Returns
    -------
    numpy.float64
        The RMSPE. Entries of y_true equal to zero are divided by as-is, so
        the result is inf or nan in that case (numpy also emits a
        RuntimeWarning).
    """
    # Coerce to plain float arrays so the pairing is strictly positional:
    # two pandas Series with different indexes would otherwise be aligned by
    # label, and plain Python lists would not support the arithmetic at all.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))

def feval_rmspe(y_pred, lgb_train):
    """Evaluation callback that scores predictions with RMSPE.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    lgb_train : object
        Any object exposing ``get_label()`` that returns the true targets
        (presumably a LightGBM Dataset, judging by the name; not used
        elsewhere in the visible notebook).

    Returns
    -------
    tuple
        ``('RMSPE', score, False)``; the trailing ``False`` presumably marks
        the metric as lower-is-better for the calling framework (confirm
        against the framework's custom-metric convention).
    """
    labels = lgb_train.get_label()
    score = rmspe(labels, y_pred)
    return 'RMSPE', score, False

def train_and_evaluate_adaboost(train, test, n_splits=5, seed=2021):
    """Fit AdaBoost regressors with K-fold cross-validation and predict the test set.

    For each fold a fresh ``AdaBoostRegressor`` (configured from the
    module-level ``params``) is fitted on the training part, used to fill the
    out-of-fold predictions for the validation part, and used to predict the
    full test set. Test predictions are averaged over the folds. The
    out-of-fold RMSPE is printed at the end.

    Parameters
    ----------
    train : pandas.DataFrame
        Training data; must contain a ``target`` column. Every column except
        ``time_id``, ``target``, ``row_id`` and ``continuous`` is used as a
        feature.
    test : pandas.DataFrame
        Test data containing the same feature columns as ``train``.
    n_splits : int, default 5
        Number of cross-validation folds; also the divisor used when
        averaging the per-fold test predictions.
    seed : int, default 2021
        Seed for the fold shuffling, and for the regressor unless ``params``
        already provides its own ``random_state``.

    Returns
    -------
    tuple
        ``(test_predictions, model)`` where ``test_predictions`` is a 1-D
        numpy array with the fold-averaged predictions for ``test`` and
        ``model`` is the regressor fitted on the LAST fold only (not an
        ensemble of all folds).
    """
    excluded = {"time_id", "target", "row_id", "continuous"}
    features = [col for col in train.columns if col not in excluded]
    y = train['target']

    # Materialise the feature matrices once instead of once per fold.
    x_all = train[features].values
    y_all = y.values
    x_test = test[features].values

    # Out-of-fold predictions (one per training row) and averaged test predictions.
    oof_predictions = np.zeros(train.shape[0])
    test_predictions = np.zeros(test.shape[0])

    kfold = KFold(n_splits=n_splits, random_state=seed, shuffle=True)
    for fold, (trn_ind, val_ind) in enumerate(kfold.split(train)):
        print(f'Training fold {fold + 1}')

        # Seed the regressor so reruns are reproducible; an explicit
        # random_state in `params` takes precedence.
        model = AdaBoostRegressor(**{'random_state': seed, **params})
        model.fit(x_all[trn_ind], y_all[trn_ind])

        oof_predictions[val_ind] = model.predict(x_all[val_ind])
        # Divide by the actual fold count so the average stays correct when
        # n_splits is changed.
        test_predictions += model.predict(x_test) / n_splits

    rmspe_score = rmspe(y, oof_predictions)
    print(f'Our out of folds RMSPE is {rmspe_score}')
    return test_predictions, model
# Train with cross-validation, then attach the averaged predictions to the
# test frame and write the submission file.
predictions_adaboost, adaboost_model = train_and_evaluate_adaboost(train, test)
test['target'] = predictions_adaboost
test.loc[:, ['row_id', 'target']].to_csv(
    'submission_adaboost.csv',
    index=False,
)


In [None]:
import pickle
# Persist the fitted model returned by the training cell (the last fold's
# regressor). The context manager guarantees the file is flushed and closed
# even if pickling raises.
filename = 'adaboost_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(adaboost_model, model_file)
