# Imports

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import r2_score
import pycaret.regression as pr

In [None]:
# Columns removed from the development frame before modelling.
# Grouping below is by column name only -- TODO confirm against the dataset.
cols_to_drop = [
    # presumably a row identifier and a split marker
    'PROJECT_ID', 'TRAIN_VAL_TEST_SPLIT',
    # presumably timestamp columns
    'DEADLINE', 'STATE_CHANGED_AT', 'CREATED_AT', 'LAUNCHED_AT',
    # presumably free-text columns
    'KEYWORDS', 'DESC', 'NAME',
    # presumably the campaign outcome, plus the raw backer count
    'FINAL_STATUS', 'BACKERS_COUNT',
]

# Load data

In [None]:
# Read the pickled development frame, append the log1p-transformed backer
# count as BACKERS_COUNT_LOG, then discard every column in cols_to_drop
# (which includes the raw BACKERS_COUNT).
filepath = r'/kaggle/input/kickstarter/01_df_development.pkl'
df_development = (
    pd.read_pickle(filepath)
    .assign(BACKERS_COUNT_LOG=lambda frame: np.log1p(frame['BACKERS_COUNT']))
    .drop(columns=cols_to_drop)
)
df_development

In [None]:
# Number of missing values in each remaining column.
df_development.isna().sum(axis=0)

# Run PyCaret

In [None]:
# Initialise the PyCaret regression experiment on the prepared frame, with
# the log-transformed backer count as target and a fixed session id.
s1 = pr.setup(
    data=df_development,
    target='BACKERS_COUNT_LOG',
    train_size=0.8,
    session_id=42,
)

In [None]:
def r2_adjusted(y_true, y_pred, **kwargs):
    """Return the adjusted R² of ``y_pred`` measured against ``y_true``.

    The value is ``1 - (1 - R²) * (n - 1) / (n - p - 1)``, where ``n`` is the
    number of samples and ``p`` the number of predictors.

    Args:
        y_true: Observed target values (any sized sequence or array).
        y_pred: Predicted target values, aligned with ``y_true``.
        **kwargs: ``num_predictors`` sets ``p`` and defaults to 1 when it is
            not supplied. Any other keyword is accepted and ignored.

    Returns:
        The adjusted R² as a float, or ``nan`` when ``n - p - 1 <= 0``: the
        correction factor is undefined there (division by zero or a negative
        number of degrees of freedom).
    """
    n = len(y_true)
    p = kwargs.get('num_predictors', 1)

    # Guard the denominator before doing any scoring work: with too few
    # samples for the given predictor count the statistic has no meaning.
    degrees_of_freedom = n - p - 1
    if degrees_of_freedom <= 0:
        return float('nan')

    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * ((n - 1) / degrees_of_freedom)

# Register adjusted R² as a custom PyCaret metric and list the metrics.
# Extra keyword arguments given to add_metric are forwarded to the score
# function, so the predictor count is supplied here; without it r2_adjusted
# falls back to p = 1 and the "adjusted" value ignores the real feature count.
# NOTE(review): assumes PyCaret 3.x, where get_config exposes
# 'X_train_transformed' (the training features after preprocessing) -- confirm.
n_predictors = pr.get_config('X_train_transformed').shape[1]
pr.add_metric('r2_adj', 'R2 Adjusted', r2_adjusted, num_predictors=n_predictors)
pr.get_metrics()

In [None]:
# Compare the available models, ranked by the custom adjusted-R² metric,
# and keep the top three.
best = pr.compare_models(
    n_select=3,
    sort='R2 Adjusted',
)

In [None]:
# Tune each shortlisted model, optimising adjusted R².
best_tuned = []
for candidate in best:
    tuned_candidate = pr.tune_model(candidate, optimize='R2 Adjusted')
    best_tuned.append(tuned_candidate)

In [None]:
# Combine the tuned models into a blended and a stacked ensemble.
best_blended = pr.blend_models(
    estimator_list=best_tuned,
    optimize='R2 Adjusted',
)
best_stacked = pr.stack_models(
    estimator_list=best_tuned,
    optimize='R2 Adjusted',
)

# Ask PyCaret for the best model of the session according to adjusted R².
best_overall_model = pr.automl(
    optimize='R2 Adjusted',
)
best_overall_model

# Create predictions

In [None]:
# Score the selected model. No data frame is passed, so PyCaret is expected
# to predict on the hold-out split created by setup -- TODO confirm.
predict_holdout = pr.predict_model(
    estimator=best_overall_model,
)
predict_holdout

In [None]:
# Undo the log1p transform of the target so the predictions are expressed
# on the original backer-count scale.
predict_holdout['prediction_label_exp'] = (
    predict_holdout['prediction_label'].pipe(np.expm1)
)
predict_holdout

In [None]:
# Show the row labels of the prediction frame; they are used further down
# to pick the matching rows out of the true backer counts.
predict_holdout.index

In [None]:
# Re-read the source pickle to recover the untransformed backer counts,
# which were dropped from df_development.
true_backers = pd.read_pickle(filepath).loc[:, 'BACKERS_COUNT']
true_backers

In [None]:
# Side-by-side view of the actual and the predicted backer counts.
# predict_holdout.index contains index *labels*, and pd.concat(axis=1) also
# aligns on labels, so the true values are selected with label-based .loc.
# Positional .iloc only gives the same rows when every label equals its
# position (a default RangeIndex); otherwise it picks the wrong rows.
# NOTE(review): assumes PyCaret kept the original index of the input frame
# in its predictions -- confirm for the installed version.
pd.concat(
    [
        true_backers.loc[predict_holdout.index],
        predict_holdout['prediction_label_exp'],
    ],
    axis=1,
)


# Save to files

In [None]:
# Persist the prediction frame and the selected model to the Kaggle
# working directory.
predict_holdout.to_pickle(path='/kaggle/working/predict_holdout.pkl')
pr.save_model(
    model=best_overall_model,
    model_name='/kaggle/working/saved_best_overall_model_stack',
)