In [134]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel
from sklearn.metrics import r2_score

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, train_test_split, RepeatedKFold
from tqdm import tqdm
from typing import Tuple, Union

In [2]:



def prepare_dataset(DATA_PATH: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Prepare dataset.
    Load data, split into X and y, one-hot encode categorical

    Parameters
    ----------
    DATA_PATH: str :
        path to the dataset

    Returns
    -------
    Tuple[np.ndarray, np.ndarray] :
        X and y
    """
    df = pd.read_csv(DATA_PATH)
    df = df.drop(["ID"], axis=1)
    y = df.pop("y").values

    # select only numeric columns
    X_num = df.select_dtypes(include="number")

    # select only categorical columns and one-hot encode them
    X_cat = df.select_dtypes(exclude="number")
    X_cat = pd.get_dummies(X_cat)

    # combine numeric and categorical
    X = pd.concat([X_num, X_cat], axis=1)
    X = X.fillna(0).values

    return X, y

In [170]:
data_path = "train.csv.zip"
X, y = prepare_dataset(data_path)
params_list = [
    {"max_depth": 10},  # baseline
    {"max_depth": 2},
    {"max_depth": 3},
    # {"max_depth": 4},
    # {"max_depth": 5},
    # {"max_depth": 9},
    # {"max_depth": 11},
    {"max_depth": 12},
    {"max_depth": 15},
]

In [135]:
from typing import Callable, Dict

from typing import List


def cross_val_score(
        model: Callable,
        X: np.ndarray,
        y: np.ndarray,
        cv: Union[int, Tuple[int, int]],
        params_list: List[Dict],
        scoring: Callable,
        random_state: int = 42,
        show_progress: bool = False,
) -> np.ndarray:

    if isinstance(cv, int):
        kf = KFold(n_splits=cv,shuffle=True, random_state=random_state)
    if isinstance(cv, tuple):
        kf = RepeatedKFold(n_splits=cv[0], n_repeats=cv[1], random_state=random_state)
    all_scores = []
    for n,params in tqdm(enumerate(params_list)):
        # fit
        model.set_params(**params)
        model_scores = []
        for train_index, test_index in kf.split(X):

            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            model.fit(X_train, np.log1p(y_train))
        # predict
            y_pred = np.expm1(model.predict(X_test))
        #evaluate
            score = scoring(y_test, y_pred)
            model_scores.append(score)
        all_scores.append(model_scores)
    return np.array(all_scores)

In [136]:
cv = (2,3)

In [157]:
all_scores_int = cross_val_score(RandomForestRegressor(n_estimators=100), X=X, y=y, cv=2, params_list=params_list, scoring=r2_score)
all_scores_tuple = cross_val_score(RandomForestRegressor(n_estimators=100), X=X, y=y, cv=cv, params_list=params_list, scoring=r2_score)

5it [00:27,  5.56s/it]
5it [01:07, 13.42s/it]


In [166]:
all_scores_int

array([[0.50494151, 0.59079276],
       [0.44752522, 0.5645706 ],
       [0.51910232, 0.60335357],
       [0.50942104, 0.59089824],
       [0.50139704, 0.5828795 ]])

In [164]:
all_scores_mean = []
if isinstance(cv, tuple):
    for j in range(cv[0]):
        indixes = [i for i in(range(cv[0]*cv[1])) if i % cv[0] == j]
        all_scores_mean.append(all_scores_tuple[:,indixes].mean(axis=1))
    all_scores_mean = np.array(all_scores_mean).T
if isinstance(cv, int):
    all_scores_mean = all_scores.copy()

In [168]:
compare_list = []
for i ,*(value) in enumerate(all_scores_mean[1:]):
    mean = np.array(value).mean()
    baseline_mean = np.array(all_scores_mean[0]).mean()
    _, p_value = ttest_rel(all_scores_mean[0],*value)
    compare_dict = {}
    compare_dict['model_index'] = i+1
    compare_dict['avg_score'] = mean
    compare_dict['p_value'] = p_value
    compare_dict['effect_sign'] = (1 if mean > baseline_mean else -1) if p_value < alpha else 0
    compare_list.append(compare_dict)
np.array(sorted(compare_list, key=lambda x: x['avg_score'], reverse=True))

array([{'model_index': 2, 'avg_score': 0.5612279452414543, 'p_value': 0.038073349175414585, 'effect_sign': 1},
       {'model_index': 3, 'avg_score': 0.5501596432684417, 'p_value': 0.48501209266135786, 'effect_sign': 0},
       {'model_index': 4, 'avg_score': 0.542138266695591, 'p_value': 0.2319074847901385, 'effect_sign': 0},
       {'model_index': 1, 'avg_score': 0.5060479103388542, 'p_value': 0.2272633372071376, 'effect_sign': 0}],
      dtype=object)

In [88]:
all_scores_mean[1:].T[0]

array([0.47740395, 0.52913998, 0.51648235, 0.51039212])

In [169]:
def cross_val_score(
        model: Callable,
        X: np.ndarray,
        y: np.ndarray,
        cv: Union[int, Tuple[int, int]],
        params_list: List[Dict],
        scoring: Callable,
        random_state: int = 42,
        show_progress: bool = False,
) -> np.ndarray:
    if isinstance(cv, int):
        kf = KFold(n_splits=cv,shuffle=True, random_state=random_state)
    if isinstance(cv, tuple):
        kf = RepeatedKFold(n_splits=cv[0], n_repeats=cv[1], random_state=random_state)
    all_scores = []
    for n,params in tqdm(enumerate(params_list)):
        # fit
        model.set_params(**params)
        model_scores = []
        for train_index, test_index in kf.split(X):

            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            model.fit(X_train, np.log1p(y_train))
        # predict
            y_pred = np.expm1(model.predict(X_test))
        #evaluate
            score = scoring(y_test, y_pred)
            model_scores.append(score)
        all_scores.append(model_scores)
    return np.array(all_scores)



In [172]:
all_scores_int = cross_val_score(RandomForestRegressor(n_estimators=200), X=X, y=y, cv=(5,2), params_list=params_list, scoring=r2_score)
all_scores_int

5it [05:37, 67.53s/it]


array([[0.56186938, 0.43501315, 0.59687054, 0.6010241 , 0.58383614,
        0.59509957, 0.58787666, 0.55886014, 0.47818819, 0.56496156],
       [0.5097085 , 0.36850778, 0.52468815, 0.56239382, 0.54036264,
        0.55695809, 0.49103926, 0.49110835, 0.41930866, 0.54316185],
       [0.59384413, 0.43388222, 0.59811252, 0.61315444, 0.59782086,
        0.61562857, 0.59820368, 0.55156666, 0.47808389, 0.57617157],
       [0.57041839, 0.43773376, 0.59798818, 0.60519852, 0.5885233 ,
        0.59665023, 0.59098726, 0.5610724 , 0.47965388, 0.56760145],
       [0.54207877, 0.42317512, 0.5925958 , 0.59792945, 0.56566671,
        0.57541082, 0.57806532, 0.54890585, 0.47080889, 0.55658331]])

In [174]:
ttest_rel(all_scores_int[1], all_scores_int[0])

TtestResult(statistic=-8.196244152122365, pvalue=1.822908207065538e-05, df=9)

In [175]:
all_scores_mean = []
if isinstance(cv, tuple):
    for j in range(cv[0]):
        indixes = [i for i in(range(cv[0]*cv[1])) if i % cv[0] == j]
        all_scores_mean.append(all_scores_tuple[:,indixes].mean(axis=1))
    all_scores_mean = np.array(all_scores_mean).T
if isinstance(cv, int):
    all_scores_mean = all_scores_int.copy()

In [176]:
all_scores_mean

array([[0.53252316, 0.55498465],
       [0.48676735, 0.52533034],
       [0.5500064 , 0.57208828],
       [0.53420665, 0.55909289],
       [0.52444872, 0.54999557]])

In [177]:
ttest_rel(all_scores_mean[1], all_scores_mean[0])

TtestResult(statistic=-4.683422601298516, pvalue=0.1339194777047209, df=1)

In [179]:
alpha= 0.05
compare_list = []
for i ,*(value) in enumerate(all_scores_int[1:]):
    mean = np.array(value).mean()
    baseline_mean = np.array(all_scores_int[0]).mean()
    _, p_value = ttest_rel(all_scores_int[0],*value)
    compare_dict = {}
    compare_dict['model_index'] = i+1
    compare_dict['avg_score'] = mean
    compare_dict['p_value'] = p_value
    compare_dict['effect_sign'] = (1 if mean > baseline_mean else -1) if p_value < alpha else 0
    compare_list.append(compare_dict)
np.array(sorted(compare_list, key=lambda x: x['avg_score'], reverse=True))

array([{'model_index': 2, 'avg_score': 0.5656468536180654, 'p_value': 0.032221027784356475, 'effect_sign': 1},
       {'model_index': 3, 'avg_score': 0.5595827369756207, 'p_value': 0.001213844801917392, 'effect_sign': 1},
       {'model_index': 4, 'avg_score': 0.5451220040078354, 'p_value': 0.0002502157698045562, 'effect_sign': -1},
       {'model_index': 1, 'avg_score': 0.5007237089877614, 'p_value': 1.822908207065538e-05, 'effect_sign': -1}],
      dtype=object)

In [181]:
all_scores_int[0].mean()

0.5563599429372117