In [16]:
import catboost
import ipywidgets
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, classification_report
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor

In [17]:
DATA_PATH = '../../data/'
FINAL_STAGE = 'final_stage/'

In [20]:
X_train = pd.read_csv(f'{DATA_PATH}{FINAL_STAGE}x_train.csv',sep='\t', index_col='index')
X_test = pd.read_csv(f'{DATA_PATH}{FINAL_STAGE}x_test.csv', sep='\t', index_col='index')
y_train = pd.read_csv(f'{DATA_PATH}{FINAL_STAGE}y_train.csv',sep='\t', index_col='index')
y_test = pd.read_csv(f'{DATA_PATH}{FINAL_STAGE}y_test.csv',sep='\t', index_col='index')


In [21]:
eval_dataset = catboost.Pool(X_test, y_test)

In [22]:
model = catboost.CatBoostRegressor(iterations=10000,
                            learning_rate=0.002,
                            task_type='GPU',
                            loss_function='RMSE',
                            random_seed= 42)

In [23]:
model.fit(X_train, y_train,use_best_model=True, eval_set= eval_dataset)

0:	learn: 0.0680157	test: 0.0729745	best: 0.0729745 (0)	total: 37.6ms	remaining: 6m 15s
1:	learn: 0.0678995	test: 0.0728569	best: 0.0728569 (1)	total: 72.1ms	remaining: 6m
2:	learn: 0.0677834	test: 0.0727391	best: 0.0727391 (2)	total: 95.1ms	remaining: 5m 16s
3:	learn: 0.0676672	test: 0.0726213	best: 0.0726213 (3)	total: 115ms	remaining: 4m 47s
4:	learn: 0.0675523	test: 0.0725045	best: 0.0725045 (4)	total: 133ms	remaining: 4m 26s
5:	learn: 0.0674369	test: 0.0723880	best: 0.0723880 (5)	total: 150ms	remaining: 4m 10s
6:	learn: 0.0673220	test: 0.0722720	best: 0.0722720 (6)	total: 165ms	remaining: 3m 55s
7:	learn: 0.0672095	test: 0.0721582	best: 0.0721582 (7)	total: 178ms	remaining: 3m 42s
8:	learn: 0.0670974	test: 0.0720447	best: 0.0720447 (8)	total: 192ms	remaining: 3m 32s
9:	learn: 0.0669830	test: 0.0719293	best: 0.0719293 (9)	total: 204ms	remaining: 3m 24s
10:	learn: 0.0668712	test: 0.0718163	best: 0.0718163 (10)	total: 217ms	remaining: 3m 17s
11:	learn: 0.0667574	test: 0.0717009	best:

<catboost.core.CatBoostRegressor at 0x1d7eb8a5148>

In [24]:
r2_score(y_test,model.predict(X_test))

0.8467708531494106

In [8]:
mean_absolute_error(y_test,model.predict(X_test))

0.03826380890192227

In [9]:
mean_squared_error(y_test,model.predict(X_test))

0.0025543847653607572

In [10]:
mean_absolute_error(y_test,model.predict(X_test))

0.03826380890192227

In [11]:
mean_squared_error(y_test,model.predict(X_test))

0.0025543847653607572

In [12]:
np.random.seed(42)

best_score = 0
best_model = None
for n_estimators in np.arange(250, 350, 25):
    for lr in np.arange(0.001, 0.1, 0.0025):
        current_model  = GradientBoostingRegressor(
            n_estimators=n_estimators,
            learning_rate=lr)
        current_model.fit(X_train, y_train.values.ravel())
        current_score = r2_score(y_test.values.ravel(), current_model.predict(X_test))

        if current_score > best_score:
            best_score = current_score
            best_model = current_model
            print(f'{best_score}')

0.11904833242294965
0.2757792731957951
0.34850044850912676
0.3913781654258591
0.41942504659198687
0.4351316331570505
0.44554400616370526
0.4535212931667165
0.4567412998768057
0.46300777446027497
0.46689403070760205
0.47167196551341184
0.4727888297351196
0.47795761377753665
0.47926765723024667
0.4819327718227635
0.48348428603931626
0.4837603266510957
0.4841709668997376
0.48533938994978854
0.48631749285690384


In [25]:
best_score = 0
best_model3 = None
for n in range(5, 30, 1):
    current_model  = KNeighborsRegressor(n_neighbors=n)
    current_model.fit(X_train, y_train.values.ravel())
    current_score = r2_score(y_test.values.ravel(), current_model.predict(X_test))

    if current_score > best_score:
        best_score = current_score
        best_model3 = current_model
        print(f'{best_score}')

0.8290947065561657
0.8357330745067698
0.8372722724690107
0.8383963179489028
0.8399883514162287
0.8424477205756857
0.842668598592162
0.8432489313836007
0.844375205892071
0.8474937189504501
0.8476694837882512
0.8492167145905687
0.8496692422918759
