# Simple Modeling

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score, roc_auc_score
import xgboost as xgb
from xgboost import XGBRegressor, XGBClassifier

In [None]:
# Read the preprocessed train and test tables from the local data/ directory.
x_train, x_test = (
    pd.read_csv('data/train_preprocessed.csv'),
    pd.read_csv('data/test_preprocessed.csv'),
)
# Last expression of the cell: shows the first rows of the training table.
x_train.head()

In [None]:
# Columns removed from both tables before modelling.
drop_columns = [
    'ATA',
    'FLAG',
    'SHIPMANAGER',
    'U_WIND',
    'V_WIND',
    'AIR_TEMPERATURE',
    'BN',
    'ARI',
]
# Apply the identical in-place removal to the train and the test frame.
for frame in (x_train, x_test):
    frame.drop(columns=drop_columns, inplace=True)

In [None]:
# Keep only the rows whose DIST is non-zero, in both tables.
x_train = x_train.loc[x_train['DIST'] != 0, :]
x_test = x_test.loc[x_test['DIST'] != 0, :]

# Regression target: log(1 + CI_HOUR).  np.log1p is the numerically accurate
# form of np.log(x + 1) for values close to zero, and it already returns a
# new Series, so no explicit copy of the column is required.
# NOTE: CI_HOUR is still a column of x_train after this cell; it must be
# excluded from the feature matrix before any model is fitted on y_train.
y_train = np.log1p(x_train['CI_HOUR'])

In [None]:
# Exhaustive search space: 2 * 3**6 = 1458 parameter combinations, each
# evaluated with 5-fold cross-validation by the GridSearchCV below.
param_grid = {
    # Number of boosting rounds and maximum tree depth.
    'n_estimators': [100, 200],
    'max_depth': [5, 6, 7],
    # Column subsampling ratios (per tree level / per split node).
    'colsample_bylevel': [0.8, 0.9, 1.0],
    'colsample_bynode': [0.8, 0.9, 1.0],
    # Minimum loss reduction required to make a further split.
    'gamma': [0, 0.2, 0.4],
    # L1 ('alpha') and L2 ('lambda') regularisation strengths.
    'alpha': [0, 0.1, 0.01],
    'lambda': [0, 0.1, 0.01],
}

# Absolute-error objective, matching the MAE-based scoring used for selection.
# NOTE(review): 'gpu_hist' needs a CUDA-enabled XGBoost build and is reported
# as deprecated by XGBoost 2.x -- confirm against the installed version.
model = XGBRegressor(
    tree_method='gpu_hist',
    objective='reg:absoluteerror',
)

# scikit-learn maximises the score, hence the negated mean absolute error.
grid_search = GridSearchCV(
    model,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    verbose=3,
    cv=5,
)

In [None]:
# y_train is derived from x_train.CI_HOUR, so CI_HOUR must not be passed to
# the model as a feature as well: that would leak the target into the inputs
# and make the cross-validated scores meaningless.  errors='ignore' keeps this
# cell working if the column has already been removed upstream.
train_features = x_train.drop(columns=['CI_HOUR'], errors='ignore')

# Run the cross-validated grid search (last expression: shows the fitted search).
grid_search.fit(train_features, y_train)