# Importing Libraries and Datasets

## Libraries

In [1]:
import numpy as np
import pandas as pd

## Training Dataset

In [2]:
dataset = pd.read_csv('train.csv')
X = dataset.iloc[:, 1:-6].values
y = dataset.iloc[:, -6:-4].values

## Testing Dataset

In [3]:
Test_dataset = pd.read_csv('test.csv')
X_submission = Test_dataset.iloc[:, 1:].values

# Spliting Training Dataset into test and training sets

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling

## Training Dataset

### Training Set

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

### Test Set

In [6]:
X_test = sc.transform(X_test)

## Test Dataset

In [7]:
X_submission = sc.transform(X_submission)

# Training and Tuning using Optuna

In [8]:
import catboost as cb
from sklearn.metrics import mean_squared_error
import optuna

def objective(trial):
    params = {
        "iterations": 1000,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
    }

    model = cb.CatBoostRegressor(**params, silent=True)
    model.fit(X_train, y_train[:, 1])
    predictions = model.predict(X_test)
    rmse = mean_squared_error(y_test[:, 1], predictions, squared=False)
    return rmse


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

print('Best hyperparameters for EC2:', study.best_params)
print('Best RMSE:', study.best_value)

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-07-08 16:19:23,866] A new study created in memory with name: no-name-7fda0aa3-d809-4db0-b93c-e9db3735cac6
[I 2023-07-08 16:19:26,048] Trial 0 finished with value: 0.40487043131956657 and parameters: {'learning_rate': 0.012907581222204798, 'depth': 6, 'subsample': 0.25936431377184477, 'colsample_bylevel': 0.07110929076261015, 'min_data_in_leaf': 26}. Best is trial 0 with value: 0.40487043131956657.
[I 2023-07-08 16:19:27,090] Trial 1 finished with value: 0.40598895789332134 and parameters: {'learning_rate': 0.007056517456314465, 'depth': 1, 'subsample': 0.15647215701928285, 'colsample_bylevel': 0.6515562630266812, 'min_data_in_leaf': 15}. Best is trial 0 with value: 0.40487043131956657.
[I 2023-07-08 16:19:28,255] Trial 2 finished with value: 0.4060453021400358 and parameters: {'learning_rate': 0.003205379503296367, 'depth': 1, 'subsample': 0.7684337891860139, 'colsample_bylevel': 0.7350207077962227, 'min_data_in_leaf': 1}. Best 

Best hyperparameters for EC2: {'learning_rate': 0.01632700367902254, 'depth': 3, 'subsample': 0.6001420520780691, 'colsample_bylevel': 0.5074737933892164, 'min_data_in_leaf': 75}
Best RMSE: 0.40452560255291253
