# Importing Libraries and Datasets

## Libraries

In [1]:
import numpy as np
import pandas as pd

## Training Dataset

In [2]:
dataset = pd.read_csv('train.csv')
X = dataset.iloc[:, 1:-6].values
y = dataset.iloc[:, -6:-4].values

## Testing Dataset

In [3]:
Test_dataset = pd.read_csv('test.csv')
X_submission = Test_dataset.iloc[:, 1:].values

# Spliting Training Dataset into test and training sets

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling

## Training Dataset

### Training Set

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

### Test Set

In [6]:
X_test = sc.transform(X_test)

## Test Dataset

In [7]:
X_submission = sc.transform(X_submission)

# Training and Tuning using Optuna

In [8]:
import xgboost
from sklearn.metrics import mean_squared_error
import optuna

def objective(trial):
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0)
    }
    model = xgboost.XGBRegressor(**param)
    model.fit(X_train, y_train[:, 0])
    predictions = model.predict(X_test)
    return mean_squared_error(y_test[:,0], predictions, squared=False)


# Create the study
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Print the best parameters
print('Best parameters', study.best_params)

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-07-09 12:16:01,973] A new study created in memory with name: no-name-69cb855d-5141-455c-a9be-531ad4049e48
[I 2023-07-09 12:16:02,199] Trial 0 finished with value: 0.46992865814417206 and parameters: {'max_depth': 2, 'learning_rate': 0.8883505759378988, 'n_estimators': 203, 'min_child_weight': 8, 'gamma': 0.42406630868952166, 'subsample': 0.7470873236543372, 'colsample_bytree': 0.1954053266955858, 'reg_alpha': 0.07560406608975936, 'reg_lambda': 0.6629720563237611}. Best is trial 0 with value: 0.46992865814417206.
[I 2023-07-09 12:16:04,851] Trial 1 finished with value: 1.3501554914305905e+18 and parameters: {'max_depth': 10, 'learning_rate': 0.795750728106047, 'n_estimators': 978, 'min_child_weight': 9, 'gamma': 0.09375843632805986, 'subsample': 0.10735492447974528, 'colsample_bytree': 0.9751252261493091, 'reg_alpha': 0.5872631102431511, 'reg_lambda': 0.4544009056315452}. Best is trial 0 with value: 0.46992865814417206.
[I 2023-0

Best parameters {'max_depth': 2, 'learning_rate': 0.06888600413221835, 'n_estimators': 201, 'min_child_weight': 3, 'gamma': 0.0356537935597181, 'subsample': 0.919214627352363, 'colsample_bytree': 0.6479551640312314, 'reg_alpha': 0.4805888998558167, 'reg_lambda': 0.2234388007912691}
