# MassBalanceMachine Model Training - Example for the Iceland Region

In [None]:
import massbalancemachine as mbm
from sklearn.model_selection import GridSearchCV
import pandas as pd
import sklearn
import numpy as np

In [None]:
# Load the Iceland example CSV (region_monthly.csv) into a DataFrame.
# The path is relative to the current working directory.
dataset = pd.read_csv(
    filepath_or_buffer='./example_data/iceland/files/region_monthly.csv',
)

In [None]:
# Features: every column of the dataset except the year and the target.
df_X_train = dataset.drop(columns=['YEAR', 'POINT_BALANCE'])

# Metadata columns are kept in the feature frame but must come last,
# after all the regular feature columns.
metadata_columns = ['ID', 'N_MONTHS', 'MONTH']
feature_columns = [
    name for name in df_X_train.columns if name not in metadata_columns
]
df_X_train = df_X_train[feature_columns + metadata_columns]

# Target: the point mass balance, kept as a single-column DataFrame.
df_y_train = dataset.loc[:, ['POINT_BALANCE']]

# Plain arrays of features+metadata and of targets.
X_train = df_X_train.values
y_train = df_y_train.values

# One glacier ID per row, in the same row order as X_train and y_train.
glacier_ids = np.array(dataset['ID'])

# Five-fold cross-validation grouped by glacier ID: all rows sharing an ID
# end up in the same fold, so no glacier is split across train and validation.
group_kf_s = sklearn.model_selection.GroupKFold(n_splits=5)

# Materialise the (train_indices, validation_indices) pairs so the same
# folds can be reused by the hyperparameter search.
splits = list(group_kf_s.split(X_train, y_train, groups=glacier_ids))

In [None]:
# Estimator whose hyperparameters are tuned by the grid search below.
base_estimator = mbm.CustomXGBoostRegressor()

# Search grid: 3 * 2 * 2 * 2 = 24 parameter combinations.
parameters = {
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1],
    'n_estimators': [100, 200],
    'gamma': [0, 1],
}

# Exhaustive search over the grid, evaluated on the precomputed grouped folds.
clf = GridSearchCV(
    base_estimator,
    parameters,
    cv=splits,                # reuse the glacier-grouped folds built earlier
    refit=True,               # retrain the best configuration on all training rows
    return_train_score=True,  # also record scores on the training folds
    error_score='raise',      # fail loudly instead of scoring a broken fit as NaN
    n_jobs=-1,                # use all available cores
    verbose=10,
)

# Run the search on the feature frame (features followed by metadata columns).
clf.fit(df_X_train, y_train)

In [None]:
# Inspect the tuned model. GridSearchCV fits clones of `base_estimator`, so the
# object created above is never fitted itself; because the search was run with
# refit=True, the model retrained on the full training data with the best
# parameter combination is exposed as `best_estimator_`.
clf.best_estimator_