# Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from joblib import dump

In [2]:
# Load the three training feature arrays and the shared target array from
# ../data. Files are read in the order listed and unpacked positionally, so
# the order of the paths must match the order of the names on the left.
X_train, X_train_all_indices, X_train_selected_indices, y_train = (
    np.load(npy_path)
    for npy_path in (
        '../data/X_train.npy',
        '../data/X_train_all_indices.npy',
        '../data/X_train_selected_indices.npy',
        '../data/y_train.npy',
    )
)

# Model Training

In [3]:
# Exhaustive 5-fold cross-validated search over decision-tree hyperparameters,
# fit on X_train / y_train and scored by negative mean absolute error.
#
# random_state is fixed because tree fitting is randomized: candidate features
# are permuted at every split, and the 'sqrt' / 'log2' settings of max_features
# subsample them. Without a seed the selected "best" parameters can change
# from one run of this cell to the next.
model = DecisionTreeRegressor(random_state=42)

# 6 * 8 * 7 * 3 = 1008 parameter combinations.
param_grid = {
    'max_depth': [None, 5, 10, 20, 50, 100],
    'min_samples_split': [2, 5, 10, 20, 50, 100, 200, 300],
    'min_samples_leaf': [1, 2, 5, 10, 20, 50, 100],
    'max_features': [None, 'sqrt', 'log2']
}

grid_search = GridSearchCV(
    model,
    param_grid,
    cv=5,
    n_jobs=-1,  # run the fits in parallel on all available cores
    verbose=1,
    scoring='neg_mean_absolute_error',
)
grid_search.fit(X_train, y_train)
print(f"Best parameters: {grid_search.best_params_}")
# The scorer is negated MAE (higher is better), so flip the sign to report MAE.
print(f"Best CV MAE: {-grid_search.best_score_:.4f}")

Fitting 5 folds for each of 1008 candidates, totalling 5040 fits


In [None]:
# Fit a decision tree on X_train with fixed hyperparameters and persist it.
# The hyperparameters are hard-coded rather than read from
# grid_search.best_params_ -- presumably copied from an earlier search run;
# TODO confirm they still match the search result.
# random_state is fixed so the saved model is reproducible: scikit-learn
# permutes features at each split, so equally good splits can otherwise be
# resolved differently between runs.
model = DecisionTreeRegressor(
    max_depth=10,
    min_samples_leaf=10,
    min_samples_split=100,
    random_state=42,
)
model.fit(X_train, y_train)
dump(model, '../models/decision_tree_10_bands.joblib')

In [None]:
# Fit a decision tree on X_train_all_indices with fixed hyperparameters and
# persist it.
# NOTE(review): the grid search in this notebook is fit on X_train only, so
# these hard-coded values have not been tuned for this feature set here --
# confirm that reusing them is intended.
# random_state is fixed so the saved model is reproducible: scikit-learn
# permutes features at each split, so equally good splits can otherwise be
# resolved differently between runs.
model_all_indices = DecisionTreeRegressor(
    max_depth=10,
    min_samples_leaf=10,
    min_samples_split=100,
    random_state=42,
)
model_all_indices.fit(X_train_all_indices, y_train)
dump(model_all_indices, '../models/decision_tree_all_bands.joblib')

In [None]:
# Fit a decision tree on X_train_selected_indices with fixed hyperparameters
# and persist it.
# NOTE(review): the grid search in this notebook is fit on X_train only, so
# these hard-coded values have not been tuned for this feature set here --
# confirm that reusing them is intended.
# random_state is fixed so the saved model is reproducible: scikit-learn
# permutes features at each split, so equally good splits can otherwise be
# resolved differently between runs.
model_selected_indices = DecisionTreeRegressor(
    max_depth=10,
    min_samples_leaf=10,
    min_samples_split=100,
    random_state=42,
)
model_selected_indices.fit(X_train_selected_indices, y_train)
dump(model_selected_indices, '../models/decision_tree_selected_bands.joblib')