In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import numpy as np

# Function to calculate MASE
def mean_absolute_scaled_error(y_true, y_pred, y_train):
    """Return the mean absolute scaled error (MASE) of a forecast.

    The mean absolute error between ``y_true`` and ``y_pred`` is divided by
    the mean absolute difference between consecutive values of ``y_train``
    (the in-sample error of a one-step naive forecast).

    Args:
        y_true: Observed values (any 1-D array-like of numbers).
        y_pred: Predicted values, positionally matched to ``y_true``.
        y_train: Training series, in the order its differences should be
            taken, used to compute the scaling factor.

    Returns:
        The MASE as a float. NaNs in any input propagate to the result.

    Raises:
        ValueError: If ``y_train`` has fewer than two observations, or if
            all of its consecutive differences are zero, because the scaling
            factor is then undefined.
    """
    # Work on plain float arrays so that lists are accepted and pandas
    # objects are compared by position rather than aligned on index labels.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    n = len(y_train)
    if n < 2:
        raise ValueError(
            "y_train needs at least two observations to compute the MASE scale"
        )

    # In-sample mean absolute error of the naive (previous value) forecast.
    scale = np.abs(np.diff(y_train)).sum() / (n - 1)
    if scale == 0:
        raise ValueError(
            "MASE is undefined: consecutive y_train values never change"
        )

    errors = np.abs(y_true - y_pred)
    return errors.mean() / scale

# Load the adjusted train sales data and the matching validation data.
train_data_path = 'adjusted_train_sales_data.csv'
validation_data_path = 'adjusted_train_sales_validation_data.csv'
train_data = pd.read_csv(train_data_path)
validation_data = pd.read_csv(validation_data_path)


# 'days_to_departure' and 'train_type' are the model inputs;
# 'adjusted_simulated_bookings' is the value being forecast.
feature_columns = ['days_to_departure', 'train_type']
train_features = train_data[feature_columns]
train_target = train_data['adjusted_simulated_bookings']
validation_features = validation_data[feature_columns]
validation_target = validation_data['adjusted_simulated_bookings']

# One-hot encode the categorical 'train_type' column.
train_features = pd.get_dummies(train_features, columns=['train_type'])
validation_features = pd.get_dummies(validation_features, columns=['train_type'])

# get_dummies only creates columns for the categories present in the frame it
# is given, so the two frames can end up with different columns. Align the
# validation frame to the training layout: categories missing from validation
# become all-zero columns, and categories the model never saw are dropped.
validation_features = validation_features.reindex(
    columns=train_features.columns, fill_value=0
)

# Model training; random_state is fixed so repeated runs give the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(train_features, train_target)

# Forecast on the validation data.
predictions = model.predict(validation_features)

# MASE calculation.
# NOTE(review): the scale is built from successive differences of
# train_target in file order, which is only a naive-forecast baseline if the
# training rows form a single time-ordered series - confirm.
mase_score = mean_absolute_scaled_error(validation_target, predictions, train_target)

print(f'MASE Score: {mase_score}')


MASE Score: 4.916357150679857
