In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Load the data
training_data = np.load('training_data.npy')
valid_periods = np.load('valid_periods.npy')
categories = np.load('categories.npy')

# Define the categories
category_dict = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5}

# Convert categories to numerical values
numerical_categories = np.vectorize(category_dict.get)(categories)

# Find the maximum length of the valid periods
max_length = max(valid_periods[:, 1] - valid_periods[:, 0])

# Pad the sequences in valid_training_data to the maximum length
valid_training_data_padded = np.array([np.pad(training_data[i, valid_periods[i, 0]:valid_periods[i, 1]], (0, max_length - (valid_periods[i, 1] - valid_periods[i, 0]))) for i in range(training_data.shape[0])])

# Use valid_training_data_padded instead of valid_training_data
X_train, X_test, y_train, y_test = train_test_split(valid_training_data_padded, numerical_categories, test_size=0.2, random_state=42)

# Train a forecasting model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test data
predictions = model.predict(X_test)

In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt

# Calculate Mean Absolute Error
mae = mean_absolute_error(y_test, predictions)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, predictions)

# Calculate Root Mean Squared Error
rmse = sqrt(mse)

# Calculate Mean Absolute Percentage Error
epsilon = 1e-10  # small constant to avoid division by zero
mape = np.mean(np.abs((y_test - predictions) / (y_test + epsilon))) * 100

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"MAPE: {mape}%")

MAE: 0.6770029513888889
MSE: 0.8406661953125
RMSE: 0.9168785062986807
MAPE: 121936458361.92308%
