In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# --- Function to calculate MAPE safely ---
def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Zero entries of ``y_true`` are replaced by ``1e-6`` in the denominator
    only, so the result stays finite instead of dividing by zero. Such
    entries still produce very large percentage terms.

    Args:
        y_true: Observed values (list, NumPy array, pandas Series, ...).
        y_pred: Predicted values, paired with ``y_true`` position by position.

    Returns:
        The MAPE as a NumPy float. The inputs are never modified.
    """
    # Work on float arrays: plain sequences become usable, and integer
    # inputs cannot truncate the 1e-6 substitute back to 0.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    denominator = np.where(actual == 0, 1e-6, actual)
    return np.mean(np.abs((actual - predicted) / denominator)) * 100
# --- Function to calculate SMAPE ---
def calculate_smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Each absolute error is divided by the mean of ``|y_true|`` and
    ``|y_pred|``. Where that mean is exactly zero, ``1e-6`` is used as the
    divisor instead.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)

    abs_error = np.abs(actual - forecast)
    half_sum = (np.abs(actual) + np.abs(forecast)) / 2
    # Swap exact zeros for a tiny constant so the division is always defined.
    safe_half_sum = np.where(half_sum != 0, half_sum, 1e-6)

    return np.mean(abs_error / safe_half_sum) * 100

# --- 1. Load and Preprocess the Dataset ---
# Only the file read can raise FileNotFoundError, so it is the only
# statement guarded by the try; preprocessing runs in the else branch.
try:
    df = pd.read_csv('E:/final_pharma_forcast/DATA/holidays (1).csv')
except FileNotFoundError:
    print("Could not find 'holidays (1).csv'.")
    # `exit()` is an interactive helper added by the `site` module and
    # reports success (status 0). Raise SystemExit directly with a
    # non-zero status so the failure is visible to the caller.
    raise SystemExit(1)
else:
    # Use the parsed 'datum' column as the index of the frame.
    df['datum'] = pd.to_datetime(df['datum'])
    df = df.set_index('datum')
    # Encode weekday names as integer labels when the column is present.
    if 'Weekday Name' in df.columns:
        le = LabelEncoder()
        df['Weekday Name'] = le.fit_transform(df['Weekday Name'])

# --- 2. Define Targets and Features ---
# Category columns to forecast; each one is modelled separately.
target_cols = [
    'M01AB', 'M01AE', 'N02BA', 'N02BE',
    'N05B', 'N05C', 'R03', 'R06',
]
# 'Hour' and the targets themselves are never used as model inputs.
non_feature_cols = ['Hour', *target_cols]
# Every remaining column, in its original order, is a feature.
features = list(filter(lambda name: name not in non_feature_cols, df.columns))

# --- 3. Loop, Train, Evaluate, and Visualize ---
# The 80/20 split below is positional, so it is only chronological when the
# rows are ordered by the datetime index. A stable sort guarantees that
# order while keeping rows that share a timestamp in their original
# sequence.
ordered_df = df.sort_index(kind='mergesort')

# For each target category: fit a random forest on the first 80% of the
# rows, evaluate it on the remaining 20%, print the metrics plus a forecast
# sample, and save an actual-vs-forecast plot as a PNG.
for target in target_cols:
    print(f"Processing Category: {target}")
    print("="*40)

    if target not in ordered_df.columns:
        print(f"Column '{target}' not found in the dataset; skipping.\n")
        continue

    # Drop rows with NaN in target
    valid_indices = ordered_df[target].notna()
    X = ordered_df.loc[valid_indices, features]
    y = ordered_df.loc[valid_indices, target]

    # Split data chronologically (explicit positional slicing via .iloc)
    split_idx = int(len(X) * 0.8)
    if split_idx == 0:
        # Fewer than two usable rows: there is nothing to train on.
        print(f"Not enough data to train a model for '{target}'; skipping.\n")
        continue
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    # Train the model (fixed seed for reproducible forests)
    model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)

    # Make predictions on the test set
    predictions = model.predict(X_test)

    # --- Evaluation Metrics ---
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, predictions)
    smape = calculate_smape(y_test, predictions)

    print("Evaluation Metrics:")
    print(f"  MSE:  {mse:.4f}")
    print(f"  MAE:  {mae:.4f}")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  SMAPE: {smape:.2f}%\n")

    # --- Forecast Sample ---
    forecast_df = pd.DataFrame({
        'Actual Sales': y_test,
        'Forecasted Sales': predictions
    })
    print("Forecast Sample (First 10 values from Test Data):")
    print(forecast_df.head(10).to_string())
    print("\n")

    # --- Visualization ---
    # Draw on an explicit figure/axes pair so saving and closing always act
    # on this plot rather than on whatever pyplot considers "current".
    fig, ax = plt.subplots(figsize=(15, 6))
    ax.plot(y_test.index, y_test, label='Actual Sales', color='blue')
    ax.plot(y_test.index, predictions, label='Forecasted Sales', color='orange', linestyle='--')
    ax.set_title(f'Sales Forecast vs. Actual for {target}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales (DDD)')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'forecast_visualization_{target}.png')
    plt.close(fig)
    print(f"Visualization saved as 'forecast_visualization_{target}.png'")
    print("\n" + "-"*40 + "\n")

Processing Category: M01AB
Evaluation Metrics:
  MSE:  8.2094
  MAE:  2.2381
  RMSE: 2.8652
  SMAPE: 46.69%

Forecast Sample (First 10 values from Test Data):
            Actual Sales  Forecasted Sales
datum                                     
2018-08-13          3.33          5.069800
2018-08-14          1.33          5.540683
2018-08-15          8.34          4.396200
2018-08-16          3.33          4.022900
2018-08-17          2.33          4.742400
2018-08-18          6.33          4.005200
2018-08-19          6.33          5.627300
2018-08-20          4.00          6.809600
2018-08-21          2.34          6.194000
2018-08-22          5.00          5.707300


Visualization saved as 'forecast_visualization_M01AB.png'

----------------------------------------

Processing Category: M01AE
Evaluation Metrics:
  MSE:  5.1467
  MAE:  1.7494
  RMSE: 2.2686
  SMAPE: 49.78%

Forecast Sample (First 10 values from Test Data):
            Actual Sales  Forecasted Sales
datum               