# Coal Price Forecasting Framework

This notebook demonstrates the implementation and testing of our coal price forecasting framework.
We'll go through:
1. Data loading and preprocessing
2. Feature engineering
3. Model training and evaluation
4. Forecasting and analysis

In [None]:
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Add project root to Python path so the `src.*` imports below resolve.
# The root is computed as the parent of the current working directory, i.e. this
# assumes the notebook is launched from a direct subdirectory of the project
# (e.g. notebooks/) - TODO confirm against the repository layout.
PROJECT_ROOT = os.path.dirname(os.path.abspath('.'))
# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import our modules
from src.utils.logging_config import setup_logging, load_config, CONFIG_PATH
from src.data_processing.data_processor import DataProcessor
from src.modeling.ml_ensemble import MLEnsemble

In [None]:
# Initialise logging before anything else, then read the project configuration
setup_logging()

config = load_config(CONFIG_PATH)

# load_config signals failure by returning None; stop here rather than let
# later cells fail on a missing configuration
if config is None:
    raise ValueError("Failed to load configuration")
print("Configuration loaded successfully")

## 1. Load and Process Sample Data

For initial testing, we generate synthetic monthly data (seeded random noise combined with trend and seasonal components) that mimics our expected data structure.

In [None]:
# Create synthetic month-end sample data.
# An explicit MonthEnd offset is used instead of the 'M' alias: 'M' is deprecated
# since pandas 2.2 (in favour of 'ME'), while 'ME' is not recognised by older
# pandas versions. The offset object yields the same month-end dates on both.
dates = pd.date_range(start='2020-01-01', end='2024-01-01', freq=pd.offsets.MonthEnd())

# Seed the global NumPy RNG from the project config so the draws are reproducible
np.random.seed(config['project_settings']['random_seed'])
n_periods = len(dates)

# Create sample price and feature data.
# The columns are built in a fixed order, so each one consumes the seeded random
# stream at the same position on every run.
sample_data = pd.DataFrame({
    'Date': dates,
    # noise plus a linear upward trend
    'Newcastle_FOB_6000_NAR': 100 + np.random.normal(0, 15, n_periods) + np.linspace(0, 30, n_periods),
    # noise plus a sinusoidal component (two full cycles over the sample)
    'Henry_Hub_Spot': 3 + np.random.normal(0, 0.5, n_periods) + np.sin(np.linspace(0, 4*np.pi, n_periods)),
    # noise plus a linear upward trend
    'EUA_Carbon_Futures': 25 + np.random.normal(0, 3, n_periods) + np.linspace(0, 15, n_periods),
    # noise whose amplitude is modulated by a sine wave
    'Baltic_Dry_Index': 1500 + np.random.normal(0, 200, n_periods) * np.sin(np.linspace(0, 8*np.pi, n_periods)),
    # noise around a constant level of 50
    'China_Manufacturing_PMI': 50 + np.random.normal(0, 1, n_periods)
}).set_index('Date')

sample_data.head()

In [None]:
# Build the processor and give it its own copy of the sample frame, so the
# processing below cannot modify `sample_data`
data_processor = DataProcessor(config)
data_processor.data = sample_data.copy()

# Run the processing stages one after another, in the same order as listed
for stage in (
    data_processor.clean_data,
    data_processor.create_features,
    data_processor.transform_data,
):
    stage()

# Snapshot the processor's resulting frame for the following cells
processed_data = data_processor.data.copy()
print("Features created:", processed_data.columns.tolist())

## 2. Train-Test Split and Model Training

In [None]:
# Chronological split: rows dated on or before the configured cutoff form the
# training set, rows strictly after it form the test set
train_end = pd.Timestamp(config['modeling']['train_end_date'])
on_or_before_cutoff = processed_data.index <= train_end
after_cutoff = processed_data.index > train_end

train_data = processed_data[on_or_before_cutoff]
test_data = processed_data[after_cutoff]

# Turn each partition into a (features, target) pair
X_train, y_train = data_processor.prepare_data(train_data)
X_test, y_test = data_processor.prepare_data(test_data)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

In [None]:
def calculate_metrics(y_true, y_pred):
    """
    Calculate RMSE, MAE and MAPE for a set of predictions.

    Both inputs are converted to flat float arrays before any arithmetic, so
    that values are compared by position. This avoids label alignment between
    two pandas objects with different indexes, avoids accidental (n, 1) vs (n,)
    broadcasting, and lets plain Python lists be passed.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    dict
        ``{'RMSE': ..., 'MAE': ..., 'MAPE': ...}``. MAPE is expressed in
        percent and is computed only over observations whose actual value is
        non-zero; it is NaN when every actual value is zero.

    Raises
    ------
    Propagates whatever the scikit-learn metric functions raise for invalid
    input (e.g. inputs of different lengths).
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    # Run the scikit-learn metrics first so their input validation fires
    # before the manual MAPE computation below.
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)

    # A percentage error is undefined where the actual value is zero; skip
    # those points instead of letting inf/NaN take over the mean.
    nonzero = actual != 0
    if nonzero.any():
        pct_error = np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
        mape = np.mean(pct_error) * 100
    else:
        mape = np.nan

    metrics = {
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape
    }
    return metrics

def plot_forecast_vs_actual(y_true, y_pred, title='Forecast vs Actual'):
    """
    Plot forecasted values against actual values.

    Drawn with matplotlib (``plt``), which the notebook imports in its first
    code cell. Both series are plotted against their positional index
    (0..n-1), not against any pandas index they may carry.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecasted values.
    title : str, optional
        Figure title.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    fig, ax = plt.subplots()
    # np.asarray drops any pandas index so both lines share the same x positions
    ax.plot(np.asarray(y_true), label='Actual')
    ax.plot(np.asarray(y_pred), label='Forecast')
    ax.set_title(title)
    ax.set_xlabel('Observation')
    ax.set_ylabel('Value')
    ax.legend()
    plt.show()