In [4]:
# ‚úÖ Import Required Libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, r2_score

# === ‚úÖ Step 1: Load Data for Testing ===
def test_data_loading():
    """Test if dataset loads correctly."""
    file_path = r"C:\Users\MSI\Desktop\Critical_mineral\data\lithium-production.csv"
    if not os.path.exists(file_path):
        raise FileNotFoundError("‚ö†Ô∏è Dataset file not found! Check the file path.")
    
    df = pd.read_csv(file_path)
    assert not df.empty, "‚ö†Ô∏è Dataset is empty!"
    assert "Year" in df.columns, "‚ö†Ô∏è Missing 'Year' column!"
    assert "Lithium production - kt" in df.columns, "‚ö†Ô∏è Missing 'Lithium production - kt' column!"
    assert "Entity" in df.columns, "‚ö†Ô∏è Missing 'Entity' column!"
    print("\n‚úÖ Data loading test passed!")
    return df

df = test_data_loading()

# === ‚úÖ Step 2: Preprocessing Testing ===
def test_preprocessing(df):
    """Test if data preprocessing is applied correctly."""
    df_world = df[df["Entity"] == "World"]
    yearly_production = df_world.groupby("Year")["Lithium production - kt"].sum().reset_index()
    
    # Convert Year to Date Format
    yearly_production["ds"] = pd.to_datetime(yearly_production["Year"], format="%Y")
    yearly_production["y"] = yearly_production["Lithium production - kt"].astype(float)

    # Ensure No Missing Values
    yearly_production.fillna(method='ffill', inplace=True)

    # Remove zero or negative values before log transformation
    yearly_production = yearly_production[yearly_production["y"] > 0]

    # Apply Log Transformation
    yearly_production["y"] = np.log1p(yearly_production["y"])

    # Apply Growth Cap
    cap_value = yearly_production["y"].max() * 1.02
    yearly_production["cap"] = cap_value

    # Apply Rolling Average
    yearly_production["y"] = yearly_production["y"].rolling(window=3, min_periods=1).mean()

    # ‚úÖ Integrity Tests
    assert not yearly_production.empty, "‚ö†Ô∏è Processed dataset is empty!"
    assert "ds" in yearly_production.columns, "‚ö†Ô∏è Missing 'ds' column!"
    assert "y" in yearly_production.columns, "‚ö†Ô∏è Missing log-transformed column 'y'!"
    assert yearly_production["y"].min() > 0, "‚ö†Ô∏è Log transformation failed!"

    print("\n‚úÖ Data preprocessing test passed!")
    return yearly_production, cap_value

yearly_production, cap_value = test_preprocessing(df)

# === ‚úÖ Step 3: Prophet Model Training Testing ===
def test_model_training(yearly_production):
    """Test if Prophet model trains without errors."""
    prophet_model = Prophet(
        growth="logistic",
        seasonality_mode="additive",
        changepoint_prior_scale=0.007,
        yearly_seasonality=True,
        interval_width=0.65
    )
    prophet_model.add_seasonality(name="five_year_cycle", period=5, fourier_order=1)

    try:
        prophet_model.fit(yearly_production[["ds", "y", "cap"]])
        print("\n‚úÖ Model training test passed!")
        return prophet_model
    except Exception as e:
        raise RuntimeError(f"\n‚ùå Model training failed: {e}")

prophet_model = test_model_training(yearly_production)

# === ‚úÖ Step 4: Forecast Testing ===
def test_forecasting(prophet_model, yearly_production, cap_value):
    """Test if forecasting pipeline works correctly."""
    last_year = yearly_production["ds"].dt.year.max()
    years_to_forecast = (2030 - last_year) + 1

    future_df = prophet_model.make_future_dataframe(periods=years_to_forecast, freq='Y')
    future_df["cap"] = cap_value  # Apply growth cap

    # Make Predictions
    prophet_forecast = prophet_model.predict(future_df)

    # ‚úÖ Forecasting Pipeline Integrity Tests
    assert "ds" in prophet_forecast.columns, "‚ö†Ô∏è Missing 'ds' column in forecast!"
    assert "yhat" in prophet_forecast.columns, "‚ö†Ô∏è Missing 'yhat' column in forecast!"

    print("\n‚úÖ Forecasting test passed!")
    return prophet_forecast

prophet_forecast = test_forecasting(prophet_model, yearly_production, cap_value)

# === ‚úÖ Step 5: Performance Testing ===
def test_model_performance(yearly_production, prophet_model):
    """Test model performance using MAE and R¬≤."""
    test_size = 5
    train = yearly_production[:-test_size]
    test = yearly_production[-test_size:]

    test_forecast = prophet_model.predict(test[["ds", "cap"]])
    test_predictions = np.expm1(test_forecast["yhat"].values)  

    # Compute Errors
    mae_prophet = mean_absolute_error(np.expm1(test["y"].values), test_predictions)
    r2_prophet = r2_score(np.expm1(test["y"].values), test_predictions)

    print("\n‚úÖ **Model Performance Metrics:**")
    print(f"   - Mean Absolute Error (MAE): {mae_prophet:.2f}")
    print(f"   - R¬≤ Score: {r2_prophet:.4f}")


    print("\n‚úÖ Performance test passed!")
    return mae_prophet, r2_prophet

mae_prophet, r2_prophet = test_model_performance(yearly_production, prophet_model)

# === ‚úÖ Step 6: Save Results ===
output_path = r"C:\Users\MSI\Desktop\Critical_mineral\Test\Lithium_forecast_test.csv"
prophet_forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].to_csv(output_path, index=False)

print(f"\n‚úÖ Forecast saved to: {output_path}")
print("\n‚úÖ **All tests completed successfully!** üéâ")


  yearly_production.fillna(method='ffill', inplace=True)
03:11:32 - cmdstanpy - INFO - Chain [1] start processing



‚úÖ Data loading test passed!

‚úÖ Data preprocessing test passed!


03:11:32 - cmdstanpy - INFO - Chain [1] done processing



‚úÖ Model training test passed!

‚úÖ Forecasting test passed!

‚úÖ **Model Performance Metrics:**
   - Mean Absolute Error (MAE): 7961.11
   - R¬≤ Score: 0.8942

‚úÖ Performance test passed!

‚úÖ Forecast saved to: C:\Users\MSI\Desktop\Critical_mineral\Test\Lithium_forecast_test.csv

‚úÖ **All tests completed successfully!** üéâ


  dates = pd.date_range(
