In [4]:
# ✅ Import Required Libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, r2_score

# === ✅ Step 1: Load Data for Testing ===
def test_data_loading():
    """Test if dataset loads correctly."""
    file_path = r"C:\Users\MSI\Desktop\Critical_mineral\data\lithium-production.csv"
    if not os.path.exists(file_path):
        raise FileNotFoundError("⚠️ Dataset file not found! Check the file path.")
    
    df = pd.read_csv(file_path)
    assert not df.empty, "⚠️ Dataset is empty!"
    assert "Year" in df.columns, "⚠️ Missing 'Year' column!"
    assert "Lithium production - kt" in df.columns, "⚠️ Missing 'Lithium production - kt' column!"
    assert "Entity" in df.columns, "⚠️ Missing 'Entity' column!"
    print("\n✅ Data loading test passed!")
    return df

df = test_data_loading()

# === ✅ Step 2: Preprocessing Testing ===
def test_preprocessing(df):
    """Test if data preprocessing is applied correctly."""
    df_world = df[df["Entity"] == "World"]
    yearly_production = df_world.groupby("Year")["Lithium production - kt"].sum().reset_index()
    
    # Convert Year to Date Format
    yearly_production["ds"] = pd.to_datetime(yearly_production["Year"], format="%Y")
    yearly_production["y"] = yearly_production["Lithium production - kt"].astype(float)

    # Ensure No Missing Values
    yearly_production.fillna(method='ffill', inplace=True)

    # Remove zero or negative values before log transformation
    yearly_production = yearly_production[yearly_production["y"] > 0]

    # Apply Log Transformation
    yearly_production["y"] = np.log1p(yearly_production["y"])

    # Apply Growth Cap
    cap_value = yearly_production["y"].max() * 1.02
    yearly_production["cap"] = cap_value

    # Apply Rolling Average
    yearly_production["y"] = yearly_production["y"].rolling(window=3, min_periods=1).mean()

    # ✅ Integrity Tests
    assert not yearly_production.empty, "⚠️ Processed dataset is empty!"
    assert "ds" in yearly_production.columns, "⚠️ Missing 'ds' column!"
    assert "y" in yearly_production.columns, "⚠️ Missing log-transformed column 'y'!"
    assert yearly_production["y"].min() > 0, "⚠️ Log transformation failed!"

    print("\n✅ Data preprocessing test passed!")
    return yearly_production, cap_value

yearly_production, cap_value = test_preprocessing(df)

# === ✅ Step 3: Prophet Model Training Testing ===
def test_model_training(yearly_production):
    """Test if Prophet model trains without errors."""
    prophet_model = Prophet(
        growth="logistic",
        seasonality_mode="additive",
        changepoint_prior_scale=0.007,
        yearly_seasonality=True,
        interval_width=0.65
    )
    prophet_model.add_seasonality(name="five_year_cycle", period=5, fourier_order=1)

    try:
        prophet_model.fit(yearly_production[["ds", "y", "cap"]])
        print("\n✅ Model training test passed!")
        return prophet_model
    except Exception as e:
        raise RuntimeError(f"\n❌ Model training failed: {e}")

prophet_model = test_model_training(yearly_production)

# === ✅ Step 4: Forecast Testing ===
def test_forecasting(prophet_model, yearly_production, cap_value):
    """Test if forecasting pipeline works correctly."""
    last_year = yearly_production["ds"].dt.year.max()
    years_to_forecast = (2030 - last_year) + 1

    future_df = prophet_model.make_future_dataframe(periods=years_to_forecast, freq='Y')
    future_df["cap"] = cap_value  # Apply growth cap

    # Make Predictions
    prophet_forecast = prophet_model.predict(future_df)

    # ✅ Forecasting Pipeline Integrity Tests
    assert "ds" in prophet_forecast.columns, "⚠️ Missing 'ds' column in forecast!"
    assert "yhat" in prophet_forecast.columns, "⚠️ Missing 'yhat' column in forecast!"

    print("\n✅ Forecasting test passed!")
    return prophet_forecast

prophet_forecast = test_forecasting(prophet_model, yearly_production, cap_value)

# === ✅ Step 5: Performance Testing ===
def test_model_performance(yearly_production, prophet_model):
    """Test model performance using MAE and R²."""
    test_size = 5
    train = yearly_production[:-test_size]
    test = yearly_production[-test_size:]

    test_forecast = prophet_model.predict(test[["ds", "cap"]])
    test_predictions = np.expm1(test_forecast["yhat"].values)  

    # Compute Errors
    mae_prophet = mean_absolute_error(np.expm1(test["y"].values), test_predictions)
    r2_prophet = r2_score(np.expm1(test["y"].values), test_predictions)

    print("\n✅ **Model Performance Metrics:**")
    print(f"   - Mean Absolute Error (MAE): {mae_prophet:.2f}")
    print(f"   - R² Score: {r2_prophet:.4f}")


    print("\n✅ Performance test passed!")
    return mae_prophet, r2_prophet

mae_prophet, r2_prophet = test_model_performance(yearly_production, prophet_model)

# === ✅ Step 6: Save Results ===
output_path = r"C:\Users\MSI\Desktop\Critical_mineral\Test\Lithium_forecast_test.csv"
prophet_forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].to_csv(output_path, index=False)

print(f"\n✅ Forecast saved to: {output_path}")
print("\n✅ **All tests completed successfully!** 🎉")


  yearly_production.fillna(method='ffill', inplace=True)
03:11:32 - cmdstanpy - INFO - Chain [1] start processing



✅ Data loading test passed!

✅ Data preprocessing test passed!


03:11:32 - cmdstanpy - INFO - Chain [1] done processing



✅ Model training test passed!

✅ Forecasting test passed!

✅ **Model Performance Metrics:**
   - Mean Absolute Error (MAE): 7961.11
   - R² Score: 0.8942

✅ Performance test passed!

✅ Forecast saved to: C:\Users\MSI\Desktop\Critical_mineral\Test\Lithium_forecast_test.csv

✅ **All tests completed successfully!** 🎉


  dates = pd.date_range(
