In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

# Actual remaining forest percentage data from 2006 to 2025.
# `years` and `remaining_forest_percentages` are paired by position:
# the i-th percentage belongs to the i-th year.
years = list(range(2006, 2026))
remaining_forest_percentages = [
    19.43, 19.00, 19.37, 19.75, 19.46, 18.53, 18.67,
    18.24, 18.26, 18.45, 18.52, 18.28, 18.26,
    18.37, 18.09, 18.37, 18.57, 18.18, 18.38, 18.03,  # includes 2024, 2025 actual
]

# Fail fast if a value is added or removed without adjusting the year range;
# otherwise zip()/slicing below would silently misalign years and values.
if len(years) != len(remaining_forest_percentages):
    raise ValueError(
        f"Expected {len(years)} forest percentages (one per year), "
        f"got {len(remaining_forest_percentages)}"
    )

# Number of trailing years held out of training so the forecast can be
# compared against the actual values (2024 and 2025).
HOLDOUT_YEARS = 2

# Separate actual data for modeling (up to 2023). Derived from `years` so the
# split point cannot drift from the data above.
training_years = years[:-HOLDOUT_YEARS]
training_data = remaining_forest_percentages[:len(training_years)]

# Training series: one forest percentage per training year, indexed by year.
forest_series = pd.Series(
    training_data,
    index=pd.Index(training_years, name="Year"),
)

# Let pmdarima pick the ARIMA order with a non-seasonal, stepwise search.
# Failed candidate fits are ignored and warnings are suppressed.
auto_arima_options = {
    "seasonal": False,
    "trace": False,
    "error_action": "ignore",
    "suppress_warnings": True,
    "stepwise": True,
}
stepwise_model = auto_arima(forest_series, **auto_arima_options)
print(f"Selected ARIMA order: {stepwise_model.order}")

# Refit with statsmodels using the selected order; the forecast and its
# confidence interval below are taken from this fit.
model = ARIMA(forest_series, order=stepwise_model.order)
model_fit = model.fit()

# Forecast one step per held-out year (2024 and 2025).
forecast_years = [2024, 2025]
forecast_result = model_fit.get_forecast(steps=len(forecast_years))
forecast = forecast_result.predicted_mean
conf_int = forecast_result.conf_int()

# Re-label the predicted means with their calendar years. Values are taken
# positionally, so the index statsmodels put on `forecast` is not relied on.
forecast_series = pd.Series(forecast.values, index=forecast_years)

# Draw the observed series (all years, including 2024–2025) and the forecast
# for the held-out years on a single figure.
plt.figure(figsize=(10, 5))

# Observed values, each point annotated with its value.
plt.plot(years, remaining_forest_percentages, marker='o', color='blue', label='Actual (2006–2025)')
for year, pct in zip(years, remaining_forest_percentages):
    plt.text(year, pct - 0.2, f'{pct:.2f}', ha='center', va='bottom', fontsize=8, color='blue')

# Forecasted values, drawn dashed and annotated in the forecast colour.
plt.plot(forecast_series.index, forecast_series.values, marker='o', linestyle='--', color='orange', label='Forecasted (2024–2025)')
for year, pct in forecast_series.items():
    plt.text(year, pct + 0.3, f'{pct:.2f}', ha='center', va='top', fontsize=8, color='orange')

# Shade the band between the lower (first) and upper (second) columns of the
# confidence-interval frame.
ci_lower = conf_int.iloc[:, 0]
ci_upper = conf_int.iloc[:, 1]
plt.fill_between(
    forecast_years,
    ci_lower,
    ci_upper,
    color='orange',
    alpha=0.2,
    label='95% Confidence Interval',
)

# Vertical marker at the first forecast year.
plt.axvline(x=forecast_years[0], linestyle='--', color='gray', label='Forecast Start')

# Titles, axis labels, one tick per year, grid and legend.
plt.title("Remaining Forest Percentage in Sundarban (2006–2025)")
plt.xlabel("Year")
plt.ylabel("Forest Percentage")
plt.xticks(ticks=years, rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

# Print forecast vs actual comparison for each forecast year.
print("\nActual vs Forecasted (2024–2025):")

# Look the observed values up by year in the main data series rather than
# re-typing them, so the comparison cannot go stale if the data is edited.
actual_by_year = dict(zip(years, remaining_forest_percentages))
actual_future_values = [actual_by_year[year] for year in forecast_years]

for i, (year, actual_val) in enumerate(zip(forecast_years, actual_future_values)):
    forecast_val = forecast_series.loc[year]  # label-based lookup by year
    # Row i of the confidence-interval frame is the i-th forecast step;
    # its two columns are the lower and upper bounds.
    lower, upper = conf_int.iloc[i]
    diff = abs(forecast_val - actual_val)
    comment = "very close!" if diff < 0.05 else "a small gap"
    print(f"{year}: Forecast = {forecast_val:.2f}%, Actual = {actual_val:.2f}%, "
          f"CI = [{lower:.2f}, {upper:.2f}], Difference = {diff:.2f}% ({comment})")
