# 📈 Forecasting Patient Volume using ARIMA and SARIMA
This notebook aggregates patient records into a yearly patient-volume time series, holds out the last five years as a test set, and compares ARIMA and SARIMA forecasts against that hold-out using MAE and RMSE.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt


In [None]:
# Load original data
# Rows whose 'Full_Date' cannot be parsed become NaT (errors='coerce') and are
# dropped, so only rows with a usable date contribute to the counts below.
df = pd.read_csv("PatientVolumeTS.csv")
df['Full_Date'] = pd.to_datetime(df['Full_Date'], errors='coerce')
# Explicit copy so that adding the 'Year' column below writes to an
# independent frame and cannot trigger pandas' SettingWithCopyWarning.
df = df.dropna(subset=['Full_Date']).copy()
if df.empty:
    # Without this guard min()/max() below are NaN and range() fails with an
    # error that does not point at the real cause.
    raise ValueError("No rows with a parseable 'Full_Date' found in PatientVolumeTS.csv")
df['Year'] = df['Full_Date'].dt.year

# Aggregate by year: Patient_Volume is the number of rows recorded in each year.
patient_volume_by_year = df.groupby('Year').size().reset_index(name='Patient_Volume')

# Create complete yearly range so that years with no rows appear explicitly
# with a volume of 0 instead of being missing from the series.
all_years = pd.DataFrame({'Year': range(df['Year'].min(), df['Year'].max() + 1)})
full_series = pd.merge(all_years, patient_volume_by_year, on='Year', how='left').fillna(0)
# The left merge introduces NaN (hence floats) for missing years; restore ints.
full_series['Patient_Volume'] = full_series['Patient_Volume'].astype(int)
# Index each observation by 1 January of its year.
full_series['Date'] = pd.to_datetime(full_series['Year'], format='%Y')
full_series.set_index('Date', inplace=True)
# The index is already a gap-free run of year starts, so asfreq() changes no
# values; it only attaches an explicit year-start frequency, which lets the
# time series models use it directly instead of having to infer it.
full_series = full_series.asfreq('YS')

# Display series
full_series[['Patient_Volume']].plot(figsize=(12,5), marker='o', title='📊 Patient Volume Over Time')
plt.ylabel("Patient Volume")
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Hold out the final years of the series as the test set; every earlier year
# is used for training.
n_test_years = 5
if len(full_series) <= n_test_years:
    # Slicing would otherwise yield an empty training set and the failure
    # would only surface later, inside model fitting.
    raise ValueError(
        f"Need more than {n_test_years} yearly observations to split into "
        f"train and test sets, got {len(full_series)}"
    )
train = full_series.iloc[:-n_test_years]
test = full_series.iloc[-n_test_years:]

# The models below work on the univariate patient-volume series only.
train_series = train['Patient_Volume']
test_series = test['Patient_Volume']


In [None]:
# ARIMA Model
# Non-seasonal ARIMA with order (p, d, q) = (1, 1, 1), fitted on the training
# portion of the series only.
arima_model = ARIMA(train_series, order=(1,1,1))
arima_result = arima_model.fit()

# Forecast
# Forecast exactly one step per hold-out observation, so the forecast always
# has the same length as the test series regardless of how the split was made.
arima_forecast = arima_result.forecast(steps=len(test_series))
# Re-label the forecast with the hold-out dates so it lines up with the
# actual values in the plot and in the metric calculations.
arima_forecast.index = test_series.index

# Plot
plt.figure(figsize=(12,5))
plt.plot(train_series, label="Train")
plt.plot(test_series, label="Actual")
plt.plot(arima_forecast, label="ARIMA Forecast", linestyle='--')
plt.title("🔮 ARIMA Forecast vs Actual")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Metrics
# MAE and RMSE are both in the same units as the series (patients per year).
arima_mae = mean_absolute_error(test_series, arima_forecast)
arima_rmse = sqrt(mean_squared_error(test_series, arima_forecast))
print(f"ARIMA MAE: {arima_mae:.2f}")
print(f"ARIMA RMSE: {arima_rmse:.2f}")


In [None]:
# SARIMA Model
# Seasonal ARIMA with order (1, 1, 1) and seasonal order (P, D, Q, s) =
# (1, 1, 1, 12), fitted on the training portion of the series only.
# NOTE(review): the series is aggregated to one observation per year, so a
# seasonal period of s=12 means a 12-year cycle rather than the usual
# 12-months-per-year pattern, and seasonal differencing at lag 12 consumes
# 12 training observations. Confirm this period is intended for yearly data.
sarima_model = SARIMAX(train_series, order=(1,1,1), seasonal_order=(1,1,1,12))
sarima_result = sarima_model.fit(disp=False)  # disp=False silences optimizer output

# Forecast
# Forecast exactly one step per hold-out observation, so the forecast always
# has the same length as the test series regardless of how the split was made.
sarima_forecast = sarima_result.forecast(steps=len(test_series))
# Re-label the forecast with the hold-out dates so it lines up with the
# actual values in the plot and in the metric calculations.
sarima_forecast.index = test_series.index

# Plot
plt.figure(figsize=(12,5))
plt.plot(train_series, label="Train")
plt.plot(test_series, label="Actual")
plt.plot(sarima_forecast, label="SARIMA Forecast", linestyle='--')
plt.title("🔮 SARIMA Forecast vs Actual")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Metrics
# MAE and RMSE are both in the same units as the series (patients per year).
sarima_mae = mean_absolute_error(test_series, sarima_forecast)
sarima_rmse = sqrt(mean_squared_error(test_series, sarima_forecast))
print(f"SARIMA MAE: {sarima_mae:.2f}")
print(f"SARIMA RMSE: {sarima_rmse:.2f}")


In [None]:
# Print the hold-out errors of both models side by side.
print("📊 Model Comparison:")
print(f"ARIMA  -> MAE: {arima_mae:.2f}, RMSE: {arima_rmse:.2f}")
print(f"SARIMA -> MAE: {sarima_mae:.2f}, RMSE: {sarima_rmse:.2f}")

# SARIMA is preferred only when its RMSE is strictly lower; a tie goes to ARIMA.
if sarima_rmse < arima_rmse:
    better_model = "SARIMA"
else:
    better_model = "ARIMA"
print(f"✅ Better model based on RMSE: {better_model}")
