# Data Forecasting

###  Import Libraries & Load Data

In [2]:
import pandas as pd
import numpy as np
import os
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from prophet import Prophet
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# Read the cleaned dataset, parse the 'datum' column into timestamps and
# promote it to the index so every remaining column is a date-indexed series.
data = (
    pd.read_csv("../input/named_data_cleaned.csv")
    .assign(datum=lambda frame: pd.to_datetime(frame['datum']))
    .set_index('datum')
)

# Ensure the destination folder for results is present (no-op if it already is).
os.makedirs("../output", exist_ok=True)

# Forecasting model chosen for each drug category. The keys are looked up by
# column name of `data` when the forecasts are generated.
best_models = {
    "Antiinflammatory": "HW",
    "Antirheumatic": "SARIMA",
    "Analgesics": "ARIMA",
    "Antipyretics": "LSTM",
    "Psycholeptics": "Prophet",
    "Sedatives": "ARIMA",
    "Bronchodilators": "LSTM",
    "Antihistamines": "LSTM",
}

### Define Forecasting Functions

In [11]:
# ARIMA
def forecast_arima(series, steps=12, order=(1, 1, 1)):
    """Fit an ARIMA model to ``series`` and forecast ``steps`` periods ahead.

    Args:
        series: Observations in time order (a pandas Series, or any
            array-like accepted by statsmodels' ``ARIMA``).
        steps: Number of out-of-sample periods to forecast.
        order: ``(p, d, q)`` order handed to ``ARIMA``. Defaults to
            ``(1, 1, 1)``, the value that used to be hard-coded.

    Returns:
        numpy.ndarray with one point forecast per step; negative forecasts
        are clipped to 0.
    """
    fitted = ARIMA(series, order=order).fit()
    # np.asarray works whether forecast() hands back a pandas Series or a
    # plain ndarray, whereas `.values` only exists on the former.
    point_forecast = np.asarray(fitted.forecast(steps))
    return np.clip(point_forecast, 0, None)

# SARIMA
def forecast_sarima(series, steps=12, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)):
    """Fit a seasonal ARIMA (SARIMAX) model and forecast ``steps`` periods ahead.

    Args:
        series: Observations in time order (a pandas Series, or any
            array-like accepted by statsmodels' ``SARIMAX``).
        steps: Number of out-of-sample periods to forecast.
        order: Non-seasonal ``(p, d, q)`` order. Defaults to ``(1, 1, 1)``.
        seasonal_order: Seasonal ``(P, D, Q, s)`` order. Defaults to
            ``(1, 1, 1, 12)``, the value that used to be hard-coded.
            NOTE(review): the forecasting cell treats the data as weekly;
            confirm that a seasonal period of 12 is intended (pass e.g.
            ``(1, 1, 1, 52)`` for a yearly cycle on weekly data).

    Returns:
        numpy.ndarray with one point forecast per step; negative forecasts
        are clipped to 0.
    """
    fitted = SARIMAX(series, order=order, seasonal_order=seasonal_order).fit(disp=False)
    # np.asarray works whether forecast() hands back a pandas Series or a
    # plain ndarray, whereas `.values` only exists on the former.
    point_forecast = np.asarray(fitted.forecast(steps))
    return np.clip(point_forecast, 0, None)

# Holt-Winters
def forecast_hw(series, steps=12, seasonal='add', seasonal_periods=12):
    """Fit a Holt-Winters exponential smoothing model and forecast ahead.

    Args:
        series: Observations in time order (a pandas Series, or any
            array-like accepted by statsmodels' ``ExponentialSmoothing``).
        steps: Number of out-of-sample periods to forecast.
        seasonal: Seasonal component type passed to ``ExponentialSmoothing``.
            Defaults to ``'add'``, the value that used to be hard-coded.
        seasonal_periods: Length of one seasonal cycle. Defaults to 12, the
            value that used to be hard-coded.
            NOTE(review): the forecasting cell treats the data as weekly;
            confirm that 12 is the intended cycle length.

    Returns:
        numpy.ndarray with one point forecast per step; negative forecasts
        are clipped to 0.
    """
    fitted = ExponentialSmoothing(
        series, seasonal=seasonal, seasonal_periods=seasonal_periods
    ).fit()
    # np.asarray works whether forecast() hands back a pandas Series or a
    # plain ndarray, whereas `.values` only exists on the former.
    point_forecast = np.asarray(fitted.forecast(steps))
    return np.clip(point_forecast, 0, None)

# LSTM
def forecast_lstm(series, steps=12, n_steps=5, epochs=50):
    """Train a small LSTM on ``series`` and forecast ``steps`` periods ahead.

    The series is scaled with ``MinMaxScaler``, cut into sliding windows of
    ``n_steps`` consecutive values (each paired with the value that follows
    it), and used to train a single-layer LSTM. Forecasts are produced one
    step at a time, feeding every prediction back in as the newest input.

    Args:
        series: pandas Series of observations in time order.
        steps: Number of future periods to forecast.
        n_steps: Window length, i.e. how many past values form one input.
        epochs: Number of training epochs.

    Returns:
        numpy.ndarray of ``steps`` forecasts on the original scale; negative
        forecasts are clipped to 0.

    Raises:
        ValueError: If ``series`` has no more than ``n_steps`` observations,
            so not even one training window can be built.
    """
    # Fail early with a clear message; otherwise the empty training set only
    # surfaces as an opaque error deep inside model fitting.
    if len(series) <= n_steps:
        raise ValueError(
            f"series has {len(series)} observations but more than "
            f"n_steps={n_steps} are required to build a training window"
        )

    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(series.values.reshape(-1, 1))

    # Window i covers scaled[i : i + n_steps]; its target is the next value.
    n_windows = len(scaled) - n_steps
    X = np.array([scaled[i:i + n_steps] for i in range(n_windows)])
    y = scaled[n_steps:]

    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_steps, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(X, y, epochs=epochs, verbose=0)

    # Seed the rolling window with the most recent n_steps observations.
    input_seq = scaled[-n_steps:].reshape(1, n_steps, 1)
    preds = []
    for _ in range(steps):
        pred = model.predict(input_seq, verbose=0)
        preds.append(pred[0, 0])
        # Drop the oldest value and append the prediction as the newest one.
        input_seq = np.concatenate(
            [input_seq[:, 1:, :], pred.reshape(1, 1, 1)], axis=1
        )

    # Undo the scaling so forecasts are on the same scale as `series`.
    preds = scaler.inverse_transform(np.array(preds).reshape(-1, 1)).flatten()
    return np.clip(preds, 0, None)

### Generate Forecasts for All Drugs

In [16]:
# Use last date from your dataset
last_date = data.index.max()  # e.g., 2019-10-13

# Generate next 3 months weekly dates
forecast_steps = 12  # 12 weeks ≈ 3 months
forecast_dates = pd.date_range(start=last_date + pd.Timedelta(days=7), periods=forecast_steps, freq='W')

# Use these dates as index for forecast dataframe
forecast_df = pd.DataFrame(index=forecast_dates)


def forecast_prophet(series, future_dates):
    """Fit Prophet on ``series`` and predict the given ``future_dates``.

    Args:
        series: pandas Series of observations indexed by timestamp.
        future_dates: Timestamps to forecast.

    Returns:
        numpy.ndarray with one ``yhat`` forecast per entry of
        ``future_dates``; negative forecasts are clipped to 0, matching the
        other forecasters.
    """
    # Prophet expects a dataframe with the timestamps in 'ds' and values in 'y'.
    history = pd.DataFrame({'ds': series.index, 'y': series.values})
    model = Prophet()
    model.fit(history)
    # Predict exactly the forecast dates so the result lines up with the index.
    predicted = model.predict(pd.DataFrame({'ds': future_dates}))
    return np.clip(predicted['yhat'].values, 0, None)


# Forecasters that share the (series, steps=...) calling convention.
step_forecasters = {
    "ARIMA": forecast_arima,
    "SARIMA": forecast_sarima,
    "HW": forecast_hw,
    "LSTM": forecast_lstm,
}

for col in data.columns:
    series = data[col].dropna()
    model_type = best_models[col]

    if model_type == "Prophet":
        # Previously there was no Prophet branch, so any drug mapped to
        # "Prophet" was silently left out of the forecast.
        forecast_df[col] = forecast_prophet(series, forecast_dates)
    elif model_type in step_forecasters:
        forecast_df[col] = step_forecasters[model_type](series, steps=forecast_steps)
    else:
        # A misspelled model name must not silently drop a column.
        raise ValueError(f"Unknown model type {model_type!r} for column {col!r}")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)
  super().__init__(**kwargs)


### Save Forecasted Data

In [17]:
# One path drives both the write and the log line so the two cannot drift
# apart (the message used to name a different file than the one written).
output_path = "../output/forecast_next_3_months.csv"

# Turn the date index into a regular 'datum' column formatted as YYYY-MM-DD.
forecast_df_reset = forecast_df.reset_index().rename(columns={'index':'datum'})
forecast_df_reset['datum'] = forecast_df_reset['datum'].dt.strftime('%Y-%m-%d')
forecast_df_reset.to_csv(output_path, index=False)
print(f"✅ Forecasted 3 months weekly data saved to {output_path}")

✅ Forecasted 3 months weekly data saved to ../output/forecast_next_3_months.csv
