In [9]:
import pandas as pd
import numpy as np
import requests
import re
import os
import joblib
import sys
from datetime import datetime, timedelta
from keras.models import load_model
import tensorflow as tf

# === Reproducibility ===
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# === 1. Get latest cleaned CSV from GitHub ===
def get_latest_cleaned_csv_url(user, repo, path="data/cleaned"):
    api_url = f"https://api.github.com/repos/{user}/{repo}/contents/{path}"
    response = requests.get(api_url)
    files = response.json()

    csv_files = []
    for file in files:
        name = file['name']
        if name.startswith("retail_sugar_prices_") and name.endswith(".csv"):
            match = re.search(r"(\d{4}-\d{2}-\d{2})", name)
            if match:
                csv_files.append((match.group(1), name))

    if not csv_files:
        raise ValueError("❌ No cleaned CSV files found.")
    
    latest_date, latest_file = sorted(csv_files)[-1]
    print(f"📁 Latest cleaned file: {latest_file}")
    return f"https://raw.githubusercontent.com/{user}/{repo}/main/{path}/{latest_file}"

# === 2. Load latest model and scaler ===
def get_latest_model_files(model_folder):
    files = os.listdir(model_folder)
    model_files = [f for f in files if f.startswith("lstm_model_rolling_diff")]
    scaler_files = [f for f in files if f.startswith("lstm_scaler_rolling_diff")]
    latest_model = sorted(model_files)[-1]
    latest_scaler = sorted(scaler_files)[-1]
    return os.path.join(model_folder, latest_model), os.path.join(model_folder, latest_scaler)

# === 3. Forecast N Months Ahead ===
def forecast_n_months(n=1, return_results=False):
    user = "Neeti3107"
    repo = "Foundation-Project_Group-14"
    model_dir = r"C:\Users\neeti\Documents\ISB_Class of Summer_2025\04 Term 4\Foundation\Foundation-Project_Group-14\models"

    # Load latest cleaned data
    url = get_latest_cleaned_csv_url(user, repo)
    df = pd.read_csv(url, parse_dates=['date'])
    df['month'] = df['date'].dt.to_period("M")
    df = df.groupby('month')['price'].mean().reset_index()
    df['month'] = df['month'].dt.to_timestamp()
    df.set_index('month', inplace=True)

    # Rolling mean diff
    df['rolling_mean'] = df['price'].rolling(window=12).mean()
    df['rolling_mean_diff'] = df['rolling_mean'] - df['rolling_mean'].shift()
    rolling_diff = df['rolling_mean_diff'].dropna()

    # Load model + scaler
    model_path, scaler_path = get_latest_model_files(model_dir)
    model = load_model(model_path)
    scaler = joblib.load(scaler_path)

    # Prepare last 12 input
    last_12 = rolling_diff[-12:].values.reshape(-1, 1)
    scaled_last_12 = scaler.transform(last_12)
    input_seq = scaled_last_12.reshape((1, 12, 1))

    last_rolling_mean = df['rolling_mean'].iloc[-1]
    last_date = df.index[-1]

    results = []
    for i in range(n):
        pred_scaled = model.predict(input_seq)
        pred_diff = scaler.inverse_transform(pred_scaled).flatten()[0]
        next_price = last_rolling_mean + pred_diff

        # Update for next step
        forecast_month = last_date + pd.DateOffset(months=1)
        forecast_month_str = forecast_month.strftime("%B %Y")

        results.append({
            "month": forecast_month_str,
            "price": round(next_price, 2)
        })

        # Prepare next input
        new_input = np.append(input_seq.flatten()[1:], scaler.transform([[pred_diff]]))
        input_seq = new_input.reshape((1, 12, 1))
        last_rolling_mean = next_price
        last_date = forecast_month

    if return_results:
        return results
    else:
        print("📅 Forecast Results:")
        for row in results:
            print(f"{row['month']}: ₹{row['price']}")

# === CLI Support ===
if __name__ == "__main__":
    try:
        n = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    except ValueError:
        print("⚠️ Please pass an integer for number of months.")
        n = 1

    forecast_n_months(n)


⚠️ Please pass an integer for number of months.
📁 Latest cleaned file: retail_sugar_prices_2025-04-16.csv








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 329ms/step
📅 Forecast Results:
March 2025: ₹44.97
