In [1]:
import streamlit as st
import numpy as np
import pandas as pd
import joblib
from datetime import datetime, timedelta

# Load the trained model, the feature scaler and the historical dataset.
# A missing artifact is reported inside the app instead of surfacing only as a
# raw FileNotFoundError traceback.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts you trust.
try:
    rf = joblib.load('rf_aqi_model.pkl')
    scaler = joblib.load('scaler.pkl')
    data = pd.read_csv('almaty_aqi_dataset.csv')
except FileNotFoundError as exc:
    st.error(
        f"Required file not found: {exc.filename or exc}. "
        "Place the model, scaler and dataset files in the app's working directory."
    )
    st.stop()
    # st.stop() may not halt execution outside `streamlit run` (e.g. in a
    # notebook); re-raise so the script never continues with undefined names.
    raise

# Parse the date column once so later filtering and date arithmetic operate on
# real timestamps rather than strings.
data['date'] = pd.to_datetime(data['date'])

# Page header. The dash is a real em dash; the previous literal contained the
# mis-decoded byte sequence of that character.
st.title("Almaty AQI Prediction — 3 Days Ahead")

st.write("Select a date to predict AQI for the next 3 days using real data.")

# Offer each distinct calendar day in the dataset, newest first.
# ISO 'YYYY-MM-DD' strings sort chronologically, so an explicit descending
# sort yields "latest first" regardless of the row order in the CSV.
# dropna() removes entries whose date could not be formatted (NaT), which
# would otherwise show up as a NaN option and break the sort.
available_dates = data['date'].dt.strftime('%Y-%m-%d').dropna().unique()
selected_date = st.selectbox("Select date", sorted(available_dates, reverse=True))

# Number of consecutive days to forecast after the selected date.
HORIZON_DAYS = 3

# Locate the dataset row(s) for the chosen day. The comparison uses the same
# 'YYYY-MM-DD' formatting that produced the select-box options, so rows whose
# timestamp carries a time-of-day component still match.
row = data[data['date'].dt.strftime('%Y-%m-%d') == selected_date].copy()
if row.empty:
    st.warning("No data available for selected date.")
else:
    # Take the feature list (and its order) from the fitted model, minus
    # columns that are identifiers or the target.
    feature_names = [c for c in rf.feature_names_in_ if c not in ['date', 'city', 'AQI']]
    lag_features = [f for f in feature_names if 'lag' in f]

    # Only the first matching row is ever forecast, so keep just that row
    # instead of scaling every row recorded for the date.
    X_input = row[feature_names].iloc[[0]].reset_index(drop=True)

    preds = []
    for _ in range(HORIZON_DAYS):
        # Re-wrap the scaled values with their column names: the model was
        # fitted with feature names (it exposes feature_names_in_), and a bare
        # ndarray makes scikit-learn warn about missing feature names.
        X_scaled = pd.DataFrame(scaler.transform(X_input), columns=feature_names)
        pred = rf.predict(X_scaled)[0]
        preds.append(pred)
        # Feed the prediction back into the (unscaled) lag features so the next
        # step is conditioned on it.
        # NOTE(review): every column containing 'lag' receives the newest
        # prediction. If the model uses several lag depths or lags of other
        # variables, this should be a proper shift instead — confirm against
        # the training pipeline.
        for lag in lag_features:
            X_input.loc[0, lag] = pred

    base_date = pd.to_datetime(selected_date)
    for offset, pred in enumerate(preds, start=1):
        target_day = (base_date + timedelta(days=offset)).strftime('%Y-%m-%d')
        st.success(f"Day {offset} ({target_day}): Predicted AQI = {pred:.1f}")

st.caption("Predictions use actual data from your dataset for the selected date.")

FileNotFoundError: [Errno 2] No such file or directory: 'rf_aqi_model.pkl'