In [None]:
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from statsmodels.tsa.statespace.sarimax import SARIMAX
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# ---------------------------------------------------------------------------
# Training script: for every city, fit an XGBoost regressor, optionally a
# SARIMAX model (cities with more than two years of data) and an LSTM, then
# pickle the fitted models and their hold-out errors.
# ---------------------------------------------------------------------------

# Load dataset
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
df = pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\AQI.csv')

df['date'] = pd.to_datetime(df['date'])
df = df.drop_duplicates(subset=['City', 'date'])

# Count unique days per city
city_data_counts = df.groupby('City')['date'].nunique()
cities_with_2_years_data = city_data_counts[city_data_counts > 730].index.tolist()

TARGET_COLUMN = 'Index Value'
LAG_COLUMNS = ['lag_1', 'lag_7', 'lag_30']
FEATURE_COLUMNS = ['day_of_year', 'year'] + LAG_COLUMNS
SEQUENCE_LENGTH = 30  # number of consecutive rows in one LSTM input window

models = {}
errors = {}

for city in df['City'].unique():
    df_city = df[df['City'] == city].copy()
    df_city.set_index('date', inplace=True)
    df_city = df_city[~df_city.index.duplicated(keep='first')]
    # Re-index onto a gap-free daily grid; days without a row become NaN.
    df_city = df_city.asfreq('D')

    # Nothing can be interpolated or imputed from an all-missing column.
    if df_city[TARGET_COLUMN].notna().sum() == 0:
        print(f"Skipping {city}: no usable '{TARGET_COLUMN}' values")
        continue

    # Linear interpolation fills interior gaps; the imputer then fills whatever
    # is still missing (interpolate() does not fill NaNs before the first
    # observation).
    # NOTE(review): with a single column the IterativeImputer has no other
    # feature to regress on, so it presumably reduces to its initial (mean)
    # fill -- confirm this is the intended behaviour.
    df_city[TARGET_COLUMN] = df_city[TARGET_COLUMN].interpolate(method='linear')
    imputer = IterativeImputer(max_iter=5, random_state=42)
    df_city[[TARGET_COLUMN]] = imputer.fit_transform(df_city[[TARGET_COLUMN]])

    # Feature Engineering - Adding Lag Features
    df_city['lag_1'] = df_city[TARGET_COLUMN].shift(1)
    df_city['lag_7'] = df_city[TARGET_COLUMN].shift(7)
    df_city['lag_30'] = df_city[TARGET_COLUMN].shift(30)
    # Drop only rows whose *model* columns are missing (the first 30 days).
    # An unrestricted dropna() would also discard every day inserted by
    # asfreq() -- those rows are NaN in all non-interpolated columns such as
    # 'City' -- undoing the gap filling and leaving an irregular date index.
    df_city = df_city.dropna(subset=[TARGET_COLUMN] + LAG_COLUMNS)

    df_city['day_of_year'] = df_city.index.dayofyear
    df_city['year'] = df_city.index.year
    X = df_city[FEATURE_COLUMNS]
    y = df_city[TARGET_COLUMN]

    # With SEQUENCE_LENGTH rows or fewer there cannot be a single LSTM training
    # window, and train_test_split raises on empty input.
    if len(df_city) <= SEQUENCE_LENGTH:
        print(f"Skipping {city}: only {len(df_city)} usable rows")
        continue

    # Chronological split (no shuffling): the last 20% of days is the test set.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # The LSTM needs at least one full window plus a target row in the
    # training part.
    if len(X_train) <= SEQUENCE_LENGTH:
        print(f"Skipping {city}: only {len(X_train)} training rows")
        continue

    model_xgb = XGBRegressor(objective='reg:squarederror', n_estimators=50, learning_rate=0.05, max_depth=3, n_jobs=-1)
    model_xgb.fit(X_train, y_train)
    y_pred_xgb = model_xgb.predict(X_test)
    error_xgb = mean_absolute_error(y_test, y_pred_xgb)

    if city in cities_with_2_years_data:
        y_train_sarima = y_train[-730:]  # Train SARIMAX on last 2 years
        # NOTE(review): the seasonal period is 52 although the series is daily;
        # confirm whether 7 (weekly seasonality) was intended.
        model_sarima = SARIMAX(y_train_sarima, order=(1, 1, 1), seasonal_order=(1, 1, 1, 52), enforce_stationarity=False, enforce_invertibility=False).fit(disp=False)
        sarima_pred = model_sarima.forecast(steps=len(y_test))
        error_sarima = mean_absolute_error(y_test, sarima_pred)
    else:
        model_sarima, error_sarima = None, None

    # LSTM Model
    # Each sample is the SEQUENCE_LENGTH feature rows preceding row i; the
    # target is the index value of row i.
    train_values = X_train.to_numpy()
    X_lstm = np.array([
        train_values[i - SEQUENCE_LENGTH:i]
        for i in range(SEQUENCE_LENGTH, len(train_values))
    ])
    y_lstm = y_train.to_numpy()[SEQUENCE_LENGTH:]

    model_lstm = Sequential([
        LSTM(50, return_sequences=True, input_shape=(SEQUENCE_LENGTH, X_train.shape[1])),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1)
    ])
    model_lstm.compile(optimizer='adam', loss='mean_absolute_error')
    model_lstm.fit(X_lstm, y_lstm, epochs=10, batch_size=16, verbose=0)

    # Only XGBoost and SARIMAX have a hold-out error; the LSTM is not evaluated.
    models[city] = (model_xgb, model_sarima, model_lstm)
    errors[city] = (error_xgb, error_sarima)

# NOTE(review): this pickles Keras models inside the tuples; confirm that the
# installed TensorFlow/Keras version supports pickling models.
with open("aqi_prediction_models.pkl", "wb") as f:
    pickle.dump(models, f)
with open("aqi_prediction_errors.pkl", "wb") as f:
    pickle.dump(errors, f)

def predict_aqi(city, future_date, history=None):
    """Predict the AQI of ``city`` on ``future_date`` from the pickled models.

    The fitted models and their hold-out errors are read from
    ``aqi_prediction_models.pkl`` and ``aqi_prediction_errors.pkl`` in the
    current working directory. The result is a weighted blend of the models
    that can actually be evaluated (base weights: XGBoost 0.3, SARIMA 0.3,
    LSTM 0.4), renormalised so the weights in use always sum to 1.

    Args:
        city: City name; must match a key of the pickled ``models`` dict.
        future_date: Anything ``pandas.to_datetime`` can parse.
        history: Optional DataFrame holding the columns ``day_of_year``,
            ``year``, ``lag_1``, ``lag_7`` and ``lag_30`` for the most recent
            days of ``city``, in chronological order. Its last 30 rows form
            the LSTM input window. When omitted, shorter than 30 rows or
            containing NaN, the LSTM is left out of the blend.

    Returns:
        ``{"Error": ...}`` when the city is unknown, otherwise a dict with the
        keys ``"City"``, ``"Future Date"``, ``"Predicted AQI"`` and
        ``"Prediction Confidence"`` (``None`` if no error was stored).

    Raises:
        KeyError: If ``history`` lacks one of the feature columns.
    """
    feature_columns = ['day_of_year', 'year', 'lag_1', 'lag_7', 'lag_30']
    sequence_length = 30  # window length the LSTM is fed with
    future_date = pd.to_datetime(future_date)

    # SECURITY: pickle.load can execute arbitrary code -- only load files that
    # were produced locally by the training step.
    with open("aqi_prediction_models.pkl", "rb") as f:
        models = pickle.load(f)
    with open("aqi_prediction_errors.pkl", "rb") as f:
        errors = pickle.load(f)

    if city not in models:
        return {"Error": "City not found in dataset"}

    model_xgb, model_sarima, model_lstm = models[city]
    error_xgb, error_sarima = errors.get(city, (None, None))

    # The lag values of a future day are unknown and stay NaN. All five
    # columns are passed on: dropping the NaN columns would hand the model
    # fewer features than are listed here.
    future_features = pd.DataFrame(
        {
            'day_of_year': [future_date.dayofyear],
            'year': [future_date.year],
            'lag_1': [np.nan],
            'lag_7': [np.nan],
            'lag_30': [np.nan],
        },
        columns=feature_columns,
    )

    # (weight, prediction) pairs of every model that contributes.
    weighted = [(0.3, float(model_xgb.predict(future_features)[0]))]

    if model_sarima is not None:
        # NOTE(review): this is the forecast one step past the end of the
        # SARIMA training sample, independent of ``future_date``.
        # Positional access: the forecast may be a Series whose index labels
        # do not include 0.
        sarima_forecast = np.asarray(model_sarima.forecast(steps=1))
        weighted.append((0.3, float(sarima_forecast[0])))

    if model_lstm is not None and history is not None:
        window = history[feature_columns].tail(sequence_length).to_numpy(dtype=float)
        if window.shape[0] == sequence_length and not np.isnan(window).any():
            # Add the batch axis: (1, sequence_length, n_features).
            lstm_out = np.asarray(model_lstm.predict(window[np.newaxis, :, :]))
            weighted.append((0.4, float(lstm_out.ravel()[0])))

    # Renormalise so the blend is a true weighted average whichever models
    # are available.
    total_weight = sum(weight for weight, _ in weighted)
    final_prediction = sum(weight * value for weight, value in weighted) / total_weight

    # Confidence = 100 minus the mean of the stored errors that exist.
    known_errors = [err for err in (error_xgb, error_sarima) if err is not None]
    if known_errors:
        confidence = round(max(0.0, 100 - sum(known_errors) / len(known_errors)), 2)
    else:
        confidence = None

    return {
        "City": city,
        "Future Date": future_date.strftime('%Y-%m-%d'),
        "Predicted AQI": round(final_prediction, 2),
        "Prediction Confidence": confidence
    }

# Example usage
# Surrounding whitespace is stripped so that an accidental leading/trailing
# space does not turn a valid city into "City not found": predict_aqi looks
# the city up by exact key.
city_input = input("Enter city name: ").strip()
date_input = input("Enter future date (YYYY-MM-DD): ").strip()
prediction = predict_aqi(city_input, date_input)
print(prediction)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  retu