In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import datetime

# Load dataset

df = pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\AQI.csv')

In [2]:
df.describe()

Unnamed: 0,No. Stations,Index Value
count,53379.0,53430.0
mean,2.389348,121.662081
std,3.049632,81.358872
min,1.0,6.0
25%,1.0,63.0
50%,1.0,97.0
75%,2.0,156.0
max,28.0,500.0


In [10]:


df['date'] = pd.to_datetime(df['date'])

def train_bilstm_model(city_name, df):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='date')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['Index Value'])
    
    
    if len(city_data) <= 30:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum 31 days required.")
    
    scaler = MinMaxScaler()
    city_data.loc[:, 'Scaled_AQI'] = scaler.fit_transform(city_data[['Index Value']])
    
    look_back = 30  # Days of past data used for prediction
    X, y = [], []
    
    for i in range(len(city_data) - look_back):
        X.append(city_data['Scaled_AQI'].iloc[i:i+look_back].values)
        y.append(city_data['Scaled_AQI'].iloc[i+look_back])
    
    X, y = np.array(X), np.array(y)
    X = X.reshape((X.shape[0], X.shape[1], 1))
    
    model = Sequential([
        Bidirectional(LSTM(50, return_sequences=True), input_shape=(look_back, 1)),
        Dropout(0.2),
        Bidirectional(LSTM(50)),
        Dropout(0.2),
        Dense(1)
    ])
    
    model.compile(optimizer='adam', loss='mse')
    model.fit(X, y, epochs=20, batch_size=16, verbose=1)
    
    df.loc[df['City'].str.lower() == city_name.lower(), 'Scaled_AQI'] = scaler.transform(city_data[['Index Value']])
    return model, scaler, look_back

def predict_future_aqi(city_name, future_date, df, model, scaler, look_back):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='date')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['Index Value'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data.loc[:, 'Scaled_AQI'] = scaler.transform(city_data[['Index Value']])
    last_days = city_data['Scaled_AQI'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    pred_scaled = model.predict(last_days)
    pred_aqi = scaler.inverse_transform(pred_scaled.reshape(-1, 1))[0][0]
    
    confidence = 1 - abs(model.evaluate(last_days, pred_scaled, verbose=0))  # Simple confidence metric
    
    return pred_aqi, confidence

# Example Usage
city = "Agra"
future_date = "2025-02-27"
model, scaler, look_back = train_bilstm_model(city, df)
pred_aqi, confidence = predict_future_aqi(city, future_date, df, model, scaler, look_back)
print(f"Predicted AQI for {city} on {future_date}: {pred_aqi:.2f} (Confidence: {confidence:.2%})")


Epoch 1/20


  super().__init__(**kwargs)


[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step - loss: 0.0227
Epoch 2/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0131
Epoch 3/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0131
Epoch 4/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0117
Epoch 5/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0113
Epoch 6/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0112
Epoch 7/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0113
Epoch 8/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0105
Epoch 9/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0103
Epoch 10/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - lo

In [17]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import datetime
import joblib

# Load dataset


df['date'] = pd.to_datetime(df['date'])

def train_bilstm_model(city_name, df):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='date')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['Index Value'])
    
    
    if len(city_data) <= 30:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum 31 days required.")
    
    scaler = MinMaxScaler()
    city_data.loc[:, 'Scaled_AQI'] = scaler.fit_transform(city_data[['Index Value']])
    
    look_back = 30  # Days of past data used for prediction
    X, y = [], []
    
    for i in range(len(city_data) - look_back):
        X.append(city_data['Scaled_AQI'].iloc[i:i+look_back].values)
        y.append(city_data['Scaled_AQI'].iloc[i+look_back])
    
    X, y = np.array(X), np.array(y)
    X = X.reshape((X.shape[0], X.shape[1], 1))
    
    model = Sequential([
        Bidirectional(LSTM(50, return_sequences=True), input_shape=(look_back, 1)),
        Dropout(0.2),
        Bidirectional(LSTM(50)),
        Dropout(0.2),
        Dense(1)
    ])
    
    model.compile(optimizer='adam', loss='mse')
    model.fit(X, y, epochs=20, batch_size=16, verbose=1)
    
    df.loc[df['City'].str.lower() == city_name.lower(), 'Scaled_AQI'] = scaler.transform(city_data[['Index Value']])
    return model, scaler, look_back

def predict_future_aqi(city_name, future_date, df, model, scaler, look_back, n_simulations=50, compute_mape=True):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='date')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['Index Value'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data.loc[:, 'Scaled_AQI'] = scaler.transform(city_data[['Index Value']])
    last_days = city_data['Scaled_AQI'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    predictions = np.array([model.predict(last_days) for _ in range(n_simulations)])
    pred_scaled_mean = predictions.mean()
    pred_scaled_std = predictions.std()
    pred_aqi = scaler.inverse_transform([[pred_scaled_mean]])[0][0]
    
    lower_bound = scaler.inverse_transform([[pred_scaled_mean - 1.96 * pred_scaled_std]])[0][0]
    upper_bound = scaler.inverse_transform([[pred_scaled_mean + 1.96 * pred_scaled_std]])[0][0]
    
    
    if compute_mape:
        actual_aqi = city_data['Index Value'].iloc[-1]
        predicted_values = scaler.inverse_transform(predictions.mean(axis=0).reshape(-1, 1)).flatten()
        mape = np.mean(np.abs((actual_aqi - predicted_values) / actual_aqi)) * 100
        print(f"MAPE (Mean Absolute Percentage Error): {mape:.2f}%")
    
    return pred_aqi, lower_bound, upper_bound
joblib.dump(model, "ml_model.pkl")


# Example Usage
city = "Agra"
future_date = "2025-02-27"
model, scaler, look_back = train_bilstm_model(city, df)
pred_aqi, lower_bound, upper_bound = predict_future_aqi(city, future_date, df, model, scaler, look_back)
print(f"Predicted AQI for {city} on {future_date}: {pred_aqi:.2f} (95% CI: {lower_bound:.2f} - {upper_bound:.2f})")


  super().__init__(**kwargs)


Epoch 1/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 17ms/step - loss: 0.0247
Epoch 2/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - loss: 0.0128
Epoch 3/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0118
Epoch 4/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0107
Epoch 5/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0121
Epoch 6/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0126
Epoch 7/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - loss: 0.0105
Epoch 8/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0125
Epoch 9/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.0106
Epoch 10/20
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15m

In [19]:
model.save("model.h5")




In [22]:
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanSquaredError

modell = load_model("model.h5", custom_objects={"mse": MeanSquaredError()})




In [47]:
import streamlit as st
import pandas as pd
import joblib
import plotly.express as px

# Load Model
model = joblib.load("ml_model.pkl")

# Streamlit UI
st.title("🔍 ML Model Prediction Dashboard")

st.sidebar.header("Upload CSV Data")
uploaded_file = st.sidebar.file_uploader("Upload CSV", type=["csv"])

if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.write("📊 **Uploaded Data:**", df.head())

    if st.button("Make Predictions"):
        predictions = model.predict(df)
        df["Prediction"] = predictions
        st.write("✅ **Predictions:**", df)

        # Visualization
        fig = px.scatter(df, x=df.index, y="Prediction", title="Predictions Over Time")
        st.plotly_chart(fig)

st.sidebar.markdown("### Built with ❤️ using Streamlit")


DeltaGenerator(_root_container=1, _parent=DeltaGenerator())



DeltaGenerator(_root_container=1, _parent=DeltaGenerator())

In [28]:
df.head()

Unnamed: 0,date,City,No. Stations,Air Quality,Index Value,Prominent Pollutant,Region,Scaled_AQI
0,2015-05-01,Chennai,,Satisfactory,87,CO,Eastern Coastal Region,
1,2015-05-01,Varanasi,,Moderate,157,PM10,Indo-Gangetic Region,
2,2015-05-01,Hyderabad,,Moderate,189,PM2.5,Tropical wet & dry,
3,2015-05-01,Agra,,Moderate,179,PM10,Indo-Gangetic Region,0.33264
4,2015-05-02,Varanasi,,Moderate,156,PM10,Indo-Gangetic Region,


In [36]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input
from sklearn.preprocessing import MinMaxScaler
import datetime
from bayes_opt import BayesianOptimization

# Load dataset

df['date'] = pd.to_datetime(df['date'])

def train_bilstm_model(city_name, df):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='date')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['Index Value'])
    
    if len(city_data) <= 30:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum 31 days required.")
    
    scaler = MinMaxScaler()
    city_data['Scaled_AQI'] = scaler.fit_transform(city_data[['Index Value']])
    
    look_back = 30  # Days of past data used for prediction
    X, y = [], []
    
    for i in range(len(city_data) - look_back):
        X.append(city_data['Scaled_AQI'].iloc[i:i+look_back].values)
        y.append(city_data['Scaled_AQI'].iloc[i+look_back])
    
    X, y = np.array(X), np.array(y)
    X = X.reshape((X.shape[0], X.shape[1], 1))
    
    def bilstm_optimize(lstm_units, dropout_rate, learning_rate, epochs, batch_size):
        model = Sequential([
            Input(shape=(look_back, 1)),
            Bidirectional(LSTM(int(lstm_units), return_sequences=True)),
            Dropout(dropout_rate),
            Bidirectional(LSTM(int(lstm_units))),
            Dropout(dropout_rate),
            Dense(1)
        ])
        
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')
        model.fit(X, y, epochs=int(epochs), batch_size=int(batch_size), verbose=0)
        loss = model.evaluate(X, y, verbose=0)
        return -loss  # Negative MSE for maximization
    
    pbounds = {
        'lstm_units': (32, 64),  # Reduced upper bound
        'dropout_rate': (0.1, 0.3),  # Reduced range
        'learning_rate': (1e-4, 5e-3),  # Reduced upper bound
        'epochs': (10, 30),  # Reduced max epochs
        'batch_size': (8, 16)  # Reduced max batch size
    }
    
    optimizer = BayesianOptimization(f=bilstm_optimize, pbounds=pbounds, random_state=42)
    optimizer.maximize(init_points=3, n_iter=5)  # Reduced iterations
    
    best_params = optimizer.max['params']
    model = Sequential([
        Input(shape=(look_back, 1)),
        Bidirectional(LSTM(int(best_params['lstm_units']), return_sequences=True)),
        Dropout(best_params['dropout_rate']),
        Bidirectional(LSTM(int(best_params['lstm_units']))),
        Dropout(best_params['dropout_rate']),
        Dense(1)
    ])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']), loss='mse')
    model.fit(X, y, epochs=int(best_params['epochs']), batch_size=int(best_params['batch_size']), verbose=1)
    
    df.loc[df['City'].str.lower() == city_name.lower(), 'Scaled_AQI'] = scaler.transform(city_data[['Index Value']])
    return model, scaler, look_back

def predict_future_aqi(city_name, future_date, df, model, scaler, look_back, n_simulations=30, compute_mape=True):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='date')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['Index Value'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data['Scaled_AQI'] = scaler.transform(city_data[['Index Value']])
    last_days = city_data['Scaled_AQI'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    predictions = np.array([model.predict(last_days) for _ in range(n_simulations)])
    pred_scaled_mean = predictions.mean()
    pred_scaled_std = predictions.std()
    pred_aqi = scaler.inverse_transform([[pred_scaled_mean]])[0][0]
    
    lower_bound = scaler.inverse_transform([[pred_scaled_mean - 1.96 * pred_scaled_std]])[0][0]
    upper_bound = scaler.inverse_transform([[pred_scaled_mean + 1.96 * pred_scaled_std]])[0][0]
    
    if compute_mape:
        actual_aqi = city_data['Index Value'].iloc[-1]
        predicted_values = scaler.inverse_transform(predictions.mean(axis=0).reshape(-1, 1)).flatten()
        mape = np.mean(np.abs((actual_aqi - predicted_values) / actual_aqi)) * 100
        print(f"MAPE (Mean Absolute Percentage Error): {mape:.2f}%")
    
    return pred_aqi, lower_bound, upper_bound

# Example Usage
city = "Agra"
future_date = "2025-03-15"
model, scaler, look_back = train_bilstm_model(city, df)
pred_aqi, lower_bound, upper_bound = predict_future_aqi(city, future_date, df, model, scaler, look_back)
print(f"Predicted AQI for {city} on {future_date}: {pred_aqi:.2f} (95% CI: {lower_bound:.2f} - {upper_bound:.2f})")


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... | lstm_u... |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m-0.008999[39m | [39m11.0     [39m | [39m0.2901   [39m | [39m24.64    [39m | [39m0.003033 [39m | [39m36.99    [39m |
| [35m2        [39m | [35m-0.008876[39m | [35m9.248    [39m | [35m0.1116   [39m | [35m27.32    [39m | [35m0.003045 [39m | [35m54.66    [39m |
| [39m3        [39m | [39m-0.009549[39m | [39m8.165    [39m | [39m0.294    [39m | [39m26.65    [39m | [39m0.00114  [39m | [39m37.82    [39m |
| [39m4        [39m | [39m-0.009116[39m | [39m9.116    [39m | [39m0.2138   [39m | [39m27.36    [39m | [39m0.002957 [39m | [39m54.98    [39m |
| [39m5        [39m | [39m-0.00934 [39m | [39m9.456    [39m | [39m0.1276   [39m | [39m27.15    [39m | [39m0.0003579[39m | [39m54.75    [39m |
| [39m6        [39m | [39m-0.01012 [39m | [

In [42]:
from tensorflow.keras.models import Sequential

# Example: Assume you have trained a BiLSTM model
model = Sequential()  # Your actual model should be defined and trained

# Save the trained model
model.save("model.h5")
print("✅ Model saved successfully!")




✅ Model saved successfully!
