In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input
from sklearn.preprocessing import MinMaxScaler
from bayes_opt import BayesianOptimization
import joblib

def train_bilstm_pm25_model(city_name, df):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='Datetime')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['PM2.5'])
    
    if len(city_data) <= 30:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum 31 days required.")
    
    scaler = MinMaxScaler()
    city_data['Scaled_PM2.5'] = scaler.fit_transform(city_data[['PM2.5']])
    
    look_back = 30
    X, y = [], []
    
    for i in range(len(city_data) - look_back):
        X.append(city_data['Scaled_PM2.5'].iloc[i:i+look_back].values)
        y.append(city_data['Scaled_PM2.5'].iloc[i+look_back])
    
    X, y = np.array(X), np.array(y)
    X = X.reshape((X.shape[0], X.shape[1], 1))
    
    def bilstm_optimize(lstm_units, dropout_rate, learning_rate, epochs, batch_size):
        model = Sequential([
            Input(shape=(look_back, 1)),
            Bidirectional(LSTM(int(lstm_units), return_sequences=True)),
            Dropout(dropout_rate),
            Bidirectional(LSTM(int(lstm_units))),
            Dropout(dropout_rate),
            Dense(1)
        ])
        
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mse')
        model.fit(X, y, epochs=int(epochs), batch_size=int(batch_size), verbose=0)
        loss = model.evaluate(X, y, verbose=0)
        return -loss
    
    pbounds = {
        'lstm_units': (32, 64),
        'dropout_rate': (0.1, 0.3),
        'learning_rate': (1e-4, 5e-3),
        'epochs': (10, 30),
        'batch_size': (8, 16)
    }
    
    optimizer = BayesianOptimization(f=bilstm_optimize, pbounds=pbounds, random_state=42)
    optimizer.maximize(init_points=3, n_iter=5)
    
    best_params = optimizer.max['params']
    model = Sequential([
        Input(shape=(look_back, 1)),
        Bidirectional(LSTM(int(best_params['lstm_units']), return_sequences=True)),
        Dropout(best_params['dropout_rate']),
        Bidirectional(LSTM(int(best_params['lstm_units']))),
        Dropout(best_params['dropout_rate']),
        Dense(1)
    ])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']), loss='mse')
    model.fit(X, y, epochs=int(best_params['epochs']), batch_size=int(best_params['batch_size']), verbose=1)
    
    model.save("bilstm_pm25_model.h5")
    joblib.dump(scaler, "scaler.pkl")
    print("Model and scaler saved successfully!")
    
    return model, scaler, look_back

def load_model_and_scaler():
    model = tf.keras.models.load_model("bilstm_pm25_model.h5")
    scaler = joblib.load("scaler.pkl")
    print("Model and scaler loaded successfully!")
    return model, scaler

def predict_future_pm25(city_name, df, model, scaler, look_back, n_simulations=30):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='Datetime')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['PM2.5'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data['Scaled_PM2.5'] = scaler.transform(city_data[['PM2.5']])
    last_days = city_data['Scaled_PM2.5'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    predictions = np.array([model.predict(last_days) for _ in range(n_simulations)])
    pred_scaled_mean = predictions.mean()
    pred_scaled_std = predictions.std()
    pred_pm25 = scaler.inverse_transform([[pred_scaled_mean]])[0][0]
    
    lower_bound = scaler.inverse_transform([[pred_scaled_mean - 1.96 * pred_scaled_std]])[0][0]
    upper_bound = scaler.inverse_transform([[pred_scaled_mean + 1.96 * pred_scaled_std]])[0][0]
    
    return pred_pm25, lower_bound, upper_bound

# Example usage:
df = pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\sorted_aqi_hourly_dataset.csv')
df['Datetime'] = pd.to_datetime(df['Datetime'])
city_name = "Delhi"
model, scaler, look_back = train_bilstm_pm25_model(city_name, df)
model, scaler = load_model_and_scaler()
pred_pm25, lower, upper = predict_future_pm25(city_name, df, model, scaler, look_back)
print(f"Predicted PM2.5 for {city_name}: {pred_pm25:.2f} (95% CI: {lower:.2f} - {upper:.2f})")


|   iter    |  target   | batch_... | dropou... |  epochs   | learni... | lstm_u... |
-------------------------------------------------------------------------------------


KeyboardInterrupt: 

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input
from sklearn.preprocessing import MinMaxScaler
import joblib

def train_bilstm_pm25_model(city_name, df):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='Datetime')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['PM2.5'])
    
    if len(city_data) <= 30:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum 31 days required.")
    
    scaler = MinMaxScaler()
    city_data['Scaled_PM2.5'] = scaler.fit_transform(city_data[['PM2.5']])
    
    look_back = 30
    X, y = [], []
    
    for i in range(len(city_data) - look_back):
        X.append(city_data['Scaled_PM2.5'].iloc[i:i+look_back].values)
        y.append(city_data['Scaled_PM2.5'].iloc[i+look_back])
    
    X, y = np.array(X), np.array(y)
    X = X.reshape((X.shape[0], X.shape[1], 1))
    
    model = Sequential([
        Input(shape=(look_back, 1)),
        Bidirectional(LSTM(50, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(50)),
        Dropout(0.2),
        Dense(1)
    ])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')
    model.fit(X, y, epochs=20, batch_size=16, verbose=1)
    
    model.save("bilstm_pm25_model.h5")
    joblib.dump(scaler, "scaler.pkl")
    print("Model and scaler saved successfully!")
    
    return model, scaler, look_back

def load_model_and_scaler():
    model = tf.keras.models.load_model("bilstm_pm25_model.h5",custom_objects={"mse": tf.keras.losses.MeanSquaredError()})
    scaler = joblib.load("scaler.pkl")
    print("Model and scaler loaded successfully!")
    return model, scaler

def predict_future_pm25(city_name, df, model, scaler, look_back, n_simulations=30):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='Datetime')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['PM2.5'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data['Scaled_PM2.5'] = scaler.transform(city_data[['PM2.5']])
    last_days = city_data['Scaled_PM2.5'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    predictions = np.array([model.predict(last_days) for _ in range(n_simulations)])
    pred_scaled_mean = predictions.mean()
    pred_scaled_std = predictions.std()
    pred_pm25 = scaler.inverse_transform([[pred_scaled_mean]])[0][0]
    
    lower_bound = scaler.inverse_transform([[pred_scaled_mean - 1.96 * pred_scaled_std]])[0][0]
    upper_bound = scaler.inverse_transform([[pred_scaled_mean + 1.96 * pred_scaled_std]])[0][0]
    
    return pred_pm25, lower_bound, upper_bound

# Example usage:
df = pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\sorted_aqi_hourly_dataset.csv')
df['Datetime'] = pd.to_datetime(df['Datetime'])
city_name = "Delhi"
model, scaler, look_back = train_bilstm_pm25_model(city_name, df)
model, scaler = load_model_and_scaler()
pred_pm25, lower, upper = predict_future_pm25(city_name, df, model, scaler, look_back)
print(f"Predicted PM2.5 for {city_name}: {pred_pm25:.2f} (95% CI: {lower:.2f} - {upper:.2f})")


Epoch 1/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 14ms/step - loss: 0.0044
Epoch 2/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0017
Epoch 3/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0016
Epoch 4/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0014
Epoch 5/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0012
Epoch 6/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0012
Epoch 7/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 14ms/step - loss: 0.0013
Epoch 8/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0012
Epoch 9/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - loss: 0.0012
Epoch 10/20
[1m1172/1172[0m [32m━━━━━━━━━━━━━━━━━━━━



Model and scaler saved successfully!




Model and scaler loaded successfully!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 823ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━

In [3]:
def load_model_and_scaler():
    model = tf.keras.models.load_model("bilstm_pm25_model.h5", compile=False)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="mse")
    scaler = joblib.load("scaler.pkl")
    print("Model and scaler loaded successfully!")
    return model, scaler


In [4]:
city_name = "Delhi"
model, scaler, look_back = train_bilstm_pm25_model(city_name, df)
model, scaler = load_model_and_scaler()
pred_pm25, lower, upper = predict_future_pm25(city_name, df, model, scaler, look_back)
print(f"Predicted PM2.5 for {city_name}: {pred_pm25:.2f} (95% CI: {lower:.2f} - {upper:.2f})")

Epoch 1/20


SystemError: \Objects\listobject.c:337: bad argument to internal function

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib

# Load trained model & scaler
def load_model_and_scaler():
    model = tf.keras.models.load_model("bilstm_pm25_model.h5")
    scaler = joblib.load("scaler.pkl")
    print("Model and scaler loaded successfully!")
    return model, scaler

# Function to predict PM2.5 for a given city
def predict_future_pm25(city_name, df, model, scaler, look_back=30, n_simulations=30):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='Datetime')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['PM2.5'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data['Scaled_PM2.5'] = scaler.transform(city_data[['PM2.5']])
    last_days = city_data['Scaled_PM2.5'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    predictions = np.array([model.predict(last_days) for _ in range(n_simulations)])
    pred_scaled_mean = predictions.mean()
    pred_scaled_std = predictions.std()
    pred_pm25 = scaler.inverse_transform([[pred_scaled_mean]])[0][0]
    
    lower_bound = scaler.inverse_transform([[pred_scaled_mean - 1.96 * pred_scaled_std]])[0][0]
    upper_bound = scaler.inverse_transform([[pred_scaled_mean + 1.96 * pred_scaled_std]])[0][0]
    
    return pred_pm25, lower_bound, upper_bound

# === Execution ===
df = pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\sorted_aqi_hourly_dataset.csv')
df['Datetime'] = pd.to_datetime(df['Datetime'])

city_name = input("Enter city name: ")  # User inputs city name

model, scaler = load_model_and_scaler()  # Load trained model & scaler
pred_pm25, lower, upper = predict_future_pm25(city_name, df, model, scaler)

print(f"Predicted PM2.5 for {city_name}: {pred_pm25:.2f} (95% CI: {lower:.2f} - {upper:.2f})")


TypeError: Could not locate function 'mse'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'keras.metrics', 'class_name': 'function', 'config': 'mse', 'registered_name': 'mse'}

In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import joblib
from datetime import datetime, timedelta
from sklearn.metrics import mean_squared_error

def load_model_and_scaler():
    model = tf.keras.models.load_model("bilstm_pm25_model.h5",custom_objects={"mse": tf.keras.losses.MeanSquaredError()})
    scaler = joblib.load("scaler.pkl")
    print("Model and scaler loaded successfully!")
    return model, scaler

def predict_future_pm25(city_name, df, model, scaler, look_back, future_date, n_simulations=30):
    city_data = df[df['City'].str.lower() == city_name.lower()].sort_values(by='Datetime')
    
    if city_data.empty:
        raise ValueError(f"No data available for {city_name}. Check spelling or dataset.")
    
    city_data = city_data.dropna(subset=['PM2.5'])
    
    if len(city_data) <= look_back:
        raise ValueError(f"Not enough historical data for {city_name}. Minimum {look_back+1} days required.")
    
    city_data['Scaled_PM2.5'] = scaler.transform(city_data[['PM2.5']])
    last_days = city_data['Scaled_PM2.5'].iloc[-look_back:].values.reshape((1, look_back, 1))
    
    future_date = datetime.strptime(future_date, "%Y-%m-%d")
    last_known_date = city_data['Datetime'].max()
    
    if future_date <= last_known_date:
        raise ValueError("Future date must be beyond the last recorded date in the dataset.")
    
    days_ahead = (future_date - last_known_date).days
    
    for _ in range(days_ahead):
        pred_scaled = model.predict(last_days)[0, 0]
        last_days = np.roll(last_days, -1)
        last_days[0, -1, 0] = pred_scaled
    
    pred_pm25 = scaler.inverse_transform([[pred_scaled]])[0][0]
    
    return pred_pm25

# Load model and make predictions
df = pd.read_csv(r'C:\Users\ASUS\Desktop\Climate1\sorted_aqi_hourly_dataset.csv')
df['Datetime'] = pd.to_datetime(df['Datetime'])
city_name = input("Enter city name: ")
future_date = input("Enter future date (YYYY-MM-DD): ")
look_back = 30  # Use the same look_back as during training

model, scaler = load_model_and_scaler()
pred_pm25 = predict_future_pm25(city_name, df, model, scaler, look_back, future_date)
print(f"Predicted PM2.5 for {city_name} on {future_date}: {pred_pm25:.2f}")

# Calculate MSE if actual data is available
future_actual = df[(df['City'].str.lower() == city_name.lower()) & (df['Datetime'] == future_date)]
if not future_actual.empty:
    actual_pm25 = future_actual['PM2.5'].values[0]
    mse = mean_squared_error([actual_pm25], [pred_pm25])
    print(f"Mean Squared Error (MSE): {mse:.4f}")
else:
    print("No actual data available for MSE calculation.")




Model and scaler loaded successfully!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━