## Building LSTM

In [2]:
# importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib

# Load dataset
df = pd.read_csv("highrange_25yrs_indian_coalmine_data.csv")

# Emission factors
CO2_PER_LITER_DIESEL = 2.68      # kg CO₂ per liter diesel
CO2_PER_TONNE_COAL = 1.9         # kg CO₂ per tonne coal mined

# ✅ Calculate CO₂ emissions directly from fuel + coal
df["CO2_Emitted_kg"] = (
    df["Fuel_Used_Liters"] * CO2_PER_LITER_DIESEL +
    df["Coal_Mined_Tonnes"] * CO2_PER_TONNE_COAL
)

# Features & target
features = df[["Fuel_Used_Liters", "Coal_Mined_Tonnes"]].values
target = df["CO2_Emitted_kg"].values.reshape(-1, 1)

# Scaling
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(features)
y_scaled = scaler_y.fit_transform(target)

# Save scalers
joblib.dump(scaler_x, "scaler_x.pkl")
joblib.dump(scaler_y, "scaler_y.pkl")

# Create sequences
TIME_STEPS = 14

def create_sequences(X, y, time_steps=14):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i + time_steps])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

X_seq, y_seq = create_sequences(X_scaled, y_scaled)
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)

# Build LSTM model with Dropout
input_layer = Input(shape=(TIME_STEPS, 2))
x = LSTM(128, return_sequences=True)(input_layer)
x = Dropout(0.2)(x)
x = LSTM(64, return_sequences=True)(x)
x = Dropout(0.2)(x)
x = LSTM(32)(x)
x = Dense(32, activation='relu')(x)
output = Dense(1)(x)

model = Model(inputs=input_layer, outputs=output)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Early stopping to prevent overtraining
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Train
model.fit(
    X_train, y_train,
    epochs=300,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)

# Save final model
model.save("lstm_co2_predictor.h5")
print("✅ Improved LSTM model trained and saved successfully!")


Epoch 1/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 47ms/step - loss: 0.0370 - mae: 0.1300 - val_loss: 0.0055 - val_mae: 0.0606
Epoch 2/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - loss: 0.0062 - mae: 0.0633 - val_loss: 0.0063 - val_mae: 0.0645
Epoch 3/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 43ms/step - loss: 0.0060 - mae: 0.0620 - val_loss: 0.0047 - val_mae: 0.0558
Epoch 4/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 42ms/step - loss: 0.0053 - mae: 0.0585 - val_loss: 0.0045 - val_mae: 0.0545
Epoch 5/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 40ms/step - loss: 0.0050 - mae: 0.0565 - val_loss: 0.0047 - val_mae: 0.0559
Epoch 6/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 51ms/step - loss: 0.0050 - mae: 0.0562 - val_loss: 0.0047 - val_mae: 0.0555
Epoch 7/300
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s



✅ Improved LSTM model trained and saved successfully!


In [3]:
model.save("lstm_co2_predictor.h5")



In [3]:
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
print(f"MAPE: {mape:.2f}%")

MAPE: 6.89%
