In [19]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Load the raw dataset
data = pd.read_excel('final.xlsx')

# Keep only non-renewable (Thermal) stations; .copy() so later edits never touch `data`
non_renewable_data = data[data['Type Of Station'] == 'Thermal'].copy()

# Columns fed to the LSTM. Column 0 is the value the sequences are built to predict.
features = ['Declared Capability (MWh)', 'Schedule (MWh)', 'Total Cost (Rs/Unit)']

# Validate the feature columns BEFORE they are used for dropna/scaling, so a
# schema problem surfaces here rather than as a KeyError further down.
missing_features = [col for col in features if col not in non_renewable_data.columns]
assert not missing_features, "Some features are missing in the dataset"

# Report missing values before handling them
print("Missing values before handling:")
print(non_renewable_data.isnull().sum())

# Drop rows where any model feature is missing (other columns may keep NaNs;
# they are not used by the model). Reassign instead of mutating in place so
# re-running the cell is idempotent.
non_renewable_data = non_renewable_data.dropna(subset=features)

print("Missing values after handling:")
print(non_renewable_data.isnull().sum())

# Convert 'Date' with a plain column assignment. `df.loc[:, 'Date'] = ...`
# sets values in place under pandas >= 2.0 and can leave an object-dtype
# column as object, which would produce a non-datetime index.
non_renewable_data['Date'] = pd.to_datetime(non_renewable_data['Date'])

# Index by date and sort chronologically: the sliding windows and the
# positional train/test split both assume time order. mergesort is stable,
# so rows sharing a date keep their original relative order.
# NOTE(review): the frame has 'Station'/'StateName' columns, so there are
# presumably many rows per date; windows then mix stations. Confirm whether a
# per-station or per-day aggregation is intended.
non_renewable_data = non_renewable_data.set_index('Date').sort_index(kind='mergesort')

# Scale features to [0, 1]. Fit the scaler on the leading 80% of rows only
# (mirrors the 80/20 split applied to the sequences) so min/max statistics of
# the held-out tail do not leak into training; later rows may therefore fall
# slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
n_fit_rows = max(1, int(len(non_renewable_data) * 0.8))
scaler.fit(non_renewable_data[features].iloc[:n_fit_rows])
scaled_data = scaler.transform(non_renewable_data[features])

# Create the input sequences for LSTM
def create_sequences(data, time_steps=30, target_col=0):
    """Build sliding-window samples for a sequence model.

    Each sample ``X[i]`` is the block of ``time_steps`` consecutive rows
    starting at row ``i``; its label ``y[i]`` is the value in column
    ``target_col`` of the row immediately after that block.

    Args:
        data: 2-D array-like of shape (n_rows, n_features). Anything accepted
            by ``np.asarray`` works (ndarray, list of rows, DataFrame).
        time_steps: Number of consecutive rows per input window (>= 1).
        target_col: Column index used as the prediction target. Defaults to 0,
            which the caller uses for 'Declared Capability (MWh)'.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape
        (n_rows - time_steps, time_steps, n_features) and ``y`` of shape
        (n_rows - time_steps,). When there are not enough rows for a single
        window, both are empty but keep their trailing dimensions.

    Raises:
        ValueError: If ``data`` is not 2-D or ``time_steps`` is less than 1.
    """
    values = np.asarray(data)
    if values.ndim != 2:
        raise ValueError("data must be 2-D with shape (n_rows, n_features)")
    if time_steps < 1:
        raise ValueError("time_steps must be at least 1")

    n_sequences = len(values) - time_steps
    if n_sequences <= 0:
        # Keep the trailing dimensions so callers can still inspect X.shape
        empty_X = np.empty((0, time_steps, values.shape[1]), dtype=values.dtype)
        empty_y = np.empty((0,), dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + time_steps] for start in range(n_sequences)])
    # Labels are the target column from row `time_steps` onward; copy so the
    # result does not alias the input array.
    y = values[time_steps:, target_col].copy()
    return X, y

# Window length: how many consecutive rows make up one model input
time_steps = 30
X, y = create_sequences(scaled_data, time_steps)

# Abort early when not even one window could be built
if not len(X):
    raise ValueError("Not enough data to create sequences")

# Positional 80/20 split: the leading 80% of windows train the model,
# the remaining windows are held out
train_size = int(len(X) * 0.8)
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# weight initialisation, dropout masks and batch shuffling repeat across
# "Restart & Run All".
# NOTE(review): bit-exact results on GPU may additionally need
# tf.config.experimental.enable_op_determinism() - confirm if required.
set_random_seed(42)

# Build the LSTM model: two stacked 50-unit LSTM layers, each followed by 20%
# dropout, and a single linear output unit. The input shape is taken from the
# training windows: (time_steps, n_features).
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50, return_sequences=True),   # pass the full sequence on to the next LSTM
    Dropout(0.2),
    LSTM(50, return_sequences=False),  # keep only the final hidden state
    Dropout(0.2),
    Dense(1),  # Predicting 1 output (energy consumption)
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Fit for 10 epochs in mini-batches of 64; the held-out windows are supplied
# as validation data so the per-epoch validation loss is reported
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=64,
    epochs=10,
)

# Persist the fitted scaler and the trained network for the prediction cell
joblib.dump(scaler, 'scaler.save')
model.save('energy_consumption_lstm_model.h5')

print("Model training complete and files saved.")


Missing values before handling:
Date                                           0
StateName                                      0
Station                                        0
Capacity Allocated to State (MW)           16148
Plant Capacity (MW)                        12478
Type Of Station                                0
Ownership                                      0
Variable Cost (Rs/Unit)                   168212
Fixed Cost (Rs/Unit)                      177664
Total Cost (Rs/Unit)                      155012
Declared Capability (MWh)                 181062
Schedule (MWh)                            180898
Deviation from Merit Order (MWh)          479235
Reason for Deviation / Remarks(if any)    498633
dtype: int64
Missing values after handling:
Date                                           0
StateName                                      0
Station                                        0
Capacity Allocated to State (MW)            7008
Plant Capacity (MW)                       



Model training complete and files saved.


In [20]:
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler

def _plot_consumption_trend(series, period_label, xlabel):
    """Plot one period of 'Declared Capability (MWh)' values against their dates."""
    # Imported here so this cell also works when the training cell (which
    # imports pyplot) has not been run in the current kernel.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 5))
    plt.plot(series.index, series, label='Past Consumption')
    plt.title(f"Energy Consumption Trends for {period_label}")
    plt.xlabel(xlabel)
    plt.ylabel('Energy Consumption (MWh)')
    plt.legend()
    plt.show()


def predict_consumption(input_date):
    """Predict or plot thermal-plant 'Declared Capability (MWh)' for a period.

    The format of ``input_date`` selects the behaviour:

    * ``'YYYY-MM-DD'`` - feed the 30 rows preceding that date to the saved
      LSTM and print the predicted value.
    * ``'YYYY-MM'`` - plot the recorded values for that month.
    * ``'YYYY'`` - plot the recorded values for that year.

    Args:
        input_date: Date string in one of the three formats above.

    Returns:
        None. Results are printed or shown as a matplotlib figure.

    Side effects:
        Reads 'final.xlsx'; for the daily branch also loads
        'energy_consumption_lstm_model.h5' and 'scaler.save'.
    """
    features = ['Declared Capability (MWh)', 'Schedule (MWh)', 'Total Cost (Rs/Unit)']
    target = features[0]
    time_steps = 30  # must match the window length the model was trained with

    input_date = input_date.strip()

    # Same preprocessing as training: thermal stations only, datetime index,
    # model features only, rows with missing features dropped.
    thermal = pd.read_excel('final.xlsx')
    thermal = thermal[thermal['Type Of Station'] == 'Thermal'].copy()
    thermal['Date'] = pd.to_datetime(thermal['Date'])
    # Sort chronologically (stable) so "the last 30 rows before a date" is
    # well defined even if the spreadsheet is not in date order.
    # NOTE(review): there are presumably many rows per date (one per
    # station); the window then mixes stations. Confirm whether a per-day or
    # per-station aggregation is intended.
    history = thermal.set_index('Date')[features].dropna().sort_index(kind='mergesort')

    if len(input_date) == 10:  # 'YYYY-MM-DD' format
        date = pd.to_datetime(input_date)
        # Use only rows strictly BEFORE the requested date: the model was
        # trained to predict the row that follows its input window.
        past_data = history.loc[history.index < date].tail(time_steps)
        if len(past_data) < time_steps:
            print("Not enough data for prediction")
            return

        # Load the artefacts only when a prediction is actually made
        model = load_model('energy_consumption_lstm_model.h5')
        scaler = joblib.load('scaler.save')

        window = np.expand_dims(scaler.transform(past_data), axis=0)
        predicted_scaled_value = model.predict(window)

        # The scaler was fitted on all three features, so it cannot invert a
        # single column directly. Place the prediction in the target column
        # of a full-width row, invert, and read that column back.
        padded = np.zeros((1, len(features)))
        padded[0, 0] = predicted_scaled_value[0, 0]
        predicted_value = scaler.inverse_transform(padded)
        print(f"Predicted Energy Consumption for {input_date}: {predicted_value[0][0]} MWh")

    elif len(input_date) == 7:  # 'YYYY-MM' format for a whole month
        period = pd.to_datetime(input_date)
        in_month = (history.index.month == period.month) & (history.index.year == period.year)
        _plot_consumption_trend(history.loc[in_month, target], input_date, 'Date')

    elif len(input_date) == 4:  # 'YYYY' format for a whole year
        year = pd.to_datetime(input_date).year
        in_year = history.index.year == year
        _plot_consumption_trend(history.loc[in_year, target], input_date, 'Month')

    else:
        # Previously an unrecognised format fell through without any feedback
        print("Unrecognised date format; use YYYY-MM-DD, YYYY-MM or YYYY")


In [21]:
# Source of the Streamlit front end, kept as a string so the notebook can
# generate app.py itself
streamlit_source = """
import streamlit as st

st.title("Energy Consumption Forecasting for Non-Renewable Plants")

input_date = st.text_input("Enter Date (YYYY-MM-DD), Month (YYYY-MM), or Year (YYYY):")

# You can add the logic to predict energy consumption here
st.write("Prediction will be displayed here.")
"""

# (Re)create app.py in the working directory with that source
with open('app.py', 'w') as app_file:
    app_file.write(streamlit_source)


In [22]:
from pyngrok import ngrok

# Set your ngrok authtoken
ngrok.set_auth_token("2loTHHQEtm569ueN4QxvFQFpUs2_5VmcbJqRdNEFxcYfe5bhJ")

# Start Streamlit app
!streamlit run app.py &>/dev/null&

# Expose the Streamlit app using ngrok
public_url = ngrok.connect(addr='localhost:8501')
print(f"Streamlit app running at: {public_url}")


Streamlit app running at: NgrokTunnel: "https://73a4-104-199-147-244.ngrok-free.app" -> "http://localhost:8501"
