In [None]:
import json
import numpy as np
import geopandas as gpd
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from cmath import nan
from geopandas import GeoDataFrame
import tensorflow as tf
import pandas as pd
import numpy as np
from math import sqrt
import warnings
from sklearn.metrics import mean_squared_error, mean_absolute_error
from numpy import array, split, nan
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed
import matplotlib.pyplot as plt
from tqdm import tqdm

In [12]:
# Silence FutureWarning messages so they do not clutter the cell outputs below.
warnings.filterwarnings("ignore", category=FutureWarning)

In [13]:
# Function to load and preprocess data
def load_data(file_path, index_col='Date', scaler=None, holdout_rows=366):
    """Read a CSV file, index it by ``index_col`` and scale its values.

    Two modes are supported:

    * ``scaler is None`` -- a new ``MinMaxScaler`` with range (0, 1) is fitted
      on every row except the last ``holdout_rows`` rows. The returned scaled
      array contains ONLY those fitted rows; the trailing hold-out rows are
      not part of it (they are still present in the returned raw frame).
    * ``scaler`` given -- the whole frame is transformed with that scaler and
      nothing is re-fitted.

    Args:
        file_path: Path of the CSV file to read.
        index_col: Name of the column to use as the index.
        scaler: Optional already-fitted scaler exposing ``transform``.
        holdout_rows: Number of trailing rows excluded from fitting and from
            the scaled output when a new scaler is created. Defaults to 366,
            the value that was previously hard-coded.

    Returns:
        Tuple ``(data, data_scaled, scaler)``: the raw indexed DataFrame, the
        scaled array, and the scaler that produced it.

    Raises:
        ValueError: If ``holdout_rows`` is negative, or if no rows remain to
            fit the scaler after removing the hold-out rows.
    """
    data = pd.read_csv(file_path).set_index(index_col)

    if scaler is not None:
        return data, scaler.transform(data), scaler

    if holdout_rows < 0:
        raise ValueError(f"holdout_rows must be >= 0, got {holdout_rows}")

    # Use an explicit positive stop index: a negative slice such as
    # ``iloc[:-0]`` would select nothing when holdout_rows is 0.
    n_fit_rows = len(data) - holdout_rows
    if n_fit_rows <= 0:
        raise ValueError(
            f"Not enough rows to fit the scaler: {len(data)} row(s) in "
            f"{file_path!r} with holdout_rows={holdout_rows}"
        )

    scaler = MinMaxScaler(feature_range=(0, 1))
    data_scaled = scaler.fit_transform(data.iloc[:n_fit_rows])
    return data, data_scaled, scaler

In [14]:
# Function to split data into sequences
def split_sequence(sequence, n_steps_in, n_steps_out, train_fraction=0.8):
    """Cut a sequence into sliding input/output windows and split them.

    A window starting at position ``i`` uses ``sequence[i:i + n_steps_in]`` as
    input and the following ``n_steps_out`` items as target. Windows are
    assigned chronologically: a window goes to the training set when its start
    position satisfies ``i <= len(sequence) * train_fraction``, otherwise to
    the test set. Because the comparison is inclusive, the first window always
    lands in the training set.

    Args:
        sequence: Indexable, sliceable sequence (e.g. a NumPy array whose
            first axis is time).
        n_steps_in: Number of consecutive items in each input window.
        n_steps_out: Number of consecutive items in each target window.
        train_fraction: Share of start positions (relative to the sequence
            length) routed to the training set. Defaults to 0.8, the value
            that was previously hard-coded.

    Returns:
        Tuple ``(X, y, x_test, y_test)`` of NumPy arrays. A set that receives
        no window is returned as an empty array of shape ``(0,)``.
    """
    train_inputs, train_targets = [], []
    test_inputs, test_targets = [], []

    # Number of start positions for which a complete input+target window fits.
    n_windows = len(sequence) - n_steps_in - n_steps_out + 1
    train_cutoff = len(sequence) * train_fraction

    for start in range(max(n_windows, 0)):
        input_end = start + n_steps_in
        window_in = sequence[start:input_end]
        window_out = sequence[input_end:input_end + n_steps_out]
        if start <= train_cutoff:
            train_inputs.append(window_in)
            train_targets.append(window_out)
        else:
            test_inputs.append(window_in)
            test_targets.append(window_out)

    return (
        np.array(train_inputs),
        np.array(train_targets),
        np.array(test_inputs),
        np.array(test_targets),
    )

In [15]:
# Function to create and train LSTM model
def build_train_lstm(x_train, y_train, n_features, n_steps_out, n_neurons=100, n_layers=1, n_epoch=100):
    """Assemble an LSTM encoder-decoder network, fit it, and return both.

    Layer stack, in order: one LSTM reading windows of length
    ``x_train.shape[1]``, a ``RepeatVector(n_steps_out)``, ``n_layers``
    sequence-returning LSTMs, and a time-distributed dense layer with
    ``n_features`` outputs. All LSTMs use ``n_neurons`` units and ReLU
    activation. The model is compiled with the Adam optimizer and MAE loss.

    Args:
        x_train: Training inputs; the second axis gives the input window length.
        y_train: Training targets.
        n_features: Number of features per time step (input and output).
        n_steps_out: Number of time steps the network emits.
        n_neurons: Units in every LSTM layer.
        n_layers: Number of LSTM layers placed after the repeat vector.
        n_epoch: Number of training epochs.

    Returns:
        Tuple ``(model, history)`` -- the fitted model and the object returned
        by ``model.fit``.
    """
    window_length = x_train.shape[1]

    # Build the full layer stack first, then attach it to the model in order.
    layer_stack = [
        LSTM(n_neurons, activation='relu', input_shape=(window_length, n_features)),
        RepeatVector(n_steps_out),
    ]
    layer_stack.extend(
        LSTM(n_neurons, activation='relu', return_sequences=True)
        for _ in range(n_layers)
    )
    layer_stack.append(TimeDistributed(Dense(n_features)))

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    network.compile(optimizer='adam', loss='mae')

    # 20% of the training samples are held back by Keras for validation.
    fit_options = dict(epochs=n_epoch, validation_split=0.2, batch_size=64, verbose=0)
    training_history = network.fit(x_train, y_train, **fit_options)
    return network, training_history

In [16]:
# Function to evaluate model and generate predictions
def evaluate_model(model, x_test, y_test, scaler):
    """Predict on ``x_test`` and score the result in the original data scale.

    Predictions and targets are each flattened to two dimensions, with the
    last axis sized like the last axis of ``x_test``, and passed through
    ``scaler.inverse_transform`` before the errors are computed.

    Args:
        model: Object exposing ``predict``.
        x_test: Test inputs; its last axis length is used as the feature count.
        y_test: Test targets in scaled space.
        scaler: Fitted scaler exposing ``inverse_transform``.

    Returns:
        Tuple ``(rmse, mae, y_pred_unscaled, y_test_unscaled)``.
    """
    feature_count = x_test.shape[-1]

    def _to_original_scale(batch):
        # Collapse every leading axis so the scaler sees (rows, features).
        return scaler.inverse_transform(batch.reshape(-1, feature_count))

    predicted = _to_original_scale(model.predict(x_test))
    observed = _to_original_scale(y_test)

    root_mean_squared = np.sqrt(mean_squared_error(observed, predicted))
    mean_absolute = mean_absolute_error(observed, predicted)
    return root_mean_squared, mean_absolute, predicted, observed

In [17]:
# Function to plot learning curve
def plot_learning_curve(history):
    """Draw training and validation loss per epoch and show the figure.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the keys ``'loss'`` and ``'val_loss'``.
    """
    curves = (
        ('loss', 'Training Loss'),
        ('val_loss', 'Validation Loss'),
    )
    for metric_key, legend_label in curves:
        plt.plot(history.history[metric_key], label=legend_label)

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Learning Curve for LSTM')
    plt.legend()
    plt.show()

In [18]:

def data_formating(M_data, n_steps_out, y_test_un, y_pred):
    """Tabulate actual vs. predicted values, one row per feature column.

    The result has a ``'Stations_Feature'`` column holding the column names of
    ``M_data``, followed by an ``'Actual Day k'`` / ``'Predicted Day k'`` pair
    for every forecast day ``k`` in ``1..n_steps_out``.

    Day ``k`` is read from row ``k - 1`` of ``y_test_un`` and ``y_pred``.
    When those arrays come from ``evaluate_model`` (which flattens
    ``(samples, steps, features)`` to ``(-1, features)``), rows
    ``0..n_steps_out - 1`` are the successive forecast steps of the first test
    window. Earlier code read row 0 for every day, so with ``n_steps_out > 1``
    all day columns repeated the day-1 values.

    Args:
        M_data: DataFrame whose column names label the output rows.
        n_steps_out: Number of forecast days to include.
        y_test_un: 2-D array-like of actual values, one column per feature.
        y_pred: 2-D array-like of predicted values, same layout.

    Returns:
        DataFrame with ``len(M_data.columns)`` rows and
        ``1 + 2 * n_steps_out`` columns.

    Raises:
        IndexError: If either array has fewer than ``n_steps_out`` rows.
    """
    feature_names = list(M_data.columns)
    n_features = len(feature_names)

    # Cast to float so the value columns are float64, as they were when the
    # columns were pre-filled with NaN.
    actual = np.asarray(y_test_un, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Build all columns up front instead of writing cell by cell with iloc.
    columns = {'Stations_Feature': feature_names}
    for step in range(n_steps_out):
        day = step + 1
        columns['Actual ' + 'Day ' + str(day)] = actual[step, :n_features]
        columns['Predicted ' + 'Day ' + str(day)] = predicted[step, :n_features]

    return pd.DataFrame(columns)

In [None]:
# Main function
def main():
    """Run the full pipeline: load, window, train, evaluate, export.

    Reads ``../Datasets/Input_data/Weather_Data.csv``, trains the LSTM on
    7-step input windows to predict 1 step ahead, shows the learning curve,
    prints RMSE/MAE on the test windows, saves the model to
    ``Days_ahead_pred.keras`` and writes the prediction table to
    ``../Datasets/Output_data/Prediction.csv``.

    Raises:
        ValueError: If the chronological split produced no test windows.
    """
    from pathlib import Path

    # Build paths from components: the previous '..\Datasets\...' literal
    # relied on invalid escape sequences and only resolved on Windows.
    datasets_dir = Path('..') / 'Datasets'
    file_path = datasets_dir / 'Input_data' / 'Weather_Data.csv'
    output_path = datasets_dir / 'Output_data' / 'Prediction.csv'

    # Load and preprocess data
    M_data, ET_Data, scaler = load_data(file_path)

    # Define parameters
    n_steps_in, n_steps_out = 7, 1
    n_features = M_data.shape[1]

    # Split data into sequences
    X, y, xt, yt = split_sequence(ET_Data, n_steps_in, n_steps_out)
    if len(xt) == 0:
        # Without this guard the reshape below fails with an opaque IndexError.
        raise ValueError(
            'No test windows were produced; the input series is too short '
            'for the chosen window sizes and train/test split.'
        )

    # Reshape data for LSTM: (samples, time steps, features)
    x_train = X.reshape((X.shape[0], X.shape[1], n_features))
    y_train = y.reshape((y.shape[0], y.shape[1], n_features))
    x_test = xt.reshape((xt.shape[0], xt.shape[1], n_features))

    # Build and train LSTM model.
    # NOTE(review): n_steps_in used to be passed as the fifth positional
    # argument, which binds to n_neurons. The keyword below keeps that
    # behavior (7 units per LSTM) but makes it visible; confirm whether the
    # function default of 100 units was intended instead.
    model, history = build_train_lstm(
        x_train, y_train, n_features, n_steps_out, n_neurons=n_steps_in
    )

    # Plot learning curve
    plot_learning_curve(history)

    # Generate predictions and evaluate model
    rmse, mae, y_pred, y_test_un = evaluate_model(model, x_test, yt, scaler)

    print(f'Total RMSE: {rmse}, MAE: {mae}')
    results = data_formating(M_data, n_steps_out, y_test_un, y_pred)

    # Save model
    model.save('Days_ahead_pred.keras')

    # Export predictions; create the output folder first so a missing
    # directory does not discard a finished training run.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    results.to_csv(output_path, index=False)

    # NOTE(review): plot_results is imported but never called. Kept as-is in
    # case importing `plotting` has side effects -- confirm and either call it
    # or remove the import.
    from plotting import plot_results


# Run the pipeline only when this code executes as the top-level program
# (not when it is imported as a module).
if __name__ == "__main__":
    main()
