In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import glob
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Add, InputLayer
from tensorflow.keras.optimizers import Adam
import pywt
import tensorflow as tf
from concurrent.futures import ThreadPoolExecutor

# Enable GPU usage
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Find CSV files
files = glob.glob(r'D:\A_NAUSHAD\E\Dataset\Book6.csv')

def wavelet_transform(data, wavelet='db1'):
    coeffs = pywt.wavedec(data, wavelet, mode='periodization')
    return coeffs

def inverse_wavelet_transform(coeffs, wavelet='db1'):
    return pywt.waverec(coeffs, wavelet, mode='periodization')

# Function to evaluate predictions
def evaluate_preds(y_true, y_pred):
    y_true = tf.cast(y_true, dtype=tf.float32)
    y_pred = tf.cast(y_pred, dtype=tf.float32)

    mae = tf.keras.metrics.mean_absolute_error(y_true, y_pred)
    mse = tf.keras.metrics.mean_squared_error(y_true, y_pred)
    rmse = tf.sqrt(mse)
    mape = tf.keras.metrics.mean_absolute_percentage_error(y_true, y_pred)
    mslr = tf.keras.metrics.mean_squared_logarithmic_error(y_true, y_pred)

    return {
        "mae": mae.numpy().mean(),
        "mse": mse.numpy().mean(),
        "rmse": rmse.numpy().mean(),
        "mape": mape.numpy().mean(),
        "mslr": mslr.numpy().mean(),
    }

# Proposed unique model with residual connections
def create_proposed_model(input_shape):
    model = Sequential()
    model.add(InputLayer(input_shape=input_shape))
    model.add(Dense(64, activation='relu'))

    # Adding residual connections
    model.add(Dense(64, activation='relu'))
    residual_1 = model.layers[-1].output

    model.add(Dense(64, activation='relu'))
    residual_2 = model.layers[-1].output

    model.add(Dense(64, activation='relu'))
    residual_3 = model.layers[-1].output

    # Summing up the residuals
    residual_sum = Add()([residual_1, residual_2, residual_3])
    
    model.add(Dense(1))  # Output layer
    
    model.compile(optimizer=Adam(learning_rate=0.01), loss='mse')
    return model

# Function to save DataFrame to CSV
def save_to_csv(df, file_path):
    df.to_csv(file_path, index=False)

# Function to process and save results
def process_and_save_results(model_name, column, df, history, y_test_inverse, y_pred_inverse, model, X_train, feature_transformed):
    forecast_dates = pd.date_range(start='2023-01-28', end='2024-12-31', freq='H')
    df_forecast = pd.DataFrame(index=forecast_dates)
    
    feature_scaled = feature_transformed.reshape(-1, 1)
    scaler = MinMaxScaler()
    feature_scaled = scaler.fit_transform(feature_scaled)

    forecast_generator = TimeseriesGenerator(feature_scaled, np.zeros(len(feature_scaled)), length=len(X_train), sampling_rate=1, batch_size=1)
    predicted_values_forecast = model.predict(forecast_generator)
    predicted_values_forecast = scaler.inverse_transform(predicted_values_forecast)

    if len(predicted_values_forecast) > len(df_forecast):
        predicted_values_forecast = predicted_values_forecast[:len(df_forecast)]
    else:
        forecast_values = np.full((len(df_forecast), 1), np.nan)
        forecast_values[:len(predicted_values_forecast)] = predicted_values_forecast
        predicted_values_forecast = forecast_values

    df_forecast[column] = predicted_values_forecast
    df_forecast.to_csv(f'D:/A_NAUSHAD/E/RESULTS/FORE/{model_name}_{column}_Wave_fore.csv')

    plt.figure(figsize=(12, 6))
    plt.plot(df_forecast.index, df_forecast[column], label='Forecasted')
    plt.xlabel('Hour')
    plt.ylabel(column)
    plt.legend()
    plt.title(f'Forecast of Hourly {column} concentration using {model_name}')
    plt.show()
    
    pd.DataFrame(history.history['loss']).to_csv(f'D:/A_NAUSHAD/E/RESULTS/LOSS/{model_name}_{column}_Wave_loss.csv')
    pd.DataFrame(history.history['val_loss']).to_csv(f'D:/A_NAUSHAD/E/RESULTS/LOSS/{model_name}_{column}_Wave_val_loss.csv')

    predictions_train = model.predict(X_train)
    pd.DataFrame(predictions_train).to_csv(f'D:/A_NAUSHAD/E/RESULTS/PRED/{model_name}_{column}_Wave_train_pred.csv')
    predictions_test = model.predict(X_test)
    pd.DataFrame(predictions_test).to_csv(f'D:/A_NAUSHAD/E/RESULTS/PRED/{model_name}_{column}_Wave_test_pred.csv')

    eval_results = evaluate_preds(y_true=y_test_inverse, y_pred=y_pred_inverse)
    eval_df = pd.DataFrame.from_dict(eval_results, orient='index', columns=['value'])
    eval_df.to_csv(f'D:/A_NAUSHAD/E/RESULTS/EVAL/{model_name}_{column}_Wave_eval.csv')

# Dictionary of models
models = {
    "ProposedModel": create_proposed_model  # Added the proposed model here
}

# Loop over files and models
for file in files:
    df = pd.read_csv(file, parse_dates=['Date'], index_col=['Date'])

    with ThreadPoolExecutor(max_workers=4) as executor:
        tasks = []

        for column in df.columns:
            feature = df[[column]].values
            target = df[[column]].values

            feature_wavelet = wavelet_transform(feature)
            target_wavelet = wavelet_transform(target)

            # Reshape wavelet coefficients to 2D arrays
            feature_transformed = feature_wavelet[0].reshape(-1, 1)
            target_transformed = target_wavelet[0].reshape(-1, 1)

            X_train, X_test, y_train, y_test = train_test_split(feature_transformed, target_transformed, test_size=0.15, random_state=1, shuffle=False)
            X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=1, shuffle=False)

            # Ensure the input to Dense is 2D
            X_train = X_train.reshape(X_train.shape[0], -1)
            X_val = X_val.reshape(X_val.shape[0], -1)
            X_test = X_test.reshape(X_test.shape[0], -1)

            # Debug print to check the shapes of the data
            print(f'X_train shape: {X_train.shape}, y_train shape: {y_train.shape}')
            print(f'X_val shape: {X_val.shape}, y_val shape: {y_val.shape}')
            print(f'X_test shape: {X_test.shape}, y_test shape: {y_test.shape}')

            for model_name, create_model in models.items():
                model = create_model((X_train.shape[1],))
                
                lr_monitor = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=2, factor=0.5, cooldown=1)
                early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
                
                history = model.fit(
                    X_train, y_train, 
                    epochs=100,
                    validation_data=(X_val, y_val),
                    batch_size=128,
                    callbacks=[lr_monitor, early_stopping],
                    verbose=2
                )
                
                loss = model.evaluate(X_test)
                print(f'Test loss for column {column} using {model_name}: {loss}')
                
#                 plt.plot(model.history["loss"],label="loss")
#                 plt.plot(model.history["val_loss"],label="val_loss")
#                 plt.legend(loc="best")
#                 plt.xlabel("No. Of Epochs")
#                 plt.ylabel("mse score")
                
                y_pred = model.predict(X_test)
                
                y_test_wavelet = list(target_wavelet)
                y_test_wavelet[0] = y_test.flatten()
                y_pred_wavelet = list(target_wavelet)
                y_pred_wavelet[0] = y_pred.flatten()

                y_test_inverse = inverse_wavelet_transform(y_test_wavelet)
                y_pred_inverse = inverse_wavelet_transform(y_pred_wavelet)

                tasks.append(executor.submit(process_and_save_results, model_name, column, df, history, y_test_inverse, y_pred_inverse, model, X_train, feature_transformed))

        for task in tasks:
            task.result()





X_train shape: (23493, 1), y_train shape: (23493, 1)
X_val shape: (4146, 1), y_val shape: (4146, 1)
X_test shape: (4878, 1), y_test shape: (4878, 1)

Epoch 1/100

184/184 - 3s - loss: 25.2497 - val_loss: 0.0046 - lr: 0.0100 - 3s/epoch - 16ms/step
Epoch 2/100
184/184 - 1s - loss: 0.0223 - val_loss: 0.1909 - lr: 0.0100 - 539ms/epoch - 3ms/step
Epoch 3/100
184/184 - 1s - loss: 0.0395 - val_loss: 9.0951e-08 - lr: 0.0100 - 533ms/epoch - 3ms/step
Epoch 4/100
184/184 - 1s - loss: 2.2491e-06 - val_loss: 1.5107e-08 - lr: 0.0100 - 532ms/epoch - 3ms/step
Epoch 5/100
184/184 - 1s - loss: 2.0564e-06 - val_loss: 1.9627e-08 - lr: 0.0100 - 520ms/epoch - 3ms/step
Epoch 6/100
184/184 - 1s - loss: 1.9471e-06 - val_loss: 1.1706e-08 - lr: 0.0050 - 550ms/epoch - 3ms/step
Epoch 7/100
184/184 - 1s - loss: 1.8967e-06 - val_loss: 1.0447e-08 - lr: 0.0050 - 549ms/epoch - 3ms/step
Epoch 8/100
184/184 - 1s - loss: 1.8547e-06 - val_loss: 1.0868e-08 - lr: 0.0025 - 533ms/epoch - 3ms/step
Epoch 9/100
184/184 - 1s - lo

ValueError: Found array with dim 3. None expected <= 2.