In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, initializers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
import joblib


In [2]:
# Suffix appended to the "data" directory name; the empty string selects ".../data".
dataset = ''

# Shared prefixes so the project directory is spelled out only once.
_project_root = "D:/Ankit work/irs_2_users"
_data_dir = f"{_project_root}/data{dataset}"

# CSV files that are concatenated into the network input.
hr_file = f"{_data_dir}/hr.csv"
hd_file = f"{_data_dir}/hd.csv"
G_file = f"{_data_dir}/G.csv"
Omega_file = f"{_data_dir}/omega.csv"

# CSV files that are concatenated into the network target.
W_file = f"{_data_dir}/W.csv"
theta_file = f"{_data_dir}/theta.csv"

# Artefacts written after training and read back for prediction.
scaler_X_file = f"{_project_root}/scaler_X.pkl"
scaler_Y_file = f"{_project_root}/scaler_Y.pkl"
modal_save_file = f"{_project_root}/my_trained_model.keras"

In [3]:
def _read_headerless_csv(path):
    """Read a CSV file that has no header row into a DataFrame."""
    return pd.read_csv(path, header=None)


# Blocks that make up the network input.
hr = _read_headerless_csv(hr_file)
hd = _read_headerless_csv(hd_file)
G = _read_headerless_csv(G_file)
Omega = _read_headerless_csv(Omega_file)

# Blocks that make up the network target.
W = _read_headerless_csv(W_file)
Theta = _read_headerless_csv(theta_file)

# Stack the blocks side by side: X = [hr | hd | G | Omega], Y = [W | Theta].
X = pd.concat([hr, hd, G, Omega], axis=1)
Y = pd.concat([W, Theta], axis=1)

# Standardise inputs and targets column-wise.
# NOTE(review): both scalers are fitted on the full dataset, i.e. before the
# train/validation/test split done in a later cell, so statistics of the
# validation and test rows leak into the scaling. Consider fitting on the
# training rows only.
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y)

for _label, _frame in (("X", X), ("Y", Y)):
    print(f"{_label} shape: ", _frame.shape)


X shape:  (555600, 620)
Y shape:  (555600, 116)


In [4]:
# Split the standardised data 60 / 20 / 20 into train, validation and test sets.
# train_test_split shuffles the rows itself (shuffle=True is its default) and
# random_state pins the permutation, so no separate manual shuffle is performed;
# building shuffled DataFrame copies here would only duplicate the full dataset
# in memory without being used by the split.
X_train, X_temp, Y_train, Y_temp = train_test_split(X_scaled, Y_scaled, test_size=0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

# The three subsets must partition the dataset exactly.
assert len(X_train) + len(X_val) + len(X_test) == len(X_scaled)

print(X_train.shape, Y_train.shape)

(333360, 620) (333360, 116)


In [36]:
# Regularisation hyperparameters used when the hidden layers are built.
l2_lambda = 1e-8      # L2 penalty coefficient for the hidden Dense kernels
dropout_rate = 1e-3   # fraction of units dropped by each Dropout layer (1e-3 = 0.1%)

In [37]:
# Fully connected regression network.
# `models`, `layers`, `regularizers` and `initializers` come from the import
# cell at the top of the notebook; they are not re-imported here.

# Widths of the hidden Dense layers, in order from input to output.
hidden_units = (1024, 1024, 512, 512, 256, 150, 128, 128)

model = models.Sequential()

# Input layer: one feature per column of the training matrix.
model.add(layers.InputLayer(shape=(X_train.shape[1],)))

# Every hidden block has the same structure:
# Dense (He-normal init, L2 penalty) -> BatchNormalization -> ReLU -> Dropout.
for units in hidden_units:
    model.add(layers.Dense(units, kernel_initializer=initializers.HeNormal(), kernel_regularizer=regularizers.l2(l2_lambda)))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout(dropout_rate))

# Output layer: one linear unit per target column (no activation for regression).
model.add(layers.Dense(Y_train.shape[1]))

# Mean squared error is optimised; mean absolute error is reported alongside it.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])


In [38]:
# Define the EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Create the ReduceLROnPlateau callback
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6)

In [39]:
# Presumably a manually maintained counter of training runs.
# NOTE(review): no other cell visible in this notebook reads it — confirm before relying on or removing it.
train_count = 1

In [40]:
# Train for at most 100 epochs, monitoring the validation split.
# Both callbacks are passed: early_stopping (patience 20, restores the best
# weights) was defined next to reduce_lr and is advertised in the run log,
# but it only takes effect when it is handed to fit().
history = model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=128,
    shuffle=True,
    validation_data=(X_val, Y_val),
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/100
[1m2605/2605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 20ms/step - loss: 1.0002 - mae: 0.8793 - val_loss: 0.8912 - val_mae: 0.8240 - learning_rate: 0.0010
Epoch 2/100
[1m2605/2605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 20ms/step - loss: 0.8805 - mae: 0.8170 - val_loss: 0.8654 - val_mae: 0.8049 - learning_rate: 0.0010
Epoch 3/100
[1m2605/2605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 20ms/step - loss: 0.8590 - mae: 0.8018 - val_loss: 0.8335 - val_mae: 0.7830 - learning_rate: 0.0010
Epoch 4/100
[1m2605/2605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 20ms/step - loss: 0.8237 - mae: 0.7784 - val_loss: 0.7921 - val_mae: 0.7573 - learning_rate: 0.0010
Epoch 5/100
[1m2605/2605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 20ms/step - loss: 0.7712 - mae: 0.7436 - val_loss: 0.7203 - val_mae: 0.7054 - learning_rate: 0.0010
Epoch 6/100
[1m2605/2605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 20ms/step - lo

In [41]:

# Loss and MAE of the trained model on each of the three splits.
train_loss, train_mae = model.evaluate(X_train, Y_train)
val_loss, val_mae = model.evaluate(X_val, Y_val)
test_loss, test_mae = model.evaluate(X_test, Y_test)

# One-line verdict that is later written to the run log.
# NOTE(review): this is a coarse heuristic — any run whose training loss is
# the smallest of the three is labelled as overfitting regardless of the size
# of the gap, and the "underfits" label depends only on val_loss > test_loss.
if not (train_loss < val_loss and train_loss < test_loss):
    remark = "Good fit." if val_loss <= test_loss else "Model underfits the data."
else:
    remark = "Possible overfitting detected."



[1m10418/10418[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - loss: 0.1958 - mae: 0.3200
[1m3473/3473[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.2501 - mae: 0.3588
[1m3473/3473[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.2479 - mae: 0.3577


In [50]:

import sys
import numpy as np

# Append a plain-text summary of this training run to the shared log file.
# The file is opened in append mode, so summaries of earlier runs are kept.
with open('D:/Ankit work/irs_2_users/model_performance_log.txt', "a", encoding="utf-8") as f:
    f.write('\n*******************************************************************************************************\n')
    f.write(f"Remark: {remark}\n")
    f.write(f"He intialized , L2 Regularized ({l2_lambda}), dropout({dropout_rate}), early stop (20), batch normalization\n" )

    # Hidden layers = every Dense layer except the last layer of the model (the output layer).
    hidden_layers = [lyr for lyr in model.layers[:-1] if isinstance(lyr, tf.keras.layers.Dense)]
    f.write(f"\nNumber of hidden layers: {len(hidden_layers)}\n")
    for position, dense in enumerate(hidden_layers, start=1):
        # `units` is the number of neurons of the Dense layer.
        f.write(f"Layer {position}: {dense.units} neurons\n")

    # Final losses measured by the evaluation cell.
    f.write(f"\nTraining loss: {train_loss}\n")
    f.write(f"Validation loss: {val_loss}\n")
    f.write(f"Test loss: {test_loss}\n")
    f.write("\n")

    # Loss curves, sampled at every 10th epoch (epochs 1, 11, 21, ...).
    f.write("\nTraining and Validation Losses for each epoch:\n")
    train_curve = history.history['loss']
    for epoch in range(0, len(train_curve), 10):
        f.write(
            f"Epoch {epoch + 1}: Training loss = {train_curve[epoch]:.4f}, "
            f"Validation loss = {history.history['val_loss'][epoch]:.4f}\n"
        )

    f.write('*******************************************************************************************************\n')


In [51]:
# Persist the trained network. modal_save_file ends in ".keras", so this is
# not an HDF5 (.h5) file.
model.save(modal_save_file)

# Persist both fitted scalers so that prediction code can apply the same
# input standardisation and invert the target standardisation.
# The trailing ';' keeps the notebook from echoing joblib.dump's return value.
joblib.dump(scaler_X, scaler_X_file)
joblib.dump(scaler_Y, scaler_Y_file);

['D:/Ankit work/irs_2_users/scaler_Y.pkl']

In [5]:
from tensorflow.keras.models import load_model
from tensorflow import keras

# Restore the trained network from disk.
loaded_model = load_model(modal_save_file)

# Restore the fitted input / target scalers.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load scaler files that this project wrote itself.
scaler_X_loaded = joblib.load(scaler_X_file)
scaler_Y_loaded = joblib.load(scaler_Y_file)


In [15]:
# load_model = model
# Rebind the "*_loaded" scaler names to the scalers fitted in this session.
# NOTE(review): this overrides whatever was read from the scaler files and
# requires scaler_X / scaler_Y to exist in the kernel (they are created in the
# data-loading cell). Its execution count is out of sequence with the
# neighbouring cells — confirm whether this cell is still wanted.
scaler_X_loaded = scaler_X
scaler_Y_loaded = scaler_Y

In [6]:
# Print the layer-by-layer summary of the model restored from disk.
loaded_model.summary()

### PREDICTION

In [7]:
# Directory holding the separate testing dataset.
_testing_dir = "D:/Ankit work/irs_2_users/testing_data"

hr_testing_file = f"{_testing_dir}/hr.csv"
hd_testing_file = f"{_testing_dir}/hd.csv"
G_testing_file = f"{_testing_dir}/G.csv"
Omega_testing_file = f"{_testing_dir}/omega.csv"
Theta_testing_file = f"{_testing_dir}/theta.csv"  # defined here but not read in this cell

# Read the four headerless input blocks, in the same order used for training.
hr_testing, hd_testing, G_testing, Omega_testing = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (hr_testing_file, hd_testing_file, G_testing_file, Omega_testing_file)
)

# Stack the input blocks side by side: [hr | hd | G | Omega]. No targets are loaded here.
X_testing = pd.concat([hr_testing, hd_testing, G_testing, Omega_testing], axis=1)

# Apply the already-fitted input scaler (transform only, no re-fitting).
X_scaled_testing = scaler_X_loaded.transform(X_testing)
print(X_testing.shape)


(100, 620)


In [8]:
# Batch of already-standardised inputs to run through the restored network.
new_data_batch = X_scaled_testing

# Predict in the standardised target space, then map the predictions back to
# the original target scale with the target scaler.
scaled_predictions = loaded_model.predict(new_data_batch)
predictions = scaler_Y_loaded.inverse_transform(scaled_predictions)

print(f"Predictions for the batch of inputs: {predictions.shape}")

# Re-evaluate the restored model on the test split created by the split cell.
# Note that this rebinds test_loss / test_mae from the earlier evaluation cell.
test_loss, test_mae = loaded_model.evaluate(X_test, Y_test)

print("Test Loss:", test_loss)
print("Test MAE:", test_mae)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predictions for the batch of inputs: (100, 116)
[1m3473/3473[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 0.2479 - mae: 0.3577
Test Loss: 0.24813798069953918
Test MAE: 0.3576447367668152


In [9]:
# Write the de-standardised predictions as a bare numeric CSV.
# `pd` comes from the import cell at the top of the notebook.
Y_pred_df = pd.DataFrame(predictions)

# index=False omits the row-number column; header=False omits the column-label
# row (the documented way to suppress it — `header` expects a bool or a list of names).
Y_pred_df.to_csv('D:/Ankit work/irs_2_users/testing_data/common_pred.csv', index=False, header=False)


: 