In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, initializers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
import joblib


In [2]:
dataset = 2

hr_file = f"D:/Ankit work/irs data/data{dataset}/hr.csv"
hd_file = f"D:/Ankit work/irs data/data{dataset}/hd.csv"
G_file = f"D:/Ankit work/irs data/data{dataset}/G.csv"

W_file = f"D:/Ankit work/irs data/data{dataset}/W.csv"
theta_file = f"D:/Ankit work/irs data/data{dataset}/theta.csv"

scaler_X_file = "D:/Ankit work/irs data/scaler_X.pkl"
scaler_Y_file = "D:/Ankit work/irs data/scaler_Y.pkl"
modal_save_file = "D:/Ankit work/irs data/my_trained_model.keras"

In [3]:
# Load input datasets
hr = pd.read_csv(hr_file, header=None)
hd = pd.read_csv(hd_file, header=None)
G = pd.read_csv(G_file, header=None)

# Load output datasets
W = pd.read_csv(W_file, header=None)
Theta = pd.read_csv(theta_file, header=None)

# Concatenate input datasets (hr, hd, G) and output datasets (W, theta)
X = pd.concat([hr, hd, G], axis=1)
Y = pd.concat([W, Theta], axis=1)

# Standardize input data
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

# Optionally, scale output data (depending on your needs)
scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y)

print("X shape: ", X.shape)
print("Y shape: ", Y.shape)


X shape:  (876650, 154)
Y shape:  (876650, 54)


In [4]:
# Manually shuffle the dataset using pandas
X_shuffled = pd.DataFrame(X_scaled).sample(frac=1, random_state=42).reset_index(drop=True)
Y_shuffled = pd.DataFrame(Y_scaled).sample(frac=1, random_state=42).reset_index(drop=True)

# Split the data into train, validation, and test sets
X_train, X_temp, Y_train, Y_temp = train_test_split(X_scaled, Y_scaled, test_size=0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

print(X_train.shape, Y_train.shape)

(525990, 154) (525990, 54)


In [11]:
l2_lambda = 0.0000000001
dropout_rate = 0.001  # Set the dropout rate (0.2 = 20%)

In [12]:
# Define the neural network
model = models.Sequential()

# Input layer (shape based on input features)
model.add(layers.InputLayer(shape=(X_train.shape[1],)))

# Hidden layers
model.add(layers.Dense(314, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))
model.add(layers.Dropout(dropout_rate)) 
model.add(layers.Dense(314, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))
model.add(layers.Dropout(dropout_rate))
model.add(layers.Dense(256, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))
model.add(layers.Dropout(dropout_rate)) 
model.add(layers.Dense(128, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))
model.add(layers.Dropout(dropout_rate)) 
model.add(layers.Dense(100, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))
model.add(layers.Dropout(dropout_rate)) 
model.add(layers.Dense(70, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))
model.add(layers.Dropout(dropout_rate)) 
model.add(layers.Dense(64, activation='relu', kernel_initializer=initializers.HeNormal(),  kernel_regularizer=regularizers.l2(l2_lambda)))

# Output layer
model.add(layers.Dense(Y_train.shape[1]))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [13]:
# Define the EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Create the ReduceLROnPlateau callback
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6)

In [14]:
train_count = 1

In [15]:
history = model.fit(X_train, Y_train, epochs=100, validation_data=(X_val, Y_val), batch_size=128, shuffle=True, callbacks=[])

Epoch 1/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 5ms/step - loss: 0.9409 - mae: 0.8572 - val_loss: 0.6838 - val_mae: 0.6873
Epoch 2/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 0.6205 - mae: 0.6424 - val_loss: 0.4425 - val_mae: 0.5230
Epoch 3/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 0.4207 - mae: 0.5054 - val_loss: 0.3288 - val_mae: 0.4426
Epoch 4/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 0.3285 - mae: 0.4388 - val_loss: 0.2812 - val_mae: 0.4084
Epoch 5/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 0.2834 - mae: 0.4046 - val_loss: 0.2536 - val_mae: 0.3838
Epoch 6/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 0.2569 - mae: 0.3837 - val_loss: 0.2357 - val_mae: 0.3693
Epoch 7/100
[1m4110/4110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [16]:

# Evaluate the model on training, validation, and test sets
train_loss, train_mae = model.evaluate(X_train, Y_train)
val_loss, val_mae = model.evaluate(X_val, Y_val)
test_loss, test_mae = model.evaluate(X_test, Y_test)

# Get model predictions for calculating MAPE
train_pred = model.predict(X_train)
val_pred = model.predict(X_val)
test_pred = model.predict(X_test)


# Create a remark about model performance (e.g., overfitting or good fit)
if train_loss < val_loss and train_loss < test_loss:
    remark = "Possible overfitting detected."
elif val_loss <= test_loss:
    remark = "Good fit."
else:
    remark = "Model underfits the data."



[1m16438/16438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 917us/step - loss: 0.1521 - mae: 0.2876
[1m5480/5480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 887us/step - loss: 0.1560 - mae: 0.2906
[1m5480/5480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 895us/step - loss: 0.1560 - mae: 0.2908
[1m16438/16438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 837us/step
[1m5480/5480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 861us/step
[1m5480/5480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 850us/step


In [13]:

# Prepare the text to log the errors and remarks
log_text = (f"Training Loss MSE: {train_loss:.4f}, Validation Loss MSE: {val_loss:.4f}, Test Loss MSE: {test_loss:.4f}\n"
            f"Training MAE: {train_mae:.4f}, Validation MAE: {val_mae:.4f}, Test MAE: {test_mae:.4f}\n"
            f"Remark: {remark}\n"
            f"He intialized , L2 Regularized (0.0000001), Dropout (0.001) Early stop (Patience 20),\n" 
            # f"Learning rate scheduling (factor=0.5, patience=10, min_lr=1e-6)\n"
            f"layers: 2 * 314, 256, 128, 2 * 100, 64\n"
            "-------------------------------------------\n")

# Append the results to a text file
with open('D:\Ankit work\irs data\model_performance_log.txt', 'a') as file:
    file.write(log_text)

print("Training, validation, and test errors saved to 'model_performance_log.txt'.")

Training, validation, and test errors saved to 'model_performance_log.txt'.


In [5]:
# Save the trained model to a file (HDF5 format or TensorFlow SavedModel format)
# model.save(modal_save_file)  # Save as HDF5

# Save the scaler to a file
joblib.dump(scaler_X, scaler_X_file)
joblib.dump(scaler_Y, scaler_Y_file)

['D:/Ankit work/irs data/scaler_Y.pkl']

In [14]:
from tensorflow.keras.models import load_model
from tensorflow import keras

# Load the model
loaded_model = keras.models.load_model(modal_save_file)

# Load the saved scaler
scaler_X_loaded = joblib.load(scaler_X_file)
scaler_Y_loaded = joblib.load(scaler_Y_file)


In [15]:
# load_model = model
scaler_X_loaded = scaler_X
scaler_Y_loaded = scaler_Y

In [16]:
loaded_model.summary()

In [17]:
# Let's assume 'new_data' is a single sample with the same number of features as your training data.
new_data = [[value1, value2, value3, ..., valueN]]  # Replace with your input values

# Make prediction
prediction = model.predict(new_data)

# Print the prediction
print("Prediction for the single input:", prediction)


NameError: name 'value1' is not defined

### PREDICTION

In [21]:
hr_testing_file = f"D:/Ankit work/irs data/testing_data/hr.csv"
hd_testing_file = f"D:/Ankit work/irs data/testing_data/hd.csv"
G_testing_file = f"D:/Ankit work/irs data/testing_data/G.csv"

# Load input datasets
hr_testing = pd.read_csv(hr_testing_file, header=None)
hd_testing = pd.read_csv(hd_testing_file, header=None)
G_testing = pd.read_csv(G_testing_file, header=None)

# Concatenate input datasets (hr, hd, G) and output datasets (W, theta)
X_testing = pd.concat([hr_testing, hd_testing, G_testing], axis=1)

X_scaled_testing = scaler_X_loaded.transform(X_testing)
print(X_testing.shape)


(100, 154)


In [22]:
# Let's assume 'new_data_batch' is a batch of new inputs (e.g., multiple samples).
new_data_batch = X_scaled_testing

# Make predictions for the batch
scaled_predictions = loaded_model.predict(new_data_batch)
predictions = scaler_Y_loaded.inverse_transform(scaled_predictions)

# Print the predictions
print("Predictions for the batch of inputs:", predictions.shape)

# Evaluate the model on test data
test_loss, test_mae = loaded_model.evaluate(X_test, Y_test)

print(f"Test Loss: {test_loss}")
print(f"Test MAE: {test_mae}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Predictions for the batch of inputs: (100, 54)
[1m5480/5480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1ms/step - loss: 0.1530 - mae: 0.2884
Test Loss: 0.15333764255046844
Test MAE: 0.2886272668838501


In [23]:
import pandas as pd

# Assuming Y_pred is a NumPy array
# If it's not, convert it to a NumPy array: Y_pred = np.array(Y_pred)

# Convert to DataFrame
Y_pred_df = pd.DataFrame(predictions)  # Add appropriate column name(s)

# Save to CSV
Y_pred_df.to_csv('D:/Ankit work/irs data/testing_data/common_pred.csv', index=False, header=None)  # index=False to avoid adding row numbers to the file


In [24]:
exit()

: 