In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score


# Load your data
X_data = pd.read_csv("MODEL_DATA/NEWDATA/newData_combined_FD.csv").values
Y_data = pd.read_csv("MODEL_DATA/NEWDATA/newData_expanded_realHardParam.csv").values

# Data scaling
input_scaler = MinMaxScaler()
X_data_scaled = input_scaler.fit_transform(X_data)

target_scaler = MinMaxScaler()
Y_data_scaled = target_scaler.fit_transform(Y_data)

# Split the data into training, validation, and test sets
X_train, X_temp, Y_train, Y_temp = train_test_split(X_data_scaled, Y_data_scaled, test_size=0.2, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

# DataLoader for the training set
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(Y_train, dtype=torch.float32))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# DataLoader for the validation set
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(Y_val, dtype=torch.float32))
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)  # No need to shuffle for evaluation

# DataLoader for the test set
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(Y_test, dtype=torch.float32))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)  # No need to shuffle for evaluation

# Define the updated neural network model
class UpdatedNeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, lambda_reg):
        super(UpdatedNeuralNetwork, self).__init__()
        layers = []
        for i in range(len(hidden_sizes)):
            if i == 0:
                layers.append(nn.Linear(input_size, hidden_sizes[i]))
            else:
                layers.append(nn.Linear(hidden_sizes[i - 1], hidden_sizes[i]))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(hidden_sizes[i]))
            layers.append(nn.Dropout(0.5))
        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.layers = nn.Sequential(*layers)
        self.lambda_reg = lambda_reg

    def forward(self, x):
        return self.layers(x)

    def l2_regularization(self):
        l2_reg = 0.0
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                l2_reg += torch.norm(layer.weight)
        return self.lambda_reg * l2_reg

# Custom loss function with regularization
class CustomMSELoss(nn.Module):
    def __init__(self, lambda_reg):
        super(CustomMSELoss, self).__init__()
        self.lambda_reg = lambda_reg

    def forward(self, outputs, targets, model):
        mse_loss = nn.MSELoss()(outputs, targets)
        return mse_loss + model.l2_regularization()

# Hyperparameters
input_size = X_train.shape[1]
hidden_sizes = [128, 128, 64]
output_size = Y_train.shape[1]
lambda_reg = 0.001

# Instantiate the updated model
model = UpdatedNeuralNetwork(input_size, hidden_sizes, output_size, lambda_reg)
criterion = CustomMSELoss(lambda_reg)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Initialize minimum validation loss and index
min_val_loss = float('inf')
min_val_loss_index = -1

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets, model)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_inputs = torch.tensor(X_val, dtype=torch.float32)
        val_outputs = model(val_inputs)
        loss_val = criterion(val_outputs, torch.tensor(Y_val, dtype=torch.float32), model)

    # Check if current validation loss is the minimum
    if loss_val < min_val_loss:
        min_val_loss = loss_val
        min_val_loss_index = epoch

    # Optional: Print loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set and print the parameters for the data point with the minimum validation loss
model.eval()
with torch.no_grad():
    test_inputs = torch.tensor(X_test, dtype=torch.float32)
    test_outputs = model(test_inputs)
    test_predictions = target_scaler.inverse_transform(test_outputs.numpy())

# Print the parameters for the data point with the minimum validation loss
if min_val_loss_index != -1:
    parameters_min_loss = test_predictions[min_val_loss_index * len(X_test) // num_epochs]
    print(f"Predicted parameters for test data point with the minimum validation loss (Epoch {min_val_loss_index + 1}):")
    print(parameters_min_loss)
    print()
else:
    print("No minimum validation loss found.")

print(f'Loss at min_val_loss_index ({min_val_loss_index + 1}): {loss_val.item():.4f}')

# Convert predictions to NumPy array
test_predictions_np = test_predictions

# Inverse transform the scaled predictions to get the original scale
predicted_parameters_test = target_scaler.inverse_transform(test_predictions_np)

# Ensure the shapes match for calculating R-squared
y_test_subset = Y_data[len(X_train) + len(X_val):]  # Extract the corresponding subset of the original Y_data
y_test_subset = y_test_subset[:len(predicted_parameters_test)]

# Calculate R-squared value
r2_value = r2_score(y_test_subset, predicted_parameters_test)
print(f"R-squared value on the test set: {r2_value:.4f}")


Epoch [100/100], Loss: 0.0661
Predicted parameters for test data point with the minimum validation loss (Epoch 91):
[6.0767514e-01 7.4054230e+02 6.5964244e-02 4.8257834e-01 1.1596931e+03
 1.5319138e+02 3.0968250e+02]

Loss at min_val_loss_index (91): 0.0611
R-squared value on the test set: -5293998.0702


In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# Define ideal parameters
ideal_params = {
    'c1': 0.5,
    'c2': 1300,
    'c3': 2.3e-14,
    'c4': 0.075,
    'c5': 773.18,
    'c6': 1039.37,
    'c7': 73.94
}

# Load the data
x_data_path = 'MODEL_DATA/NEWDATA/newData_combined_FD.csv'  
y_data_path = 'MODEL_DATA/NEWDATA/newData_expanded_realHardParam.csv' 

x_data = pd.read_csv(x_data_path)
y_data = pd.read_csv(y_data_path)

# Use a subset of the data for faster training
subset_percentage = 0.99
x_data_subset, _, y_data_subset, _ = train_test_split(x_data, y_data, test_size=1 - subset_percentage, random_state=42)

# Data scaling
input_scaler = MinMaxScaler()
X_data_scaled = input_scaler.fit_transform(x_data_subset)

target_scaler = MinMaxScaler()
Y_data_scaled = target_scaler.fit_transform(y_data_subset)

# Split the data
X_train, X_val, Y_train, Y_val = train_test_split(X_data_scaled, Y_data_scaled, test_size=0.2, random_state=42)

# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)

# DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(NeuralNetwork, self).__init__()
        layers = [nn.Linear(input_size, hidden_sizes[0]), nn.ReLU()]
        for i in range(len(hidden_sizes) - 1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1]))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

# Hyperparameters
input_size = X_train.shape[1]
hidden_sizes = [128, 128, 64]
output_size = Y_train.shape[1]
percentage_range=(88,95)
# Instantiate the model
model = NeuralNetwork(input_size, hidden_sizes, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation
model.eval()  # set the model to evaluation mode
with torch.no_grad():
    predictions = model(X_val_tensor)
    # Inverse transform to get the predictions in the original scale
    predictions_np = predictions.numpy()
    predicted_parameters = target_scaler.inverse_transform(predictions_np)


Epoch [100/1000], Loss: 0.0682
Epoch [200/1000], Loss: 0.0518
Epoch [300/1000], Loss: 0.0555
Epoch [400/1000], Loss: 0.0551
Epoch [500/1000], Loss: 0.0421
Epoch [600/1000], Loss: 0.0396
Epoch [700/1000], Loss: 0.0396
Epoch [800/1000], Loss: 0.0435
Epoch [900/1000], Loss: 0.0447
Epoch [1000/1000], Loss: 0.0344
c1: Predicted = 5.14e-01, Ideal = 5.00e-01, Close = 92.59%
c2: Predicted = 1.33e+03, Ideal = 1.30e+03, Close = 94.96%
c3: Predicted = 2.34e-14, Ideal = 2.30e-14, Close = 88.40%
c4: Predicted = 7.19e-02, Ideal = 7.50e-02, Close = 91.52%
c5: Predicted = 7.44e+02, Ideal = 7.73e+02, Close = 91.59%
c6: Predicted = 1.02e+03, Ideal = 1.04e+03, Close = 93.06%
c7: Predicted = 7.76e+01, Ideal = 7.39e+01, Close = 92.59%


In [10]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import r2_score


# Load the data
x_data_path = 'MODEL_DATA/NEWDATA/newData_combined_FD.csv'  
y_data_path = 'MODEL_DATA/NEWDATA/newData_expanded_realHardParam.csv' 

x_data = pd.read_csv(x_data_path)
y_data = pd.read_csv(y_data_path)

# Use a subset of the data for faster training
subset_percentage = 0.99
x_data_subset, _, y_data_subset, _ = train_test_split(x_data, y_data, test_size=1 - subset_percentage, random_state=42)

# Data scaling
input_scaler = MinMaxScaler()
X_data_scaled = input_scaler.fit_transform(x_data_subset)

target_scaler = MinMaxScaler()
Y_data_scaled = target_scaler.fit_transform(y_data_subset)

# Split the data
X_train, X_val, Y_train, Y_val = train_test_split(X_data_scaled, Y_data_scaled, test_size=0.2, random_state=42)

# Split the remaining data into validation and test sets
X_test, X_val, Y_test, Y_val = train_test_split(X_val, Y_val, test_size=0.5, random_state=42)

# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)  # Add this line for testing data
Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)  # Add this line for validation targets

# DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the neural network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(NeuralNetwork, self).__init__()
        layers = [nn.Linear(input_size, hidden_sizes[0]), nn.ReLU()]
        for i in range(len(hidden_sizes) - 1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1]))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

# Hyperparameters
input_size = X_train.shape[1]
hidden_sizes = [128, 128, 64]
output_size = Y_train.shape[1]

# Instantiate the model
model = NeuralNetwork(input_size, hidden_sizes, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Initialize minimum validation loss and index
min_val_loss = float('inf')
min_val_loss_index = -1

# Validation
model.eval()
with torch.no_grad():
    predictions_val = model(X_val_tensor)
    loss_val = criterion(predictions_val, Y_val_tensor)

# Check if current validation loss is the minimum
if loss_val < min_val_loss:
    min_val_loss = loss_val
    min_val_loss_index = epoch

# Evaluate on the test set and print the parameters for the data point with the minimum validation loss
with torch.no_grad():
    predictions_test = model(X_test_tensor)
    predictions_test_np = predictions_test.numpy()
    predicted_parameters_test = target_scaler.inverse_transform(predictions_test_np)

# Print the parameters for the data point with the minimum validation loss
if min_val_loss_index != -1:
    parameters_min_loss = predicted_parameters_test[min_val_loss_index]
    print(f"Predicted parameters for test data point with the minimum validation loss (Epoch {min_val_loss_index + 1}):")
    print(parameters_min_loss)
    print()
else:
    print("No minimum validation loss found.")




Epoch [100/1000], Loss: 0.0648
Epoch [200/1000], Loss: 0.0510
Epoch [300/1000], Loss: 0.0491
Epoch [400/1000], Loss: 0.0428
Epoch [500/1000], Loss: 0.0446
Epoch [600/1000], Loss: 0.0371
Epoch [700/1000], Loss: 0.0417
Epoch [800/1000], Loss: 0.0332
Epoch [900/1000], Loss: 0.0402
Epoch [1000/1000], Loss: 0.0337
Predicted parameters for test data point with the minimum validation loss (Epoch 1000):
[5.7960927e-01 1.0960482e+03 3.4411706e-02 4.7349989e-01 1.4338960e+03
 4.6137509e+02 3.8619226e+02]



ValueError: Found input variables with inconsistent numbers of samples: [19997, 2000]

In [11]:
# Inverse transform the scaled predictions to get the original scale
predicted_parameters_test = target_scaler.inverse_transform(predictions_test_np)

# Ensure the shapes match for calculating R-squared
y_data_test_subset = y_data_subset[:len(predicted_parameters_test)]

# Calculate R-squared value
r2_value = r2_score(y_data_test_subset, predicted_parameters_test)
print(f"R-squared value on the test set: {r2_value:.4f}")

R-squared value on the test set: -0.5581
