In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

# Load your data
X_files = {
    'NDBR50': "MODEL_DATA/NEWDATA/newData_NDBR50_FD.csv",
    'NDBR20': "MODEL_DATA/NEWDATA/newData_NDBR20_FD.csv",
    'NDBR6': "MODEL_DATA/NEWDATA/newData_NDBR6_FD.csv",
    'CHD6': "MODEL_DATA/NEWDATA/newData_CHD6_FD.csv"
}

Y_file = "MODEL_DATA/NEWDATA/newData_expanded_realHardParam.csv"

geometries = ['NDBR50', 'NDBR20', 'NDBR6', 'CHD6']

X_data = {geometry: pd.read_csv(X_files[geometry], usecols=[0, 1]).values for geometry in geometries}
Y_data = pd.read_csv(Y_file).values

# Define the target values for 'c1' to 'c7'
target_values = np.array([0.5, 1300, 2.3 * 10**-14, 0.075, 773.18, 1039.37, 73.94])

# Initialize XGBoost models for each geometry
xgb_models = {geometry: xgb.XGBRegressor(objective='reg:squarederror') for geometry in geometries}

# Train XGBoost models and make predictions
predicted_parameters = {}
for geometry in geometries:
    X_sample = X_data[geometry]
    Y_sample = Y_data[:, geometries.index(geometry)]
    
    # Split the data into training and testing sets (you can also use cross-validation)
    X_train, X_test, Y_train, Y_test = train_test_split(X_sample, Y_sample, test_size=0.2, random_state=42)
    
    # Standardize features by removing the mean and scaling to unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Train the XGBoost model
    xgb_models[geometry].fit(X_train, Y_train)
    
    # Make predictions on the test set
    predictions = xgb_models[geometry].predict(X_test)
    
    # Calculate the mean squared error (MSE) or other evaluation metrics if needed
    mse = np.mean((predictions - Y_test) ** 2)
    print(f"Geometry: {geometry}, Mean Squared Error: {mse:.4f}")
    
    # Inverse transform the predictions to get 'c1' to 'c7' values
    predicted_parameters[geometry] = predictions * np.std(Y_sample) + np.mean(Y_sample)

# Print predicted 'c1' to 'c7' values for each geometry
for geometry, params in predicted_parameters.items():
    print(f'Predicted parameters for geometry {geometry}:')
    print(params)




Geometry: NDBR50, Mean Squared Error: 0.0513
Geometry: NDBR20, Mean Squared Error: 248776.8871
Geometry: NDBR6, Mean Squared Error: 0.0008
Geometry: CHD6, Mean Squared Error: 0.0594
Predicted parameters for geometry NDBR50:
[0.661067   0.6751424  0.584611   ... 0.7169777  0.6725036  0.79667914]
Predicted parameters for geometry NDBR20:
[518637.97 434968.2  342718.2  ... 400685.25 400835.28 175302.17]
Predicted parameters for geometry NDBR6:
[0.05122931 0.0510592  0.05132556 ... 0.05099822 0.05104309 0.05133993]
Predicted parameters for geometry CHD6:
[0.6396007  0.6073706  0.6075463  ... 0.6498552  0.6688322  0.67850286]


In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

# Load your data for the 'NDBR50' geometry
X_file = "MODEL_DATA/NEWDATA/newData_NDBR50_FD.csv"
Y_file = "MODEL_DATA/NEWDATA/newData_expanded_realHardParam.csv"

X_data = pd.read_csv(X_file, usecols=[0, 1]).values
Y_data = pd.read_csv(Y_file).values[:, 0]  # Selecting 'NDBR50' column

# Define the target values for 'c1' to 'c7'
target_values = np.array([0.5, 1300, 2.3 * 10**-14, 0.075, 773.18, 1039.37, 73.94])

# Initialize the XGBoost model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')

# Split the data into training and testing sets (you can also use cross-validation)
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the XGBoost model
xgb_model.fit(X_train, Y_train)

# Make predictions on the test set
predictions = xgb_model.predict(X_test)

# Calculate the mean squared error (MSE) or other evaluation metrics if needed
mse = np.mean((predictions - Y_test) ** 2)
print(f"Mean Squared Error: {mse:.4f}")

# Inverse transform the predictions to get 'c1' to 'c7' values
predicted_parameters = predictions * np.std(Y_data) + np.mean(Y_data)

# Print predicted 'c1' to 'c7' values for the 'NDBR50' geometry
print('Predicted parameters for geometry NDBR50:')
print(predicted_parameters)

Mean Squared Error: 0.0513
Predicted parameters for geometry NDBR50:
[0.661067   0.6751424  0.584611   ... 0.7169777  0.6725036  0.79667914]
