In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load your data
X_files = {
    'NDBR50': "MODEL_DATA/NEWDATA/newData_NDBR50_FD.csv",
    'NDBR20': "MODEL_DATA/NEWDATA/newData_NDBR20_FD.csv",
    'NDBR6': "MODEL_DATA/NEWDATA/newData_NDBR6_FD.csv",
    'CHD6': "MODEL_DATA/NEWDATA/newData_CHD6_FD.csv"
}

Y_file = "MODEL_DATA/NEWDATA/newData_expanded_realHardParam.csv"

geometries = ['NDBR50', 'NDBR20', 'NDBR6', 'CHD6']

X_data = {geometry: pd.read_csv(X_files[geometry], usecols=[0, 1]).values for geometry in geometries}
Y_data = pd.read_csv(Y_file).values

# Define the target values for 'c1' to 'c7'
target_values = np.array([0.5, 1300, 2.3 * 10**-14, 0.075, 773.18, 1039.37, 73.94])

# Initialize SVR models for each geometry
svr_models = {geometry: SVR(kernel='linear') for geometry in geometries}

# Train SVR models and make predictions
predicted_parameters = {}
for geometry in geometries:
    X_sample = X_data[geometry]
    Y_sample = Y_data[:, geometries.index(geometry)]
    
    # Split the data into training and testing sets (you can also use cross-validation)
    X_train, X_test, Y_train, Y_test = train_test_split(X_sample, Y_sample, test_size=0.2, random_state=42)
    
    # Standardize features by removing the mean and scaling to unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Train the SVR model
    svr_models[geometry].fit(X_train, Y_train)
    
    # Make predictions on the test set
    predictions = svr_models[geometry].predict(X_test)
    
    # Calculate the mean squared error (MSE) or other evaluation metrics if needed
    mse = np.mean((predictions - Y_test) ** 2)
    print(f"Geometry: {geometry}, Mean Squared Error: {mse:.4f}")
    
    # Inverse transform the predictions to get 'c1' to 'c7' values
    predicted_parameters[geometry] = predictions * np.std(Y_sample) + np.mean(Y_sample)

# Print predicted 'c1' to 'c7' values for each geometry
for geometry, params in predicted_parameters.items():
    print(f'Predicted parameters for geometry {geometry}:')
    print(params)


Geometry: NDBR50, Mean Squared Error: 0.0640
Geometry: NDBR20, Mean Squared Error: 318507.1499
Geometry: NDBR6, Mean Squared Error: 0.0009
Geometry: CHD6, Mean Squared Error: 0.0740
Predicted parameters for geometry NDBR50:
[0.66789742 0.65655475 0.56576199 ... 0.64434438 0.68429448 0.74799144]
Predicted parameters for geometry NDBR20:
[594467.31991057 607735.15177902 821305.73199611 ... 641579.94084196
 550432.71430155 391978.43967089]
Predicted parameters for geometry NDBR6:
[0.05124584 0.05124584 0.05124584 ... 0.05124584 0.05124584 0.05124584]
Predicted parameters for geometry CHD6:
[0.65293428 0.6482226  0.5972862  ... 0.6335797  0.66303742 0.70077395]
