Prediction of the output: Max deformation

In [1]:
#necessary libraries
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline


In [26]:
# Data loading.
# The CSV location can be overridden with the SIMULATION_CSV environment
# variable so the notebook runs on other machines without editing this cell;
# when the variable is unset, the original default path is used.
DATA_PATH = os.environ.get(
    "SIMULATION_CSV",
    "C:/Users/asmae/Desktop/augmented_data_simulation.csv",
)
FEATURE_COLUMNS = ['Vitesse', 'Temps', 'Profondeur']
TARGET_COLUMN = 'Max deformation'

df = pd.read_csv(DATA_PATH)

# Inputs (in FEATURE_COLUMNS order) and target: Max deformation
X = df[FEATURE_COLUMNS].values
y = df[TARGET_COLUMN].values

# Hold out 20% of the rows; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics learned from the training rows only.
# NOTE(review): the cells visible here feed the unscaled arrays to a pipeline
# that contains its own StandardScaler, so these scaled copies look unused --
# confirm before removing them.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [27]:

# Kernel: constant amplitude (bounds 1e-3..1e3) times an RBF whose
# length scale is searched within 0.01..100
kernel = C(1.0, (1e-3, 1e3)) * RBF(length_scale=1, length_scale_bounds=(0.01,100))

# Gaussian process regressor.
# n_restarts_optimizer=10 restarts the hyperparameter search from randomly
# drawn starting points, so random_state is fixed to make the fitted kernel
# (and every number printed below) reproducible from run to run.
# NOTE(review): alpha=1 is added to the kernel diagonal during fitting, which
# is a large noise term if the target is of order 1 -- confirm it is intended.
gpr = GaussianProcessRegressor(
    kernel=kernel,
    alpha=1,
    n_restarts_optimizer=10,
    random_state=42,
)

# Pipeline that first scales the inputs and then applies the GPR, so the
# scaler is always fitted on exactly the data the model is fitted on
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('gpr', gpr)
])


In [28]:
# 5-fold cross-validation of the whole pipeline (scaler + GPR) on X, y.
# The scorer returns negated MSE values, one per fold.
scores = cross_val_score(
    pipeline,
    X,
    y,
    cv=5,
    scoring='neg_mean_squared_error',
)

# Flip the sign back so the per-fold values are ordinary positive MSEs
mse_scores = np.negative(scores)

# Summarise the folds: mean and spread of the MSE
mean_mse = mse_scores.mean()
std_mse = mse_scores.std()
print(f"Average MSE: {mean_mse}, Standard Deviation: {std_mse}")


Average MSE: 0.04335113643553897, Standard Deviation: 0.009016311489399662


In [29]:
# Fit the scaler + GPR pipeline on the training split only
pipeline.fit(X_train, y=y_train)

In [36]:

# Predict on the test set
y_pred = pipeline.predict(X_test)

# Score the held-out predictions; without this the train/test split yields
# no generalisation estimate to compare against the cross-validated MSE
test_mse = np.mean((y_test - y_pred) ** 2)
print(f"Test MSE: {test_mse}")



In [40]:
# Fixed values for speed and depth (Vitesse, Profondeur)
vitesse_new = 80
profondeur_new = 2

# Temps values at which the output is predicted: 0.0004 to 0.0009 in steps
# of 0.00005. The grid is generated from exact integers (40, 45, ..., 90)
# and scaled once, so every entry equals the corresponding decimal literal.
time_points = np.arange(40, 91, 5) / 1e5


Testing: predictions on new inputs


In [41]:

# New input matrix: one row per time point, with columns in the same order
# as the training features (Vitesse, Temps, Profondeur)
new_data = np.empty((time_points.size, 3), dtype=float)
new_data[:, 0] = vitesse_new      # constant speed in every row
new_data[:, 1] = time_points      # varying time
new_data[:, 2] = profondeur_new   # constant depth in every row


In [42]:
# Run the fitted pipeline on the new inputs (one prediction per row)
new_predictions = pipeline.predict(new_data)

# Print each time point next to its predicted max deformation
for idx, t_value in enumerate(time_points):
    print(f"Temps: {t_value}, Max deformation: {new_predictions[idx]:.20f}")
    

Temps: 0.0004, Max deformation: 1.14472689381601067105
Temps: 0.00045, Max deformation: 1.16684501136580909275
Temps: 0.0005, Max deformation: 1.18660922928784184904
Temps: 0.00055, Max deformation: 1.20395730571987735757
Temps: 0.0006, Max deformation: 1.21883940840757287738
Temps: 0.00065, Max deformation: 1.23121833250993573117
Temps: 0.0007, Max deformation: 1.24106961976706919870
Temps: 0.00075, Max deformation: 1.24838157895286205878
Temps: 0.0008, Max deformation: 1.25315520854890283253
Temps: 0.00085, Max deformation: 1.25540402356681335405
Temps: 0.0009, Max deformation: 1.25515378940209920700
