In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel
from sklearn.ensemble import RandomForestRegressor
from SALib.analyze import sobol
from scipy.stats import qmc
from sklearn.pipeline import Pipeline
import joblib

#### Cylindrical Shell

In [21]:
# Load the cylindrical-shell dataset; the split cell below stratifies on its
# 'Strat_cat' column.
df = pd.read_csv('cyl.csv')

In [None]:
# Stratified 80/20 split on 'Strat_cat'; the fixed seed keeps it reproducible.
train_set, test_set = train_test_split(
    df, test_size=0.20, shuffle=True, stratify=df['Strat_cat'], random_state=42)

# Remove the helper columns by reassignment instead of an in-place drop: the
# split hands back row selections of `df`, and mutating those in place can
# trigger pandas' SettingWithCopyWarning.
train_set = train_set.drop(columns=['Strat_cat', 'Ry'])
test_set = test_set.drop(columns=['Strat_cat', 'Ry'])

In [23]:
# Positional slicing: columns 0-9 are the model inputs, column 10 is the target.
X_train, y_train = train_set.iloc[:, :10].values, train_set.iloc[:, 10].values
X_test, y_test = test_set.iloc[:, :10].values, test_set.iloc[:, 10].values

In [24]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class LogTransformer(BaseEstimator, TransformerMixin):
    """Stateless ``log(1 + x)`` scaler whose inverse is ``exp(x) - 1``.

    Nothing is learned from the data; ``fit`` only records the number of
    input columns so that ``get_feature_names_out`` can build default names,
    following the scikit-learn transformer convention.
    """

    def fit(self, X, y=None):
        """Record the input width (when ``X`` is 2-D) and return ``self``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        dims = np.shape(X)
        if len(dims) == 2:
            self.n_features_in_ = dims[1]
        return self

    def transform(self, X):
        """Return ``np.log1p(X)`` element-wise."""
        return np.log1p(X)

    def inverse_transform(self, X):
        """Return ``np.expm1(X)`` element-wise, undoing ``transform``."""
        return np.expm1(X)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names as an object ndarray.

        Each name is the input name suffixed with ``_log``. When
        ``input_features`` is None, names default to ``x0, x1, ...`` using the
        width recorded by ``fit``; if no width was recorded, None is returned
        (the previous behaviour).
        """
        if input_features is None:
            width = getattr(self, "n_features_in_", None)
            if width is None:
                return None
            input_features = [f"x{idx}" for idx in range(width)]
        return np.asarray([f"{name}_log" for name in input_features], dtype=object)

In [25]:
# Feature scaler: quantile-maps each input column to a normal distribution.
# It is fitted inside the model pipeline, so X is not scaled separately here.
qt_x = QuantileTransformer(output_distribution='normal', n_quantiles=10, random_state=0)
# Target scaler: log1p forward, expm1 back.
qt_y = LogTransformer()

# The transformer works on column vectors; flatten back to 1-D afterwards.
y_train_scaled = qt_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_scaled = qt_y.transform(y_test.reshape(-1, 1)).ravel()

In [26]:
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error

def compute_score(y_true, y_pred):
    """Summarise regression quality as strings with three decimals.

    Args:
        y_true: reference target values.
        y_pred: predicted values, in the same order as ``y_true``.

    Returns:
        dict with keys "R2", "MAE" and "RMSE", each mapped to the metric
        formatted with ``:.3f``.
    """
    scorers = (
        ("R2", r2_score),
        ("MAE", mean_absolute_error),
        ("RMSE", root_mean_squared_error),
    )
    return {label: f"{scorer(y_true, y_pred):.3f}" for label, scorer in scorers}

In [None]:
# Covariance: white-noise term plus a constant-scaled Matern(nu=1.5) kernel.
# ConstantKernel(1.0) is what the shorthand `1.0 * Matern(...)` expands to.
kernel = WhiteKernel(noise_level=0.6) + ConstantKernel(1.0) * Matern(
    length_scale=10, length_scale_bounds=(1e-05, 100000.0), nu=1.5)

gpr = GaussianProcessRegressor(
    kernel=kernel,
    alpha=5e-9,
    n_restarts_optimizer=10,
    random_state=42,
)

# Scale the inputs and fit the GP in one pipeline; the target is already in
# log1p space.
model_cyl = Pipeline(steps=[('QuantileTransformer', qt_x), ('GPR', gpr)])

model_cyl.fit(X_train, y_train_scaled)

In [None]:
# Persist the fitted pipeline with joblib.
# NOTE(review): the file is a joblib pickle despite its '.csv' suffix; read it
# back with joblib.load (trusted sources only), not with a CSV reader.
joblib.dump(model_cyl, filename='model_cyl.csv')

In [None]:
# Predict in log1p space, then map back with expm1 so the metrics are
# computed on the original target scale.
y_pred = qt_y.inverse_transform(model_cyl.predict(X_test))
compute_score(y_test, y_pred)

#### Saddle Shell

In [None]:
# Load the saddle-shell dataset; the split cell below stratifies on its
# 'Strat_cat' column.
df = pd.read_csv('saddle.csv')

In [None]:
# Stratified 80/20 split on 'Strat_cat'; the fixed seed keeps it reproducible.
train_set, test_set = train_test_split(
    df, test_size=0.20, shuffle=True, stratify=df['Strat_cat'], random_state=42)

# Remove the stratification column by reassignment instead of an in-place
# drop: the split hands back row selections of `df`, and mutating those in
# place can trigger pandas' SettingWithCopyWarning.
train_set = train_set.drop(columns=['Strat_cat'])
test_set = test_set.drop(columns=['Strat_cat'])

In [None]:
# Positional slicing: columns 0-9 are the model inputs, column 10 is the target.
X_train, y_train = train_set.iloc[:, :10].values, train_set.iloc[:, 10].values
X_test, y_test = test_set.iloc[:, :10].values, test_set.iloc[:, 10].values

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

# NOTE(review): this class is defined several times in the notebook; a single
# definition near the imports would avoid silent shadowing.
class LogTransformer(BaseEstimator, TransformerMixin):
    """Stateless ``log(1 + x)`` scaler whose inverse is ``exp(x) - 1``.

    Nothing is learned from the data; ``fit`` only records the number of
    input columns so that ``get_feature_names_out`` can build default names,
    following the scikit-learn transformer convention.
    """

    def fit(self, X, y=None):
        """Record the input width (when ``X`` is 2-D) and return ``self``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        dims = np.shape(X)
        if len(dims) == 2:
            self.n_features_in_ = dims[1]
        return self

    def transform(self, X):
        """Return ``np.log1p(X)`` element-wise."""
        return np.log1p(X)

    def inverse_transform(self, X):
        """Return ``np.expm1(X)`` element-wise, undoing ``transform``."""
        return np.expm1(X)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names as an object ndarray.

        Each name is the input name suffixed with ``_log``. When
        ``input_features`` is None, names default to ``x0, x1, ...`` using the
        width recorded by ``fit``; if no width was recorded, None is returned
        (the previous behaviour).
        """
        if input_features is None:
            width = getattr(self, "n_features_in_", None)
            if width is None:
                return None
            input_features = [f"x{idx}" for idx in range(width)]
        return np.asarray([f"{name}_log" for name in input_features], dtype=object)

In [None]:
# Feature scaler: quantile-maps each input column to a normal distribution.
# It is fitted inside the model pipeline, so X is not scaled separately here.
qt_x = QuantileTransformer(output_distribution='normal', n_quantiles=10, random_state=0)
# Target scaler: log1p forward, expm1 back.
qt_y = LogTransformer()

# The transformer works on column vectors; flatten back to 1-D afterwards.
y_train_scaled = qt_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_scaled = qt_y.transform(y_test.reshape(-1, 1)).ravel()

In [None]:
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error

# NOTE(review): this helper is defined several times in the notebook; a single
# definition near the imports would be enough.
def compute_score(y_true, y_pred):
    """Summarise regression quality as strings with three decimals.

    Args:
        y_true: reference target values.
        y_pred: predicted values, in the same order as ``y_true``.

    Returns:
        dict with keys "R2", "MAE" and "RMSE", each mapped to the metric
        formatted with ``:.3f``.
    """
    scorers = (
        ("R2", r2_score),
        ("MAE", mean_absolute_error),
        ("RMSE", root_mean_squared_error),
    )
    return {label: f"{scorer(y_true, y_pred):.3f}" for label, scorer in scorers}

In [None]:
# Covariance: white-noise term plus a constant-scaled Matern(nu=1.5) kernel.
# ConstantKernel(1.0) is what the shorthand `1.0 * Matern(...)` expands to.
kernel = WhiteKernel(noise_level=0.6) + ConstantKernel(1.0) * Matern(
    length_scale=10, length_scale_bounds=(1e-05, 100000.0), nu=1.5)

gpr = GaussianProcessRegressor(
    kernel=kernel,
    alpha=5e-9,
    n_restarts_optimizer=10,
    random_state=42,
)

# Scale the inputs and fit the GP in one pipeline; the target is already in
# log1p space.
model_saddle = Pipeline(steps=[('QuantileTransformer', qt_x), ('GPR', gpr)])

model_saddle.fit(X_train, y_train_scaled)

In [None]:
# Persist the fitted pipeline with joblib.
# NOTE(review): the file is a joblib pickle despite its '.csv' suffix; read it
# back with joblib.load (trusted sources only), not with a CSV reader.
joblib.dump(model_saddle, filename='model_saddle.csv')

In [None]:
# Predict in log1p space, then map back with expm1 so the metrics are
# computed on the original target scale.
y_pred = qt_y.inverse_transform(model_saddle.predict(X_test))
compute_score(y_test, y_pred)

#### Plate

In [None]:
# Load the plate dataset; the split cell below stratifies on its 'Strat_cat'
# column.
df = pd.read_csv('plate.csv')

In [None]:
# Stratified 80/20 split on 'Strat_cat'; the fixed seed keeps it reproducible.
train_set, test_set = train_test_split(
    df, test_size=0.20, shuffle=True, stratify=df['Strat_cat'], random_state=42)

# Remove the stratification column by reassignment instead of an in-place
# drop: the split hands back row selections of `df`, and mutating those in
# place can trigger pandas' SettingWithCopyWarning.
train_set = train_set.drop(columns=['Strat_cat'])
test_set = test_set.drop(columns=['Strat_cat'])

In [None]:
# Positional slicing: columns 0-9 are the model inputs, column 10 is the target.
X_train, y_train = train_set.iloc[:, :10].values, train_set.iloc[:, 10].values
X_test, y_test = test_set.iloc[:, :10].values, test_set.iloc[:, 10].values

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

# NOTE(review): this class is defined several times in the notebook; a single
# definition near the imports would avoid silent shadowing.
class LogTransformer(BaseEstimator, TransformerMixin):
    """Stateless ``log(1 + x)`` scaler whose inverse is ``exp(x) - 1``.

    Nothing is learned from the data; ``fit`` only records the number of
    input columns so that ``get_feature_names_out`` can build default names,
    following the scikit-learn transformer convention.
    """

    def fit(self, X, y=None):
        """Record the input width (when ``X`` is 2-D) and return ``self``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        dims = np.shape(X)
        if len(dims) == 2:
            self.n_features_in_ = dims[1]
        return self

    def transform(self, X):
        """Return ``np.log1p(X)`` element-wise."""
        return np.log1p(X)

    def inverse_transform(self, X):
        """Return ``np.expm1(X)`` element-wise, undoing ``transform``."""
        return np.expm1(X)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names as an object ndarray.

        Each name is the input name suffixed with ``_log``. When
        ``input_features`` is None, names default to ``x0, x1, ...`` using the
        width recorded by ``fit``; if no width was recorded, None is returned
        (the previous behaviour).
        """
        if input_features is None:
            width = getattr(self, "n_features_in_", None)
            if width is None:
                return None
            input_features = [f"x{idx}" for idx in range(width)]
        return np.asarray([f"{name}_log" for name in input_features], dtype=object)

In [None]:
# Feature scaler: quantile-maps each input column to a normal distribution.
# It is fitted inside the model pipeline, so X is not scaled separately here.
qt_x = QuantileTransformer(output_distribution='normal', n_quantiles=10, random_state=0)
# Target scaler: log1p forward, expm1 back.
qt_y = LogTransformer()

# The transformer works on column vectors; flatten back to 1-D afterwards.
y_train_scaled = qt_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_scaled = qt_y.transform(y_test.reshape(-1, 1)).ravel()

In [None]:
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error

# NOTE(review): this helper is defined several times in the notebook; a single
# definition near the imports would be enough.
def compute_score(y_true, y_pred):
    """Summarise regression quality as strings with three decimals.

    Args:
        y_true: reference target values.
        y_pred: predicted values, in the same order as ``y_true``.

    Returns:
        dict with keys "R2", "MAE" and "RMSE", each mapped to the metric
        formatted with ``:.3f``.
    """
    scorers = (
        ("R2", r2_score),
        ("MAE", mean_absolute_error),
        ("RMSE", root_mean_squared_error),
    )
    return {label: f"{scorer(y_true, y_pred):.3f}" for label, scorer in scorers}

In [None]:
# Covariance: white-noise term plus a constant-scaled Matern(nu=1.5) kernel.
# ConstantKernel(1.0) is what the shorthand `1.0 * Matern(...)` expands to.
kernel = WhiteKernel(noise_level=0.6) + ConstantKernel(1.0) * Matern(
    length_scale=10, length_scale_bounds=(1e-05, 100000.0), nu=1.5)

gpr = GaussianProcessRegressor(
    kernel=kernel,
    alpha=5e-9,
    n_restarts_optimizer=10,
    random_state=42,
)

# Scale the inputs and fit the GP in one pipeline; the target is already in
# log1p space.
model_plate = Pipeline(steps=[('QuantileTransformer', qt_x), ('GPR', gpr)])

model_plate.fit(X_train, y_train_scaled)

In [None]:
# Persist the fitted pipeline with joblib.
# NOTE(review): the file is a joblib pickle despite its '.csv' suffix; read it
# back with joblib.load (trusted sources only), not with a CSV reader.
joblib.dump(model_plate, filename='model_plate.csv')

In [None]:
# Predict in log1p space, then map back with expm1 so the metrics are
# computed on the original target scale.
y_pred = qt_y.inverse_transform(model_plate.predict(X_test))
compute_score(y_test, y_pred)

#### Combined DF

In [None]:
# Load the combined dataset; the split cell below stratifies on its
# 'Strat_cat' column.
df = pd.read_csv('balanced_df.csv')

In [None]:
# Stratified 80/20 split on 'Strat_cat'; the fixed seed keeps it reproducible.
train_set, test_set = train_test_split(
    df, test_size=0.20, shuffle=True, stratify=df['Strat_cat'], random_state=42)

# Remove the stratification column by reassignment instead of an in-place
# drop: the split hands back row selections of `df`, and mutating those in
# place can trigger pandas' SettingWithCopyWarning.
train_set = train_set.drop(columns=['Strat_cat'])
test_set = test_set.drop(columns=['Strat_cat'])

In [None]:
# Positional slicing: columns 0-9 are the model inputs, column 10 is the target.
X_train, y_train = train_set.iloc[:, :10].values, train_set.iloc[:, 10].values
X_test, y_test = test_set.iloc[:, :10].values, test_set.iloc[:, 10].values

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

# NOTE(review): this class is defined several times in the notebook; a single
# definition near the imports would avoid silent shadowing.
class LogTransformer(BaseEstimator, TransformerMixin):
    """Stateless ``log(1 + x)`` scaler whose inverse is ``exp(x) - 1``.

    Nothing is learned from the data; ``fit`` only records the number of
    input columns so that ``get_feature_names_out`` can build default names,
    following the scikit-learn transformer convention.
    """

    def fit(self, X, y=None):
        """Record the input width (when ``X`` is 2-D) and return ``self``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        dims = np.shape(X)
        if len(dims) == 2:
            self.n_features_in_ = dims[1]
        return self

    def transform(self, X):
        """Return ``np.log1p(X)`` element-wise."""
        return np.log1p(X)

    def inverse_transform(self, X):
        """Return ``np.expm1(X)`` element-wise, undoing ``transform``."""
        return np.expm1(X)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names as an object ndarray.

        Each name is the input name suffixed with ``_log``. When
        ``input_features`` is None, names default to ``x0, x1, ...`` using the
        width recorded by ``fit``; if no width was recorded, None is returned
        (the previous behaviour).
        """
        if input_features is None:
            width = getattr(self, "n_features_in_", None)
            if width is None:
                return None
            input_features = [f"x{idx}" for idx in range(width)]
        return np.asarray([f"{name}_log" for name in input_features], dtype=object)

In [None]:
# Feature scaler: quantile-maps each input column to a normal distribution.
# It is fitted inside the model pipeline, so X is not scaled separately here.
qt_x = QuantileTransformer(output_distribution='normal', n_quantiles=10, random_state=0)
# Target scaler: log1p forward, expm1 back.
qt_y = LogTransformer()

# The transformer works on column vectors; flatten back to 1-D afterwards.
y_train_scaled = qt_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_scaled = qt_y.transform(y_test.reshape(-1, 1)).ravel()

In [None]:
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error

# NOTE(review): this helper is defined several times in the notebook; a single
# definition near the imports would be enough.
def compute_score(y_true, y_pred):
    """Summarise regression quality as strings with three decimals.

    Args:
        y_true: reference target values.
        y_pred: predicted values, in the same order as ``y_true``.

    Returns:
        dict with keys "R2", "MAE" and "RMSE", each mapped to the metric
        formatted with ``:.3f``.
    """
    scorers = (
        ("R2", r2_score),
        ("MAE", mean_absolute_error),
        ("RMSE", root_mean_squared_error),
    )
    return {label: f"{scorer(y_true, y_pred):.3f}" for label, scorer in scorers}

In [None]:
# Covariance: white-noise term plus a constant-scaled Matern(nu=1.5) kernel.
# ConstantKernel(1.0) is what the shorthand `1.0 * Matern(...)` expands to.
kernel = WhiteKernel(noise_level=0.6) + ConstantKernel(1.0) * Matern(
    length_scale=10, length_scale_bounds=(1e-05, 100000.0), nu=1.5)

gpr = GaussianProcessRegressor(
    kernel=kernel,
    alpha=5e-9,
    n_restarts_optimizer=10,
    random_state=42,
)

# Scale the inputs and fit the GP in one pipeline; the target is already in
# log1p space.
model_comb = Pipeline(steps=[('QuantileTransformer', qt_x), ('GPR', gpr)])

model_comb.fit(X_train, y_train_scaled)

In [None]:
# Persist the fitted pipeline with joblib.
# NOTE(review): the file is a joblib pickle despite its '.csv' suffix; read it
# back with joblib.load (trusted sources only), not with a CSV reader.
joblib.dump(model_comb, filename='model_comb.csv')

In [None]:
# Predict in log1p space, then map back with expm1 so the metrics are
# computed on the original target scale.
y_pred = qt_y.inverse_transform(model_comb.predict(X_test))
compute_score(y_test, y_pred)