# Hackathon

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import clone as clone_model
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV, SGDRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV, cross_validate
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
# Short aliases for the verbose sensor headers used in the Excel sheets.
# The keys are the spreadsheet headers verbatim (the solids header of the
# pumping station really contains a double space).
columns_rename = dict([
    ('tiempo', 'T'),
    # Pumping station (EB)
    ('% Solido  Bombeo concentrado_EB', 'EB%'),
    ('Presión de Descarga_EB_1', 'EB1'),
    ('Presión de Descarga_EB_2', 'EB2'),
    # Valve stations (EV1 / EV2)
    ('Presion_Estación de Valvulas_EV1_1', 'EV1_1'),
    ('Presión estación de valvulas 2_EV2_1', 'EV2_1'),
    ('Presion_Estación de Valvulas_EV1_2', 'EV1_2'),
    ('Presión estación de valvulas 2_EV2_2', 'EV2_2'),
    # SM pressure readings (the prediction targets)
    ('Presión_SM-1', 'SM1'),
    ('Presión_SM-2', 'SM2'),
    ('Presión_SM-3', 'SM3'),
    ('Presión_SM-4', 'SM4'),
    # EDT readings
    ('Porcentaje de Solido Alimentación Espesador', 'EDT%'),
    ('Presión_EDT_1', 'EDT1'),
    ('Presión_EDT_2', 'EDT2'),
    ('Presión_EDT_3', 'EDT3'),
])

# Solución final

In [3]:
# File whose missing SM readings are filled in further down.
final_data = pd.read_excel(io='Data_test_hakcathon_CEN.xlsx')
# Fully populated history used to fit the final models.
train_data = pd.read_excel(
    io='Hack_concentraducto_v01.xlsx',
    sheet_name='Data_Hackathon',
)

In [4]:
# Switch both frames to the short column aliases (rebinding instead of
# mutating in place, so the cell can be re-run safely).
final_data = final_data.rename(columns=columns_rename)
train_data = train_data.rename(columns=columns_rename)

In [5]:
# Unfitted estimator clones, keyed by target column.
models = {}
# Estimators fitted on the full training sheet, keyed by target column.
pretrained_models = {}

## Modelo SM1

In [6]:
# SM1 estimator: gradient-boosted trees with fixed hyper-parameters.
sm1_params = {
    'random_state': 42,
    'n_estimators': 175,
    'learning_rate': 0.302,
    'reg_lambda': 1,
    'reg_alpha': 0.5,
}
sm1_model = XGBRegressor(**sm1_params)
# Register an unfitted clone next to the instance that gets trained.
models['SM1'] = clone_model(sm1_model)

In [7]:
# Train on the whole training sheet; fit() returns the estimator itself,
# so the fitted instance can be registered in the same statement.
sm1_features = ['EB%', 'EB1', 'EB2', 'SM2', 'EV1_1']
pretrained_models['SM1'] = sm1_model.fit(train_data[sm1_features], train_data['SM1'])

## Modelo SM2

In [8]:
# SM2 estimator: degree-4 polynomial expansion followed by least squares.
sm2_steps = (PolynomialFeatures(degree=4), LinearRegression())
sm2_model = make_pipeline(*sm2_steps)
# Register an unfitted clone next to the instance that gets trained.
models['SM2'] = clone_model(sm2_model)

In [9]:
# Train on the whole training sheet; fit() returns the pipeline itself.
sm2_features = ['EV1_1', 'EV1_2']
pretrained_models['SM2'] = sm2_model.fit(train_data[sm2_features], train_data['SM2'])

## Modelo SM3

In [10]:
# SM3 estimator: same boosted-tree configuration as the SM1 model.
sm3_params = {
    'random_state': 42,
    'n_estimators': 175,
    'learning_rate': 0.302,
    'reg_lambda': 1,
    'reg_alpha': 0.5,
}
sm3_model = XGBRegressor(**sm3_params)
# Register an unfitted clone next to the instance that gets trained.
models['SM3'] = clone_model(sm3_model)

In [11]:
# Train on the whole training sheet; fit() returns the estimator itself.
sm3_features = ['SM2', 'EV1_2', 'EV2_1', 'EV2_2']
pretrained_models['SM3'] = sm3_model.fit(train_data[sm3_features], train_data['SM3'])

## Modelo SM4

In [12]:
# SM4 estimator: same boosted-tree configuration as the SM1 model.
sm4_params = {
    'random_state': 42,
    'n_estimators': 175,
    'learning_rate': 0.302,
    'reg_lambda': 1,
    'reg_alpha': 0.5,
}
sm4_model = XGBRegressor(**sm4_params)
# Register an unfitted clone next to the instance that gets trained.
models['SM4'] = clone_model(sm4_model)

In [13]:
# Train on the whole training sheet; fit() returns the estimator itself.
sm4_features = ['EV2_1', 'EV2_2', 'EDT%', 'EDT1']
pretrained_models['SM4'] = sm4_model.fit(train_data[sm4_features], train_data['SM4'])

## Predicción de datos faltantes

In [14]:
# Fill the gaps of SM1 with the fitted SM1 model.
sm1_features = ['EB%', 'EB1', 'EB2', 'SM2', 'EV1_1']
y_1 = final_data['SM1']
y_1_targets = y_1.isna()  # True on the rows that still need an SM1 value
X_1 = final_data.loc[y_1_targets, sm1_features].copy()

# SM2 is one of the SM1 inputs, but its own gaps are only filled in the SM2
# cell further down. Where SM2 is missing on a row we must predict, estimate
# it here with the fitted SM2 model so the SM1 model never receives NaN for a
# feature that had no missing values in the training sheet. Only the local
# feature matrix is touched; final_data['SM2'] is left for the SM2 cell.
# NOTE(review): assumes EV1_1 / EV1_2 are populated on those rows - confirm.
sm2_gaps = X_1.index[X_1['SM2'].isna()]
if len(sm2_gaps) > 0:
    X_1.loc[sm2_gaps, 'SM2'] = pretrained_models['SM2'].predict(
        final_data.loc[sm2_gaps, ['EV1_1', 'EV1_2']]
    )

if y_1_targets.any():
    y_1_pred = pretrained_models['SM1'].predict(X_1)
    final_data.loc[y_1_targets, 'SM1'] = y_1_pred
else:
    # Nothing to impute: do not hand a zero-row frame to predict().
    y_1_pred = np.empty(0)

### SM2

In [15]:
# Fill the gaps of SM2 with the fitted SM2 pipeline.
sm2_features = ['EV1_1', 'EV1_2']
y_2 = final_data['SM2']
y_2_targets = y_2.isna()  # True on the rows that still need an SM2 value
# Single .loc selection instead of chained indexing.
X_2 = final_data.loc[y_2_targets, sm2_features]

if y_2_targets.any():
    y_2_pred = pretrained_models['SM2'].predict(X_2)
    final_data.loc[y_2_targets, 'SM2'] = y_2_pred
else:
    # Nothing to impute: scikit-learn estimators reject zero-sample input,
    # so skip the prediction entirely.
    y_2_pred = np.empty(0)

### SM3

In [16]:
# Fill the gaps of SM3 with the fitted SM3 model. SM2 is an input here and
# has already been completed by the SM2 cell above.
sm3_features = ['SM2', 'EV1_2', 'EV2_1', 'EV2_2']
y_3 = final_data['SM3']
y_3_targets = y_3.isna()  # True on the rows that still need an SM3 value
# Single .loc selection instead of chained indexing.
X_3 = final_data.loc[y_3_targets, sm3_features]

if y_3_targets.any():
    y_3_pred = pretrained_models['SM3'].predict(X_3)
    final_data.loc[y_3_targets, 'SM3'] = y_3_pred
else:
    # Nothing to impute: do not hand a zero-row frame to predict().
    y_3_pred = np.empty(0)

### SM4

In [17]:
# Fill the gaps of SM4 with the fitted SM4 model.
sm4_features = ['EV2_1', 'EV2_2', 'EDT%', 'EDT1']
y_4 = final_data['SM4']
y_4_targets = y_4.isna()  # True on the rows that still need an SM4 value
# Single .loc selection instead of chained indexing.
X_4 = final_data.loc[y_4_targets, sm4_features]

if y_4_targets.any():
    y_4_pred = pretrained_models['SM4'].predict(X_4)
    final_data.loc[y_4_targets, 'SM4'] = y_4_pred
else:
    # Nothing to impute: do not hand a zero-row frame to predict().
    y_4_pred = np.empty(0)

## Exportado de datos finales

In [18]:
# Write the completed table to disk without the positional index column.
output_path = 'Data_test_hackathon_CEN_final_Team_ARCA.xlsx'
final_data.to_excel(output_path, index=False)

# Trabajo de desarrollo y selección de modelos

In [19]:
# Reload the raw training sheet for the exploratory part of the notebook and
# work on an independent copy so `data` keeps the original headers/values.
data = pd.read_excel(
    io='Hack_concentraducto_v01.xlsx',
    sheet_name='Data_Hackathon',
)
df1 = data.copy(deep=True)
df1.head(5)

Unnamed: 0,tiempo,% Solido Bombeo concentrado_EB,Presión de Descarga_EB_1,Presión de Descarga_EB_2,Presion_Estación de Valvulas_EV1_1,Presion_Estación de Valvulas_EV1_2,Presión_SM-1,Presión_SM-2,Presión estación de valvulas 2_EV2_1,Presión estación de valvulas 2_EV2_2,Porcentaje de Solido Alimentación Espesador,Presión_EDT_1,Presión_EDT_2,Presión_EDT_3,Presión_SM-3,Presión_SM-4
0,2023-01-01 00:00:00,60.305771,-0.663051,1625.289917,3662.116943,3653.241699,16701.179688,3040.474121,3330.878174,3303.286377,59.756073,10653.394531,8614.203125,321.568634,2477.526611,1647.383789
1,2023-01-01 00:03:00,60.316761,-0.663393,1625.912354,3666.185059,3657.404053,16702.007812,3044.661621,3335.265137,3307.434814,59.675354,10658.351562,8619.733398,320.516357,2484.498535,1650.932861
2,2023-01-01 00:06:00,60.327747,-0.663736,1626.53479,3670.25293,3661.566406,16702.835938,3048.849121,3339.6521,3311.583252,59.709515,10658.579102,8621.905273,321.340149,2489.650879,1653.877686
3,2023-01-01 00:09:00,60.338734,-0.664078,1627.157349,3674.321045,3665.72876,16703.664062,3053.036621,3344.039062,3315.731689,59.712528,10663.964844,8622.951172,321.454895,2493.044678,1656.922974
4,2023-01-01 00:12:00,60.35099,-0.66442,1627.779785,3678.38916,3669.891357,16704.492188,3057.224121,3348.426025,3319.880127,59.747906,10667.239258,8625.802734,322.045898,2498.507568,1662.279785


In [20]:
# Missing-value count per column (isna is the same check as isnull).
df1.isna().sum()

tiempo                                         0
% Solido  Bombeo concentrado_EB                0
Presión de Descarga_EB_1                       0
Presión de Descarga_EB_2                       0
Presion_Estación de Valvulas_EV1_1             0
Presion_Estación de Valvulas_EV1_2             0
Presión_SM-1                                   0
Presión_SM-2                                   0
Presión estación de valvulas 2_EV2_1           0
Presión estación de valvulas 2_EV2_2           0
Porcentaje de Solido Alimentación Espesador    0
Presión_EDT_1                                  0
Presión_EDT_2                                  0
Presión_EDT_3                                  0
Presión_SM-3                                   0
Presión_SM-4                                   0
dtype: int64

In [21]:
# Average solids percentage at the pumping station.
solids_column = "% Solido  Bombeo concentrado_EB"
mean_concentrated = df1[solids_column].mean()
mean_concentrated

43.04818888272726

In [22]:
# Smallest EB_1 discharge-pressure reading. Despite the variable name, this
# is taken from the pressure column, not from the solids percentage.
discharge_column = "Presión de Descarga_EB_1"
min_concentrated = df1[discharge_column].min()
min_concentrated

-7.87132453918457

# Preprocesamiento
## Conversión de datos

In [23]:
# Working frame for preprocessing: raw data with the short column aliases.
# rename() without inplace returns a new frame, so `data` is left untouched.
df2 = data.rename(columns=columns_rename)

### Conversión de presiones

In [24]:
# 6.89476 is the kPa-per-psi factor.
# NOTE(review): presumably EB1/EB2 are the only pressures recorded in psi - confirm.
conversion = 6.89476

# Recompute from the untouched raw frame instead of scaling df2 in place:
# the previous in-place form multiplied the columns again on every re-run of
# this cell, silently compounding the conversion.
df2[['EB1', 'EB2']] = data.rename(columns=columns_rename)[['EB1', 'EB2']] * conversion

### Normalización de datos
El resultado de esta etapa fue utilizado para el análisis exploratorio de datos, pero no se consideró esta transformación para los modelos finales, ya que en los modelos escogidos no generaba diferencias, y se quiso evitar transformaciones innecesarias entre las medidas (puesto que, si se normalizaban los datos, se debería hacer la transformación inversa para obtener los valores reales).

In [25]:
# Standardise every column except the timestamp, then glue the timestamp
# back in front of the scaled values.
excluded_columns = ['T']
normalize_columns = list(filter(lambda col: col not in excluded_columns, df2.columns))

scaler = StandardScaler()
scaled_values = scaler.fit_transform(df2[normalize_columns])

scaled_part = pd.DataFrame(scaled_values, columns=normalize_columns)
df_normalized = pd.concat([df2[excluded_columns], scaled_part], axis=1)


# Visualización de datos

In [None]:
# Pairwise scatter plots of the working (unscaled) frame.
pd.plotting.scatter_matrix(frame=df2)

In [None]:
# Pairwise scatter plots of the standardised frame.
pd.plotting.scatter_matrix(frame=df_normalized)

In [None]:
# Every measured column over time (raw scale), all on one axes.
feats = df2.columns.difference(['T'])
time_axis = df2['T']
for feat in feats:
    plt.plot(time_axis, df2[feat])
plt.xlabel('T')
plt.show()

In [None]:
# Same time plot on the standardised values; `feats` comes from the cell above.
time_axis = df_normalized['T']
for feat in feats:
    plt.plot(time_axis, df_normalized[feat])
plt.xlabel('T')
plt.show()

### Correlación entre SM2 y EV2
Analizando el conjunto de datos, se encontró que, en casos donde la presión de SM2 cambia entre dos tuplas de manera drástica, el cambio se propaga de manera instantánea a la presión de EV2. Esto se puede observar en el siguiente gráfico:

In [None]:
# SM2 (x) against EV1_1 (y), standardised values.
plt.scatter(x=df_normalized['SM2'], y=df_normalized['EV1_1'])

In [None]:
# SM4 (x) against EV2_2 (y), standardised values.
plt.scatter(x=df_normalized['SM4'], y=df_normalized['EV2_2'])

In [None]:
# SM4 (x) against EDT1 (y), standardised values.
plt.scatter(x=df_normalized['SM4'], y=df_normalized['EDT1'])

In [None]:
# SM1 (x) against EB2 (y), standardised values.
plt.scatter(x=df_normalized['SM1'], y=df_normalized['EB2'])


In [None]:
# SM1 (x) against SM2 (y), standardised values.
plt.scatter(x=df_normalized['SM1'], y=df_normalized['SM2'])

In [None]:
# SM3 (x) against EV2_1 (y), standardised values.
plt.scatter(x=df_normalized['SM3'], y=df_normalized['EV2_1'])

In [None]:
# SM3 (x) against EV1_2 (y), standardised values.
plt.scatter(x=df_normalized['SM3'], y=df_normalized['EV1_2'])

# Modelo Lineal

## Predicción SM1

In [None]:
# SM1 experiment data: 60 % train / 40 % hold-out with a fixed seed.
sm1_inputs = ['EB%', 'EB1', 'EB2', 'SM2', 'EV1_1']
X_1 = df2[sm1_inputs]
y_1 = df2['SM1']
X_train, X_test, y_train, y_test = train_test_split(
    X_1, y_1, test_size=0.4, random_state=42
)

### Regresión Lineal

In [None]:
sm1 = LinearRegression()
%time sm1.fit(X_train, y_train)
y_pred = sm1.predict(X_test)
r2_1 = r2_score(y_test, y_pred)
mae_1 = mean_absolute_error(y_test, y_pred)
rmse_1 = mean_squared_error(y_test, y_pred, squared=False)
print(f'SM1: R2 = {r2_1}, MAE = {mae_1}, RMSE = {rmse_1}')
cross_validate(sm1, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

Observaciones:
Mayor puntuación, con un 30% de datos asignados para validación, con todos los datos de presión en [kPa]

### Regresión Lineal Polinomial

In [None]:
poly = PolynomialFeatures(degree=3)
linear = LinearRegression()
sm1 = make_pipeline(poly, linear)
%time sm1.fit(X_train, y_train)
y_pred = sm1.predict(X_test)
r2_1 = r2_score(y_test, y_pred)
mae_1 = mean_absolute_error(y_test, y_pred)
rmse_1 = mean_squared_error(y_test, y_pred, squared=False)
print(f'SM1: R2 = {r2_1}, MAE = {mae_1}, RMSE = {rmse_1}')
cross_validate(sm1, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Regresión Lasso

In [None]:
# SM1 with Lasso (regularisation strength chosen by internal CV); the
# iteration cap is raised to 5000. fit() returns the estimator itself.
sm1 = LassoCV(max_iter=5000).fit(X_train, y_train)
y_pred = sm1.predict(X_test)
r2_1, mae_1 = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
print(f'SM1: R2 = {r2_1}, MAE = {mae_1}')

En primer intento, se tuvo menor puntaje y problemas de convergencia.
Se aumenta número de iteraciones y se obtiene mayor demora, manteniendo puntaje.

### Elastic Net

In [None]:
# SM1 with Elastic Net at default settings; fit() returns the estimator itself.
sm1 = ElasticNetCV().fit(X_train, y_train)
y_pred = sm1.predict(X_test)
r2_1, mae_1 = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)
print(f'SM1: R2 = {r2_1}, MAE = {mae_1}')

### SGD

In [None]:
# SM1 with stochastic gradient descent. SGD shuffles the samples, so the seed
# is pinned (same value as the split) to make the reported scores repeatable.
# NOTE(review): the inputs are fed unscaled; SGD is sensitive to feature
# scale, so these scores may mostly reflect that - confirm before comparing.
sm1 = SGDRegressor(random_state=42)
sm1.fit(X_train, y_train)
y_pred = sm1.predict(X_test)
r2_1 = r2_score(y_test, y_pred)
mae_1 = mean_absolute_error(y_test, y_pred)
print(f'SM1: R2 = {r2_1}, MAE = {mae_1}')

### XGBoost

In [None]:
sm1 = XGBRegressor(random_state=42, n_estimators=175, learning_rate=0.302, reg_lambda=1, reg_alpha=0.5)
%time sm1.fit(X_train, y_train)
y_pred = sm1.predict(X_test)
r2_1 = r2_score(y_test, y_pred)
mae_1 = mean_absolute_error(y_test, y_pred)
rmse_1 = mean_squared_error(y_test, y_pred, squared=False)
print(f'SM1: R2 = {r2_1}, MAE = {mae_1}, RMSE = {rmse_1}')
cross_validate(sm1, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Random Forest

Observaciones:
Entrenamiento lento, fuera de plazo máximo. Descartado.

## Predicción SM2
Luego del análisis exploratorio, se encontró una alta correlación entre las mediciones de SM2 y EV1, por lo cual se escogen sus sensores como únicas entradas del modelo.

In [None]:
# SM2 experiment data: 60 % train / 40 % hold-out with a fixed seed.
sm2_inputs = ['EV1_1', 'EV1_2']
X_2 = df2[sm2_inputs]
y_2 = df2['SM2']
X_2_train, X_2_test, y_2_train, y_2_test = train_test_split(
    X_2, y_2, test_size=0.4, random_state=42
)

### Regresión Lineal

In [None]:
sm2 = LinearRegression()
%time sm2.fit(X_2_train, y_2_train)
y_2_pred = sm2.predict(X_2_test)
r2_2 = r2_score(y_2_test, y_2_pred)
mae_2 = mean_absolute_error(y_2_test, y_2_pred)
rmse_2 = mean_squared_error(y_2_test, y_2_pred, squared=False)
print(f'SM2: R2 = {r2_2}, MAE = {mae_2}')
cross_validate(sm2, X_2, y_2, cv=10, scoring=('r2', 'neg_mean_absolute_error'))

Observaciones:
Evaluando individualmente, al considerar solo la medición de entrada de EV1, se obtiene una puntuación de R2 = 0.9948631933284161, MAE = 0.061770157769054965; considerando entrada y salida de EV1, se obtiene R2 = 0.9951739335689458, MAE = 0.06143409021178661.

### Regresión Lineal Polinomial

In [None]:
poly = PolynomialFeatures(degree=4)
linear = LinearRegression()
sm2 = make_pipeline(poly, linear)
%time sm2.fit(X_2_train, y_2_train)
y_2_pred = sm2.predict(X_2_test)
r2_2 = r2_score(y_2_test, y_2_pred)
mae_2 = mean_absolute_error(y_2_test, y_2_pred)
rmse_2 = mean_squared_error(y_2_test, y_2_pred, squared=False)
print(f'SM2: R2 = {r2_2}, MAE = {mae_2}')
cross_validate(sm2, X_2, y_2, cv=10, scoring=('r2', 'neg_mean_absolute_error'))

### Regresión Lasso

In [None]:
# SM2 with Lasso (regularisation strength chosen by internal CV).
sm2 = LassoCV()
sm2.fit(X_2_train, y_2_train)
y_2_pred = sm2.predict(X_2_test)
r2_2 = r2_score(y_2_test, y_2_pred)
mae_2 = mean_absolute_error(y_2_test, y_2_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_2 = np.sqrt(mean_squared_error(y_2_test, y_2_pred))
# rmse_2 used to be computed and then dropped; report it like the other targets do.
print(f'SM2: R2 = {r2_2}, MAE = {mae_2}, RMSE = {rmse_2}')
cross_validate(sm2, X_2, y_2, cv=10, scoring=('r2', 'neg_mean_absolute_error'))

### Elastic Net

In [None]:
# SM2 with Elastic Net at default settings.
sm2 = ElasticNetCV()
sm2.fit(X_2_train, y_2_train)
y_2_pred = sm2.predict(X_2_test)
r2_2 = r2_score(y_2_test, y_2_pred)
mae_2 = mean_absolute_error(y_2_test, y_2_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_2 = np.sqrt(mean_squared_error(y_2_test, y_2_pred))
# rmse_2 used to be computed and then dropped; report it like the other targets do.
print(f'SM2: R2 = {r2_2}, MAE = {mae_2}, RMSE = {rmse_2}')
cross_validate(sm2, X_2, y_2, cv=10, scoring=('r2', 'neg_mean_absolute_error'))

### SGD

In [None]:
# SM2 with stochastic gradient descent. SGD shuffles the samples, so the seed
# is pinned (same value as the split) to make the reported scores repeatable.
# NOTE(review): the inputs are fed unscaled; SGD is sensitive to feature
# scale, so these scores may mostly reflect that - confirm before comparing.
sm2 = SGDRegressor(random_state=42)
sm2.fit(X_2_train, y_2_train)
y_2_pred = sm2.predict(X_2_test)
r2_2 = r2_score(y_2_test, y_2_pred)
mae_2 = mean_absolute_error(y_2_test, y_2_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_2 = np.sqrt(mean_squared_error(y_2_test, y_2_pred))
# rmse_2 used to be computed and then dropped; report it like the other targets do.
print(f'SM2: R2 = {r2_2}, MAE = {mae_2}, RMSE = {rmse_2}')
cross_validate(sm2, X_2, y_2, cv=10, scoring=('r2', 'neg_mean_absolute_error'))

### XGBoost

In [None]:
sm2 = XGBRegressor(random_state=42)
%time sm2.fit(X_2_train, y_2_train)
y_2_pred = sm2.predict(X_2_test)
r2_2 = r2_score(y_2_test, y_2_pred)
mae_2 = mean_absolute_error(y_2_test, y_2_pred)
print(f'SM2: R2 = {r2_2}, MAE = {mae_2}')

## Predicción SM3
Luego del análisis exploratorio, se encontró una correlación entre las mediciones de SM3 y EV1, y entre SM3 y EV2.

In [None]:
# SM3 experiment data: 60 % train / 40 % hold-out with a fixed seed.
sm3_inputs = ['SM2', 'EV1_2', 'EV2_1', 'EV2_2']
X_3 = df2[sm3_inputs]
y_3 = df2['SM3']
X_3_train, X_3_test, y_3_train, y_3_test = train_test_split(
    X_3, y_3, test_size=0.4, random_state=42
)

### Regresión Lineal

In [None]:
sm3 = LinearRegression()
%time sm3.fit(X_3_train, y_3_train)
y_3_pred = sm3.predict(X_3_test) 
r2_3 = r2_score(y_3_test, y_3_pred)
mae_3 = mean_absolute_error(y_3_test, y_3_pred)
rmse_3 = mean_squared_error(y_3_test, y_3_pred, squared=False)
print(f'SM3: R2 = {r2_3}, MAE = {mae_3}, RMSE = {rmse_3}')
cross_validate(sm3, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

Observaciones:
Evaluando individualmente, al considerar solo la medición de salida de EV1, se obtiene una puntuación de R2 = 0.757428290149249, MAE = 0.36624686002811024; y considerando solo la medición de entrada de EV2, se obtiene R2 = 0.5894141038841055, MAE = 0.44782809908607935.
Considerando ambas mediciones, se obtiene R2 = 0.8643364789365919, MAE = 0.27408716724892734

### Regresión Lineal Polinomial

In [None]:
poly = PolynomialFeatures(degree=4)
linear = LinearRegression()
%time sm3 = make_pipeline(poly, linear)
sm3.fit(X_3_train, y_3_train)
y_3_pred = sm3.predict(X_3_test)
r2_3 = r2_score(y_3_test, y_3_pred)
mae_3 = mean_absolute_error(y_3_test, y_3_pred)
rmse_3 = mean_squared_error(y_3_test, y_3_pred, squared=False)
print(f'SM3: R2 = {r2_3}, MAE = {mae_3}, RMSE = {rmse_3}')
cross_validate(sm3, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Regresión Lasso

In [None]:
# SM3 with Lasso (regularisation strength chosen by internal CV).
sm3 = LassoCV()
sm3.fit(X_3_train, y_3_train)
y_3_pred = sm3.predict(X_3_test)
r2_3 = r2_score(y_3_test, y_3_pred)
mae_3 = mean_absolute_error(y_3_test, y_3_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_3 = np.sqrt(mean_squared_error(y_3_test, y_3_pred))
print(f'SM3: R2 = {r2_3}, MAE = {mae_3}, RMSE = {rmse_3}')
cross_validate(sm3, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Elastic Net

In [None]:
# SM3 with Elastic Net at default settings.
sm3 = ElasticNetCV()
sm3.fit(X_3_train, y_3_train)
y_3_pred = sm3.predict(X_3_test)
r2_3 = r2_score(y_3_test, y_3_pred)
mae_3 = mean_absolute_error(y_3_test, y_3_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_3 = np.sqrt(mean_squared_error(y_3_test, y_3_pred))
print(f'SM3: R2 = {r2_3}, MAE = {mae_3}, RMSE = {rmse_3}')
cross_validate(sm3, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### SGD

In [None]:
# SM3 with stochastic gradient descent. SGD shuffles the samples, so the seed
# is pinned (same value as the split) to make the reported scores repeatable.
# NOTE(review): the inputs are fed unscaled; SGD is sensitive to feature
# scale, so these scores may mostly reflect that - confirm before comparing.
sm3 = SGDRegressor(random_state=42)
sm3.fit(X_3_train, y_3_train)
y_3_pred = sm3.predict(X_3_test)
r2_3 = r2_score(y_3_test, y_3_pred)
mae_3 = mean_absolute_error(y_3_test, y_3_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_3 = np.sqrt(mean_squared_error(y_3_test, y_3_pred))
print(f'SM3: R2 = {r2_3}, MAE = {mae_3}, RMSE = {rmse_3}')
cross_validate(sm3, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### XGBoost

In [None]:
# SM3 with gradient-boosted trees (the configuration used for the final model).
sm3 = XGBRegressor(random_state=42, n_estimators=175, learning_rate=0.302, reg_lambda=1, reg_alpha=0.5)
sm3.fit(X_3_train, y_3_train)
y_3_pred = sm3.predict(X_3_test)
r2_3 = r2_score(y_3_test, y_3_pred)
mae_3 = mean_absolute_error(y_3_test, y_3_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_3 = np.sqrt(mean_squared_error(y_3_test, y_3_pred))
print(f'SM3: R2 = {r2_3}, MAE = {mae_3}, RMSE = {rmse_3}')
cross_validate(sm3, X_3, y_3, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

## Predicción SM4
Luego del análisis exploratorio, se encontró una correlación entre las mediciones de SM4 y EV2, y entre SM4 y EDT1.

In [None]:
# SM4 experiment data: 60 % train / 40 % hold-out with a fixed seed.
sm4_inputs = ['EV2_1', 'EV2_2', 'EDT%', 'EDT1']
X_4 = df2[sm4_inputs]
y_4 = df2['SM4']
X_4_train, X_4_test, y_4_train, y_4_test = train_test_split(
    X_4, y_4, test_size=0.4, random_state=42
)

### Regresión Lineal

In [None]:
# Baseline for SM4: ordinary least squares scored on the hold-out split.
sm4 = LinearRegression()
sm4.fit(X_4_train, y_4_train)
y_4_pred = sm4.predict(X_4_test)
r2_4 = r2_score(y_4_test, y_4_pred)
mae_4 = mean_absolute_error(y_4_test, y_4_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_4 = np.sqrt(mean_squared_error(y_4_test, y_4_pred))
print(f'SM4: R2 = {r2_4}, MAE = {mae_4}, RMSE = {rmse_4}')
cross_validate(sm4, X_4, y_4, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Regresión Lineal Polinomial

In [None]:
# SM4 with a degree-4 polynomial expansion in front of least squares.
poly = PolynomialFeatures(degree=4)
linear = LinearRegression()
sm4 = make_pipeline(poly, linear)
sm4.fit(X_4_train, y_4_train)
y_4_pred = sm4.predict(X_4_test)
r2_4 = r2_score(y_4_test, y_4_pred)
mae_4 = mean_absolute_error(y_4_test, y_4_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_4 = np.sqrt(mean_squared_error(y_4_test, y_4_pred))
print(f'SM4: R2 = {r2_4}, MAE = {mae_4}, RMSE = {rmse_4}')
cross_validate(sm4, X_4, y_4, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Regresión Lasso

In [None]:
# SM4 with Lasso (regularisation strength chosen by internal CV).
sm4 = LassoCV()
sm4.fit(X_4_train, y_4_train)
y_4_pred = sm4.predict(X_4_test)
r2_4 = r2_score(y_4_test, y_4_pred)
mae_4 = mean_absolute_error(y_4_test, y_4_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_4 = np.sqrt(mean_squared_error(y_4_test, y_4_pred))
print(f'SM4: R2 = {r2_4}, MAE = {mae_4}, RMSE = {rmse_4}')
cross_validate(sm4, X_4, y_4, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### Elastic Net

In [None]:
# SM4 with Elastic Net at default settings.
sm4 = ElasticNetCV()
sm4.fit(X_4_train, y_4_train)
y_4_pred = sm4.predict(X_4_test)
r2_4 = r2_score(y_4_test, y_4_pred)
mae_4 = mean_absolute_error(y_4_test, y_4_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_4 = np.sqrt(mean_squared_error(y_4_test, y_4_pred))
print(f'SM4: R2 = {r2_4}, MAE = {mae_4}, RMSE = {rmse_4}')
cross_validate(sm4, X_4, y_4, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### SGD

In [None]:
# SM4 with stochastic gradient descent. SGD shuffles the samples, so the seed
# is pinned (same value as the split) to make the reported scores repeatable.
# NOTE(review): the inputs are fed unscaled; SGD is sensitive to feature
# scale, so these scores may mostly reflect that - confirm before comparing.
sm4 = SGDRegressor(random_state=42)
sm4.fit(X_4_train, y_4_train)
y_4_pred = sm4.predict(X_4_test)
r2_4 = r2_score(y_4_test, y_4_pred)
mae_4 = mean_absolute_error(y_4_test, y_4_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_4 = np.sqrt(mean_squared_error(y_4_test, y_4_pred))
print(f'SM4: R2 = {r2_4}, MAE = {mae_4}, RMSE = {rmse_4}')
cross_validate(sm4, X_4, y_4, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))

### XGBoost

In [None]:
# SM4 with gradient-boosted trees (the configuration used for the final model).
sm4 = XGBRegressor(random_state=42, n_estimators=175, learning_rate=0.302, reg_lambda=1, reg_alpha=0.5)
sm4.fit(X_4_train, y_4_train)
y_4_pred = sm4.predict(X_4_test)
r2_4 = r2_score(y_4_test, y_4_pred)
mae_4 = mean_absolute_error(y_4_test, y_4_pred)
# RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated since scikit-learn 1.4, this form works on every version.
rmse_4 = np.sqrt(mean_squared_error(y_4_test, y_4_pred))
print(f'SM4: R2 = {r2_4}, MAE = {mae_4}, RMSE = {rmse_4}')
cross_validate(sm4, X_4, y_4, cv=10, scoring=('r2', 'neg_mean_absolute_error', 'neg_root_mean_squared_error'))