# 1era Hackathon Minera Centinela
Equipo ARCA

In [None]:
import pandas as pd
from sklearn.base import clone as clone_model
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from xgboost import XGBRegressor

In [None]:
# Maps the raw spreadsheet headers to the short tags used by every later cell.
# The keys have to match the Excel headers character for character (note the
# double space in the first '% Solido' key and the unaccented 'Presion'
# spellings), otherwise DataFrame.rename leaves that column untouched.
columns_rename = {
    # Timestamp column
    "tiempo": "T",
    # EB columns: solids percentage and the two discharge pressures
    "% Solido  Bombeo concentrado_EB": "EB%",
    "Presión de Descarga_EB_1": "EB1",
    "Presión de Descarga_EB_2": "EB2",
    # Valve-station (EV1 / EV2) pressures
    "Presion_Estación de Valvulas_EV1_1": "EV1_1",
    "Presión estación de valvulas 2_EV2_1": "EV2_1",
    "Presion_Estación de Valvulas_EV1_2": "EV1_2",
    "Presión estación de valvulas 2_EV2_2": "EV2_2",
    # SM pressures: the four columns the models below are fitted to predict
    "Presión_SM-1": "SM1",
    "Presión_SM-2": "SM2",
    "Presión_SM-3": "SM3",
    "Presión_SM-4": "SM4",
    # EDT columns: feed solids percentage and three pressures
    "Porcentaje de Solido Alimentación Espesador": "EDT%",
    "Presión_EDT_1": "EDT1",
    "Presión_EDT_2": "EDT2",
    "Presión_EDT_3": "EDT3",
}

In [None]:
# Load the two workbooks: `final_data` is the dataset whose missing SM values
# get filled in below, `train_data` is the sheet the models are fitted on.
# NOTE(review): 'hakcathon' in the first file name looks like a typo, but it is
# a runtime path and has to match the file on disk - confirm before renaming.
final_data = pd.read_excel(io='Data_test_hakcathon_CEN.xlsx')
train_data = pd.read_excel(
    io='Hack_concentraducto_v01.xlsx',
    sheet_name='Data_Hackathon',
)

## Transformación de datos

In [None]:
# Give both frames the same short column tags; each frame is modified in place.
for frame in (final_data, train_data):
    frame.rename(columns=columns_rename, inplace=True)

In [None]:
# Multiply the EB1/EB2 columns of both frames by the psi -> kPa factor so they
# are scaled identically in the training data and in the data to be imputed.
conversion = 6.89476  # [kPa/psi]
eb_pressure_cols = ['EB1', 'EB2']

for frame in (final_data, train_data):
    frame[eb_pressure_cols] = frame[eb_pressure_cols] * conversion

## Generación de modelos

In [None]:
# Two registries keyed by target column name:
#   models            -> unfitted estimator configurations
#   pretrained_models -> the estimators after fitting on train_data
models, pretrained_models = {}, {}

### Modelo SM1

In [None]:
# Gradient-boosted regressor for SM1. An unfitted clone with the same
# hyper-parameters is registered in `models` before the estimator is trained.
sm1_model = XGBRegressor(
    n_estimators=175,
    learning_rate=0.302,
    reg_alpha=0.5,
    reg_lambda=1,
    random_state=42,
)
models['SM1'] = clone_model(sm1_model)

In [None]:
# Train the SM1 regressor and register the fitted estimator.
# fit() returns the estimator itself, so the registry entry is `sm1_model`.
sm1_train_inputs = train_data[['EB%', 'EB1', 'EB2', 'SM2', 'EV1_1']]
sm1_train_target = train_data['SM1']
pretrained_models['SM1'] = sm1_model.fit(sm1_train_inputs, sm1_train_target)

### Modelo SM2

In [None]:
# SM2 model: degree-4 polynomial feature expansion feeding a linear regression.
# An unfitted clone of the pipeline is registered in `models`.
sm2_model = make_pipeline(
    PolynomialFeatures(degree=4),
    LinearRegression(),
)
models['SM2'] = clone_model(sm2_model)

In [None]:
# Train the SM2 pipeline on the two EV1 columns and register the fitted
# estimator. fit() returns the pipeline itself.
sm2_train_inputs = train_data[['EV1_1', 'EV1_2']]
sm2_train_target = train_data['SM2']
pretrained_models['SM2'] = sm2_model.fit(sm2_train_inputs, sm2_train_target)

### Modelo SM3

In [None]:
# Gradient-boosted regressor for SM3, configured with the same hyper-parameters
# as the SM1 regressor. An unfitted clone is registered in `models`.
sm3_model = XGBRegressor(
    n_estimators=175,
    learning_rate=0.302,
    reg_alpha=0.5,
    reg_lambda=1,
    random_state=42,
)
models['SM3'] = clone_model(sm3_model)

In [None]:
# Train the SM3 regressor and register the fitted estimator.
# fit() returns the estimator itself.
sm3_train_inputs = train_data[['SM2', 'EV1_2', 'EV2_1', 'EV2_2']]
sm3_train_target = train_data['SM3']
pretrained_models['SM3'] = sm3_model.fit(sm3_train_inputs, sm3_train_target)

### Modelo SM4

In [None]:
# Gradient-boosted regressor for SM4, configured with the same hyper-parameters
# as the SM1 and SM3 regressors. An unfitted clone is registered in `models`.
sm4_model = XGBRegressor(
    n_estimators=175,
    learning_rate=0.302,
    reg_alpha=0.5,
    reg_lambda=1,
    random_state=42,
)
models['SM4'] = clone_model(sm4_model)

In [None]:
# Train the SM4 regressor and register the fitted estimator.
# fit() returns the estimator itself.
sm4_train_inputs = train_data[['EV2_1', 'EV2_2', 'EDT%', 'EDT1']]
sm4_train_target = train_data['SM4']
pretrained_models['SM4'] = sm4_model.fit(sm4_train_inputs, sm4_train_target)

## Predicción de datos faltantes

### SM1

In [None]:
# Fill the missing SM1 values of final_data with the fitted SM1 regressor.
y_1 = final_data['SM1']
y_1_targets = y_1.isna()  # True on the rows that need a prediction
# .copy() so the SM2 patch below edits a private feature matrix, not final_data.
X_1 = final_data.loc[y_1_targets, ['EB%', 'EB1', 'EB2', 'SM2', 'EV1_1']].copy()

# SM2 is an input of this model, but SM2 itself is only imputed in a later
# cell. Rows missing both SM1 and SM2 would therefore reach the regressor with
# a NaN feature. Estimate those SM2 inputs with the fitted SM2 model first,
# using the same two EV1 columns that model was trained on. Only the local
# feature matrix is patched; final_data['SM2'] is left for its own cell.
sm2_gap = y_1_targets & final_data['SM2'].isna()
if sm2_gap.any():
    # Positional boolean mask over the rows of X_1 (X_1 holds exactly the
    # y_1_targets rows, in the same order).
    sm2_gap_in_X_1 = sm2_gap[y_1_targets].to_numpy()
    X_1.loc[sm2_gap_in_X_1, 'SM2'] = pretrained_models['SM2'].predict(
        final_data.loc[sm2_gap, ['EV1_1', 'EV1_2']]
    )

# Only call the model when something is missing, so predict() is never handed
# a zero-row frame.
if y_1_targets.any():
    y_1_pred = pretrained_models['SM1'].predict(X_1)
    final_data.loc[y_1_targets, 'SM1'] = y_1_pred

### SM2

In [None]:
# Fill the missing SM2 values of final_data with the fitted SM2 pipeline.
y_2 = final_data['SM2']
y_2_targets = y_2.isna()  # True on the rows that need a prediction
X_2 = final_data.loc[y_2_targets, ['EV1_1', 'EV1_2']]

# scikit-learn estimators raise ValueError on a feature matrix with zero rows,
# so only predict when at least one SM2 value is actually missing.
if y_2_targets.any():
    y_2_pred = pretrained_models['SM2'].predict(X_2)
    final_data.loc[y_2_targets, 'SM2'] = y_2_pred

### SM3

In [None]:
# Fill the missing SM3 values of final_data with the fitted SM3 regressor.
# SM2 is one of the inputs; this cell runs after the SM2 imputation cell, so
# the SM2 column read here already includes the imputed values.
y_3 = final_data['SM3']
y_3_targets = y_3.isna()  # True on the rows that need a prediction
X_3 = final_data.loc[y_3_targets, ['SM2', 'EV1_2', 'EV2_1', 'EV2_2']]

# Only call the model when something is missing, so predict() is never handed
# a zero-row frame.
if y_3_targets.any():
    y_3_pred = pretrained_models['SM3'].predict(X_3)
    final_data.loc[y_3_targets, 'SM3'] = y_3_pred

### SM4

In [None]:
# Fill the missing SM4 values of final_data with the fitted SM4 regressor.
y_4 = final_data['SM4']
y_4_targets = y_4.isna()  # True on the rows that need a prediction
X_4 = final_data.loc[y_4_targets, ['EV2_1', 'EV2_2', 'EDT%', 'EDT1']]

# Only call the model when something is missing, so predict() is never handed
# a zero-row frame.
if y_4_targets.any():
    y_4_pred = pretrained_models['SM4'].predict(X_4)
    final_data.loc[y_4_targets, 'SM4'] = y_4_pred

## Exportado de datos finales

In [None]:
# Write the completed frame (with the imputed SM columns) to a new workbook,
# leaving out the DataFrame index.
final_data.to_excel(
    excel_writer='Data_test_hackathon_CEN_final_Team_ARCA.xlsx',
    index=False,
)