# System recommendation 
- **Paso 1:** Generar predicción de TPH.
- **Paso 2:** Explorar espacio 10 minutos antes del inicio de la caída según el cluster al cual pertenece el modelo.
- **Paso 3:** Generar recomendación a partir de la curva de molienda.
- **Paso 4:** Dado el HH celda de carga recomendado --> Obtener LL celda de carga y con esto la celda de carga promedio.
- **Paso 5:** Dadas las nuevas variables de la familia celda de carga, generar predicción de TPH, con el objetivo de obtener el TPH óptimo que se obtendría si se siguiese la recomendación.

In [None]:
import warnings
# Suppress every warning for the rest of the session. Note that this also hides
# deprecation notices emitted by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
import xgboost as xgb

from numpy import mean,sqrt,square
from sklearn.metrics import mean_squared_error
import pickle
import missingno as msno
import auxiliar_functions as af
# Notebook-wide display settings: seaborn "darkgrid" theme, no limit on the
# number of DataFrame columns shown, and up to 100 characters per cell.
sns.set_style("darkgrid")
pd.options.display.max_columns = None
pd.set_option('display.max_colwidth', 100)

In [None]:
# Ordered list of the 32 engineered feature names ("tags").
# NOTE(review): presumably the feature subset the model was trained on; in the
# visible notebook it is only referenced from a commented-out column filter.
tag_select = [
    "min_water_3",
    "min_solid percentage_10",
    "HH TPH",
    "max_delta LL charge cell_10",
    "power",
    "rms_delta LL charge cell_3",
    "var_delta HH charge cell_10",
    "max_covelin law_10",
    "LL charge cell_(t-2)",
    "min_granulometry_5",
    "max_bornite law_10",
    "min_charge cell_5",
    "chalcocite law_(t-2)",
    "max_sag power index_5",
    "min_speed_3",
    "var_bornite law_3",
    "var_speed_3",
    "min_pyrite law_10",
    "crusher index_(t-5)",
    "var_power_3",
    "var_chalcocite law_3",
    "var_bornite law_5",
    "var_solid percentage_3",
    "var_speed_10",
    "ball work index_(t-1)",
    "var_chalcocite law_5",
    "var_water_3",
    "chalcopyrite law_(t-5)",
    "var_crusher index_10",
    "var_chalcopyrite law_3",
    "var_granulometry_3",
    "var_delta HH charge cell_3",
]

In [None]:
# Raw signal names of interest (20 entries), kept in their original order.
# Two entries are intentionally disabled:
#   "recommendation base model carl", "recommendation base model acn"
variables_interes = [
    "pyrite law",
    "chalcopyrite law",
    "chalcocite law",
    "covelin law",
    "crusher index",
    "sag power index",
    "ball work index",
    "bornite law",
    "charge cell",
    "speed",
    "HH TPH",
    "granulometry",
    "Edad",
    "loss of TPH",
    "solid percentage",
    "HH charge cell",
    "LL charge cell",
    "TPH",
    "water",
    "power",
]

In [None]:
# Load the feature-engineered CSV with the parsed "Timestamp" column as index,
# drop every row that contains a missing value and sort by the index.
df_features = (
    pd.read_csv(
        '../../data/processed data/df_feature_engineering_18December2022.csv',
        parse_dates=['Timestamp'],
        index_col='Timestamp',
    )
    .dropna()
    .sort_index()
)
df_features.shape

# Load pipeline, XGBoost

In [None]:
# Load the trained XGBoost regressor from its serialized model file.
model = xgb.XGBRegressor()
model.load_model("../../models/model_xgb_water.bin")

# Load the pickled preprocessing object. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
# NOTE(security): unpickling executes arbitrary code; only load trusted files.
with open('../../models/pipe_xgb_water.pkl', 'rb') as pipeline_file:
    data_process = pickle.load(pipeline_file)

# Inference pipeline: first element of the loaded object (the preprocessing
# step) followed by the regressor.
pipe = Pipeline(
    [
        ("preprocesamiento", data_process[0]),
        ("clf", model),
    ]
)

# Data test para recomendación HH CC

In [None]:
# Evaluation set: April 2020, September 2021 and all of 2022.
# Rows are selected with .loc because partial-date-string row selection through
# plain df["2020-04"] was deprecated and then removed in pandas 2.0 (there it is
# treated as a column lookup and raises KeyError).
test_periods = ("2020-04", "2021-09", "2022")
test = pd.concat([df_features.loc[period] for period in test_periods])

# Target and feature matrix for the regression model.
Ytest = test["TPH"]
Xtest = test.drop(columns=["TPH"])

# **Paso 1:** Generar predicción de TPH.

In [None]:
# Working frame for the rest of the notebook. This is an alias of Xtest (not a
# copy), so the columns added below also appear on Xtest.
test_time_serie = Xtest
test_time_serie["TPH"] = Ytest
# NOTE(review): the frame handed to predict() still contains the "TPH" target
# column; presumably the preprocessing step selects its own inputs - confirm.
test_time_serie["TPH predict"] = pipe.predict(test_time_serie)

# Metrics. RMSE is computed as sqrt(MSE) because the `squared=False` argument
# of mean_squared_error is deprecated and removed in recent scikit-learn.
rmse = np.sqrt(mean_squared_error(test_time_serie["TPH"], test_time_serie["TPH predict"]))
print('R2: ', r2_score(test_time_serie["TPH"],
                      test_time_serie["TPH predict"]),
      '\n', 'RMSE: ', rmse)

test_time_serie.dropna(inplace=True)
data = test_time_serie

# **Paso 2:** Explorar espacio 10 minutos antes del inicio de la caída.


In [None]:
# Gap between the HH TPH signal and the model prediction; a row is flagged as a
# TPH loss when that gap exceeds 100.
test_time_serie["delta HH TPH"] = test_time_serie["HH TPH"] - test_time_serie["TPH predict"]
test_time_serie["loss of TPH"] = (test_time_serie["delta HH TPH"] > 100).astype("int64")

# Lagged copies of the flag. shift(i) with i > 0 exposes the value from i rows
# EARLIER, so these columns describe the previous 5 rows.
# NOTE(review): the original comment spoke of the "next 5 minutes"; the code
# looks backwards - confirm which direction is intended.
lag_columns = []
for i in range(1, 6):
    lag_name = f"loss of TPH_{i}"
    test_time_serie[lag_name] = test_time_serie["loss of TPH"].shift(i)
    lag_columns.append(lag_name)

# Smoothing: the state is 1 when the current row or any of the 5 lagged flags
# equals 1 (NaN lags at the start of the series never match). Vectorised
# replacement for a row-wise DataFrame.apply with the same result.
test_time_serie["loss of TPH"] = (
    test_time_serie[["loss of TPH", *lag_columns]].eq(1).any(axis=1).astype("int64")
)

# Marker of where each loss period begins, as computed by the project helper.
test_time_serie["start loss tph"] = af.loss_tph(test_time_serie, "loss of TPH")["start loss tph"].to_list()

In [None]:
# Three stacked panels sharing the time axis: TPH signals, the smoothed loss
# state and the loss-start marker.
titulos = ("TPH", "loss of TPH", "start loss TPH")
fig = make_subplots(rows=3, cols=1, subplot_titles=titulos, shared_xaxes=True)

# (column to plot, legend label, subplot row), in drawing order.
trace_layout = [
    ("TPH predict", 'TPH predict', 1),
    ("HH TPH", 'HH TPH', 1),
    ("TPH", 'TPH', 1),
    ("loss of TPH", "loss of TPH", 2),
    ("start loss tph", "start loss TPH", 3),
]
for column, legend, subplot_row in trace_layout:
    fig.add_trace(
        go.Scatter(
            x=test_time_serie.index,
            y=test_time_serie[column],
            mode='lines',
            name=legend,
        ),
        row=subplot_row,
        col=1,
    )

fig.update_layout(
    height=800,
    width=1200,
    title_text="Temporal signal analysis",
    hovermode="x unified",
)

fig.show()

# **Paso 3:** Generar recomendación a partir de la curva de molienda.


In [None]:
# Grid of candidate values 740, 750, ..., 890 (16 values, step 10) that is
# handed to the recommendation helper as the HH charge cell search range.
range_hh_cc = np.arange(start=740, stop=900, step=10)
range_hh_cc

In [None]:
# Number of rows per value of the "start loss tph" marker.
test_time_serie["start loss tph"].value_counts()

In [None]:
# Index labels (timestamps) of every row whose "start loss tph" marker is 1.
is_loss_start = test_time_serie["start loss tph"] == 1
timestamp_loss_tph = test_time_serie.loc[is_loss_start].index.to_list()
len(timestamp_loss_tph)

In [None]:
df_grinding_curve = pd.DataFrame(index=range_hh_cc)

# Parallel accumulators: one entry per loss event for which the helper returned.
list_df_recommendation, list_df_tph, list_index = [], [], []

# Counts the events processed without any error; also part of the image name.
k = 0
for i in timestamp_loss_tph:

    try:
        # Window covering the 10 minutes that end at the loss start.
        inicio = i - timedelta(minutes=10)
        fin = i
        print(k)
        print(fin)

        df = test_time_serie.loc[inicio:fin]

        # The helper returns the recommendation, a TPH result and a figure.
        rec, tph, fig = af.optimum_recommendation(df, range_hh_cc, pipe)
        list_df_recommendation.append(rec)
        list_df_tph.append(tph)
        list_index.append(fin)

        # Persist the figure, tagged with the counter and the event timestamp.
        fecha = fin.strftime("%Y-%m-%d--%H-%M-%S")
        fig.write_image(f"../../images/Curvas de molienda/fig_{k}_{fecha}.png")

    except Exception as e:
        # Best effort: report the failure and continue with the next event.
        print("Error :", e)

    else:
        # Only reached when the whole try body completed.
        k += 1


In [None]:
# Number of recommendations collected by the loop over loss-start timestamps.
len(list_df_recommendation)

In [None]:
# Spread each recommendation over the rows from its loss start up to the next
# loss start; the last recommendation runs to the end of the frame.
#
# Differences from the previous chained-assignment loop:
#   * .loc[rows, column] writes into the frame itself; df[col][a:b] = v may
#     write into a temporary copy and does nothing under copy-on-write.
#   * list_index is the timestamp list appended together with
#     list_df_recommendation, so the pairs stay aligned even when some loss
#     events failed in the recommendation loop.
#   * the final event is no longer dropped by a swallowed IndexError.
# Float placeholder so that non-integer recommendations need no dtype change.
# NOTE(review): assumes each recommendation is a scalar - confirm against
# af.optimum_recommendation.
test_time_serie["recommended HH charge cell"] = 0.0

for k0, (segment_start, recommendation) in enumerate(zip(list_index, list_df_recommendation)):
    print(k0)
    has_next = k0 + 1 < len(list_index)
    # A None end label makes the slice open-ended (through the last row).
    segment_end = list_index[k0 + 1] if has_next else None
    test_time_serie.loc[segment_start:segment_end, "recommended HH charge cell"] = recommendation

In [None]:
# Treat the 0 placeholder as "no recommendation", then carry the last known
# recommendation forward and back-fill the rows before the first one.
# .ffill()/.bfill() replace fillna(method=...), which is deprecated in pandas.
test_time_serie["recommended HH charge cell"] = (
    test_time_serie["recommended HH charge cell"]
    .replace({0: np.nan})
    .ffill()
    .bfill()
)
test_time_serie["recommended HH charge cell"]

In [None]:
# Seven stacked panels sharing the time axis. The titles are listed in the same
# order as the rows the traces are drawn on (previously there were 8 titles for
# 7 rows, and rows 5-7 were labelled with the wrong signal).
titulos = (
    "TPH & HH TPH",
    "charge cell",
    "granulometry & SPI",
    "speed",
    "solid percentage",
    "water",
    "loss of TPH",
)
fig = make_subplots(
    rows=len(titulos), cols=1,
    subplot_titles=titulos,
    shared_xaxes=True,
)

# (column, legend label, subplot row, line width or None for the default).
trace_specs = [
    ("TPH predict", 'TPH predict', 1, None),
    ("HH TPH", 'HH TPH', 1, None),
    ("TPH", 'TPH', 1, None),
    ("HH charge cell", 'HH charge cell', 2, None),
    ("recommended HH charge cell", 'recommended HH charge cell ML model', 2, None),
    ("LL charge cell", 'LL charge cell', 2, None),
    ("charge cell", 'charge cell', 2, None),
    ('granulometry', 'granulometry', 3, 3),
    ('sag power index', "sag power index", 3, 3),
    ('speed', "speed", 4, 3),
    ('solid percentage', 'solid percentage', 5, 3),
    ('water', 'water', 6, 3),
    ("loss of TPH", "loss of TPH", 7, None),
]
for column, legend, subplot_row, line_width in trace_specs:
    scatter_kwargs = dict(
        x=test_time_serie.index,
        y=test_time_serie[column],
        mode='lines',
        name=legend,
    )
    if line_width is not None:
        scatter_kwargs["line"] = dict(width=line_width)
    fig.add_trace(go.Scatter(**scatter_kwargs), row=subplot_row, col=1)

fig.update_layout(height=2000, width=1500, title_text="Temporal signal analysis")
fig.update_layout(hovermode="x unified")

fig.show()

# **Paso 4:** Dado el HH celda de carga recomendado --> Obtener LL celda de carga y con esto la celda de carga promedio.


In [None]:
# Guardamos señales originales:
test_time_serie["HH charge cell 0"]=test_time_serie["HH charge cell"]
test_time_serie["LL charge cell 0"]=test_time_serie["LL charge cell"]
test_time_serie["charge cell 0"]=test_time_serie["charge cell"]

In [None]:
# Actualizamos la familia charge cell para obtener el tph que se obtendria al seguir la recomendación
test_time_serie["HH charge cell"]=test_time_serie["recommended HH charge cell"]
test_time_serie["LL charge cell"]=test_time_serie["HH charge cell"]-80
test_time_serie["charge cell"]=(test_time_serie["HH charge cell"]+test_time_serie["LL charge cell"])/2

In [None]:
# Re-run the project's feature engineering helper on the frame whose charge
# cell family now holds the recommended values; the result replaces the frame.
# NOTE(review): presumably this recomputes the derived charge cell features -
# confirm in auxiliar_functions. A [tag_select] column filter is disabled here.
test_time_serie=af.feature_engineering_recommendation(test_time_serie)

# **Paso 5:** Dadas las nuevas variables de la familia celda de carga, generar predicción de TPH, con el objetivo de obtener el TPH óptimo que se obtendría si se siguiese la recomendación.

In [None]:
# Score the frame with the updated charge cell family. `features` is the same
# object as test_time_serie, so the new column is added to that frame.
features = test_time_serie
Ypred = pipe.predict(features)
features["TPH optimum"] = Ypred

# **Visualización**

In [None]:
# Seven stacked panels sharing the time axis, now including the TPH predicted
# under the recommendation. The titles are listed in the same order as the rows
# the traces are drawn on (previously there were 8 titles for 7 rows, and rows
# 5-7 were labelled with the wrong signal).
titulos = (
    "TPH & HH TPH",
    "charge cell",
    "granulometry & SPI",
    "speed",
    "solid percentage",
    "water",
    "loss of TPH",
)
fig = make_subplots(
    rows=len(titulos), cols=1,
    subplot_titles=titulos,
    shared_xaxes=True,
)

# (column, legend label, subplot row, line width or None for the default).
# The charge cell panel plots the " 0" columns, i.e. the measured signals saved
# before they were overwritten with the recommendation.
trace_specs = [
    ("TPH optimum", 'TPH predict opt', 1, None),
    ("TPH predict", 'TPH predict', 1, None),
    ("HH TPH", 'HH TPH', 1, None),
    ("TPH", 'TPH', 1, None),
    ("HH charge cell 0", 'HH charge cell', 2, None),
    ("recommended HH charge cell", 'recommended HH charge cell ML model', 2, None),
    ("LL charge cell 0", 'LL charge cell', 2, None),
    ("charge cell 0", 'charge cell', 2, None),
    ('granulometry', 'granulometry', 3, 3),
    ('sag power index', "sag power index", 3, 3),
    ('speed', "speed", 4, 3),
    ('solid percentage', 'solid percentage', 5, 3),
    ('water', 'water', 6, 3),
    ("loss of TPH", "loss of TPH", 7, None),
]
for column, legend, subplot_row, line_width in trace_specs:
    scatter_kwargs = dict(
        x=test_time_serie.index,
        y=test_time_serie[column],
        mode='lines',
        name=legend,
    )
    if line_width is not None:
        scatter_kwargs["line"] = dict(width=line_width)
    fig.add_trace(go.Scatter(**scatter_kwargs), row=subplot_row, col=1)

fig.update_layout(height=2000, width=1500, title_text="Temporal signal analysis")
fig.update_layout(hovermode="x unified")

fig.show()

# Save recommendation

In [None]:
# Columns exported as the recommendation result.
recommendation_columns = [
    "recommended HH charge cell",
    "TPH optimum",
    "TPH predict",
    "loss of TPH",
    "start loss tph",
]
df_rec = test_time_serie[recommendation_columns]
df_rec.head()

In [None]:
# Write the recommendation table (index included) to a CSV whose name carries
# today's date formatted as day, full month name and year.
export_stamp = pd.to_datetime('today').strftime('%d%B%Y')
df_rec.to_csv(f'../../data/processed data/recommendation_{export_stamp}.csv', index=True)

In [None]:
# Scratch check of argmax on tied maxima: the position of the first maximum
# (index 4 here) is returned.
list_TPH = np.array([1, 1, 1, 1, 2, 2, 2])

index_max = list_TPH.argmax()
index_max
