# model TPH

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from datetime import timedelta
import re
from dateutil.parser import parse
import string
import warnings
warnings.filterwarnings('ignore')
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_percentage_error
import missingno as msno
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler
import scipy
import researchpy as rp
from matplotlib.offsetbox import AnchoredText
#import pacmap
from sklearn.compose import ColumnTransformer
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
import plotly.express as px
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split 
# Import Halving Grid Search
from sklearn.experimental import enable_halving_search_cv 
from sklearn.model_selection import HalvingGridSearchCV
import xgboost as xgb
from scipy.stats import kurtosis,skew
from numpy import mean,sqrt,square
from scipy.fftpack import fft, fftfreq
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import PredefinedSplit
# Notebook-wide display settings: seaborn "darkgrid" theme, no cap on the number of
# DataFrame columns shown, and a column width of 100 characters.
sns.set_style("darkgrid")
pd.set_option('display.max_columns', None)
pd.options.display.max_colwidth = 100

In [None]:
# Filter tags by high pairwise correlation
def corrkill(dataframe, features, corr_cut=0.8):
    """Find feature pairs whose absolute correlation exceeds ``corr_cut``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data holding the candidate features.
    features : list-like of column labels
        Columns of ``dataframe`` to compare against each other.
    corr_cut : float, default 0.8
        Threshold on the absolute correlation; pairs strictly above it are reported.

    Returns
    -------
    tuple (list, pandas.DataFrame)
        * The second member (``Feat2``) of every reported pair, in row-major order
          of the correlation matrix; a feature appears once per pair it belongs to.
        * A table with columns ``Feat1``, ``Feat2`` and ``Val`` (absolute
          correlation), sorted by ``Val`` in descending order.
    """
    corr = dataframe[features].corr()
    names = corr.columns

    # Strict upper triangle (k=1): every unordered pair once, with the diagonal
    # excluded by position. Filtering the diagonal by value (== 1) would also
    # discard perfectly correlated pairs, which are the ones that matter most.
    first_idx, second_idx = np.triu_indices(len(names), k=1)
    pairs = pd.DataFrame({
        'Feat1': names[first_idx],
        'Feat2': names[second_idx],
        'Val': np.abs(corr.to_numpy()[first_idx, second_idx]),
    })

    # NaN correlations (e.g. from constant columns) compare False and drop out here.
    pairs = pairs[pairs['Val'] > corr_cut]

    return list(pairs['Feat2']), pairs.sort_values(by="Val", ascending=False)

In [None]:
# Columns kept for modelling, in the order they appear in the working frame
variables_interes = [
    # mineralogy and ore hardness
    'pyrite law',
    'chalcopyrite law',
    'chalcocite law',
    'covelin law',
    'crusher index',
    'sag power index',
    'ball work index',
    'bornite law',
    # mill signals, targets and liner age
    'charge cell',
    'TPH',
    'water',
    'speed',
    'HH TPH',
    'granulometry',
    'Edad',
    'loss of TPH',
    "power",
    "solid percentage",
    "HH charge cell",
    "LL charge cell",
    # derived differences (created when the data is loaded)
    "delta HH TPH",
    "delta HH charge cell",
    "delta LL charge cell",
]
variables_interes

In [None]:
# Read the relevant-tag metadata and build a tag -> description lookup
tags = pd.read_excel("../../../data/meta data/tags relevantes.xlsx")
tags_cc = tags.tag.to_list()
dic = dict(zip(tags.tag, tags.description))

In [None]:
# Liner age, indexed by timestamp
liners_age = pd.read_csv(
    '../../../data/consolidated data/liners_age_03January2022.csv',
    parse_dates=['Timestamp'],
    index_col='Timestamp',
)

# Cleaned signals: ordered by timestamp, with columns renamed through the tag lookup
cleaned = pd.read_csv(
    '../../../data/consolidated data/cleaned_15December2022.csv',
    parse_dates=['Timestamp'],
    index_col='Timestamp',
).sort_index().rename(columns=dic)

# Attach the liner age and keep only fully populated rows
df_cleaned_age = cleaned.join(liners_age).dropna()

# Differences between the HH / LL columns and the measured signals
df_cleaned_age["delta HH TPH"] = df_cleaned_age["HH TPH"] - df_cleaned_age["TPH"]
df_cleaned_age["delta HH charge cell"] = df_cleaned_age["HH charge cell"] - df_cleaned_age["charge cell"]
df_cleaned_age["delta LL charge cell"] = df_cleaned_age["charge cell"] - df_cleaned_age["LL charge cell"]

# Binary flag: 1 when TPH is more than 100 below HH TPH, else 0
df_cleaned_age["loss of TPH"] = (df_cleaned_age["delta HH TPH"] > 100).astype(int)

df_cleaned_age = df_cleaned_age[variables_interes]
df_cleaned_age.head()

In [None]:
# Exponentially weighted mean of TPH (span=5); the result is only displayed, not stored.
df_cleaned_age["TPH"].ewm(span=5,min_periods=0,ignore_na=True).mean()

In [None]:
# Absolute correlation of every variable with TPH, strongest first
df_corr = df_cleaned_age.dropna().corr()
corr_tph = df_corr["TPH"].abs().sort_values(ascending=False).to_frame()
# A threshold of 0 keeps every variable whose correlation is defined
top_corr = corr_tph[corr_tph["TPH"] >= 0]
top_corr

# Feature Engineering

In [None]:
def feature_engineering(df, windows=(10, 5, 3), lags=(1, 2, 3, 4, 5)):
    """Add rolling-window statistics and lagged copies of every non-target column.

    For each column that is not a target ("TPH", "Edad", "loss of TPH", "HH TPH")
    and for each window size ``w`` the following columns are created:
    ``mean_{col}_{w}``, ``max_{col}_{w}``, ``min_{col}_{w}``, ``rms_{col}_{w}``
    (root mean square) and ``var_{col}_{w}`` (population variance, ddof=0).
    After all statistics, ``{col}_(t-k)`` lag columns are appended for each ``k``
    in ``lags``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input signals, one row per time step, ordered in time. Not modified.
    windows : iterable of int, default (10, 5, 3)
        Rolling window lengths, in rows.
    lags : iterable of int, default (1, 2, 3, 4, 5)
        Row offsets used for the lagged copies.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by the rolling statistics and then the lags.
        Rows whose window is incomplete, or whose lag reaches before the start,
        hold NaN in the affected columns.
    """
    targets = ["TPH", "Edad", "loss of TPH", "HH TPH"]
    base_columns = [column for column in df.columns if column not in targets]

    # Collect the new columns in a dict and attach them once at the end; inserting
    # hundreds of columns one by one fragments the frame.
    new_columns = {}

    for column in base_columns:
        series = df[column]
        squared = series ** 2
        for window in windows:
            # Built-in rolling aggregations replace per-window Python lambdas.
            # With the default min_periods (= window) a window containing NaN
            # yields NaN, matching what rolling(...).apply(np.nan*) produced.
            rolling = series.rolling(window=window)
            new_columns[f"mean_{column}_{window}"] = rolling.mean()
            new_columns[f"max_{column}_{window}"] = rolling.max()
            new_columns[f"min_{column}_{window}"] = rolling.min()
            new_columns[f"rms_{column}_{window}"] = np.sqrt(squared.rolling(window=window).mean())
            # ddof=0 keeps the population variance computed by np.nanvar
            new_columns[f"var_{column}_{window}"] = rolling.var(ddof=0)

    # Lags go after all statistics so the column order stays stable
    for column in base_columns:
        series = df[column]
        for lag in lags:
            new_columns[f"{column}_(t-{lag})"] = series.shift(lag)

    if not new_columns:
        return df.copy()

    return pd.concat([df, pd.DataFrame(new_columns)], axis=1)
   

In [None]:
# Build the rolling-statistic and lag features for every non-target column
df_features=feature_engineering(df_cleaned_age)

In [None]:
# Save the engineered features; the file name carries today's date as day + month name + year
fecha_hoy = pd.to_datetime('today').strftime('%d%B%Y')
df_features.to_csv(f'../../../data/processed data/df_feature_engineering_{fecha_hoy}.csv', index=True)

In [None]:
# Reload the engineered features from a dated snapshot.
# NOTE(review): the file name is pinned to 18December2022, while the export cell
# stamps the current date; confirm this snapshot exists before a fresh run.
df_features0 = pd.read_csv(
    '../../../data/processed data/df_feature_engineering_18December2022.csv',
    parse_dates=['Timestamp'],
    index_col='Timestamp',
).sort_index()  # sort BEFORE shifting: shift() works on row order, not on timestamps

# Targets become the value found 10 rows later, so each row's features predict ahead.
# NOTE(review): this equals 10 minutes only if rows are evenly spaced one minute
# apart with no gaps - confirm the sampling.
df_features0["TPH"] = df_features0["TPH"].shift(-10)
df_features0["loss of TPH"] = df_features0["loss of TPH"].shift(-10)

# Drop rows left incomplete by the rolling windows, the lags and the target shift
df_features0 = df_features0.dropna()
df_features0.shape

In [None]:
# Keep every column whose name does not contain "delta HH TPH"
# (this removes the raw column together with its rolling statistics and lags)
u = [columna for columna in df_features0.columns if "delta HH TPH" not in columna]
df_features = df_features0[u].copy()
df_features.head(1)

# Correlación entre variables

In [None]:
# Get the highly correlated tags and the table listing which pairs of tags are highly correlated
collinearity, table_cor=corrkill(df_features,df_features.columns)

In [None]:
# Highly correlated tag pairs
table_cor

In [None]:
# Absolute correlation of every engineered feature with the TPH column, strongest first
df_corr = df_features.dropna().corr()
corr_tph = df_corr["TPH"].abs().sort_values(ascending=False).to_frame()


In [None]:
# Features whose absolute correlation with TPH is at least 0.25
top_corr=corr_tph[corr_tph.TPH>=0.25]
top_corr

# Equilibrio de eventos de pérdidas de TPH

In [None]:
# Training set: selected months of 2020 and 2021.
# Row selection uses .loc because df["2020-05"] (a single date string in plain
# brackets) is treated as a column lookup and fails on pandas >= 2.0.
entrenamiento = pd.concat([
    df_features.loc["2020-01":"2020-03"],
    df_features.loc["2020-05"],
    df_features.loc["2020-07":"2020-12"],
    df_features.loc["2021-01":"2021-08"],
    df_features.loc["2021-10"],
    df_features.loc["2021-12"],
])
entrenamiento.head(1)

In [None]:
# Validation set: June 2020 and November 2021.
# .loc is required for single-month row selection on pandas >= 2.0.
validacion = pd.concat([
    df_features.loc["2020-06"],
    df_features.loc["2021-11"],
])
validacion.head(1)

In [None]:
# Test set: April 2020, September 2021 and everything from 2022.
# .loc is required for single-period row selection on pandas >= 2.0.
test = pd.concat([
    df_features.loc["2020-04"],
    df_features.loc["2021-09"],
    df_features.loc["2022"],
])
test.head(1)

In [None]:
# Columns handed to the oversampler: everything except the class label.
# "TPH" stays in the list so it is resampled together with the features;
# it is split off as the regression target afterwards.
tags=df_features.columns.to_list()
tags.remove("loss of TPH")
#tags.remove("TPH")

In [None]:
# Oversampling: increase the number of samples of the minority class
# ROS (duplicates rows of the minority class):
ros=RandomOverSampler(random_state=0) # Random_state=0

# Resample the training rows using "loss of TPH" as the class label.
# Ytrain holds that class label here; it is replaced by the TPH target in the next cell.
Xtrain, Ytrain=ros.fit_resample(entrenamiento[tags],entrenamiento["loss of TPH"])

In [None]:
# Regression target for training: split TPH off the resampled frame.
# Not re-runnable on its own: a second run fails because "TPH" is already gone from Xtrain.
Ytrain=Xtrain["TPH"]
Xtrain=Xtrain.drop(columns=["TPH"])

In [None]:
# Regression target and inputs for validation ("loss of TPH" remains a column of Xval)
Yval=validacion["TPH"]
Xval=validacion.drop(columns=["TPH"])

In [None]:
# TPH target of the test rows whose "loss of TPH" flag is 0
Ytest0=test[test["loss of TPH"]==0]["TPH"]


In [None]:
# TPH target of the test rows whose "loss of TPH" flag is 1
Ytest1=test[test["loss of TPH"]==1]["TPH"]


In [None]:
# Inputs of the test rows whose "loss of TPH" flag is 0 (target and flag removed)
Xtest0=test[test["loss of TPH"]==0].drop(columns=["TPH","loss of TPH"])


In [None]:
# Inputs of the test rows whose "loss of TPH" flag is 1 (target and flag removed)
Xtest1=test[test["loss of TPH"]==1].drop(columns=["TPH","loss of TPH"])


In [None]:
# Regression target and inputs for the full test set ("loss of TPH" remains a column of Xtest)
Ytest=test["TPH"]
Xtest=test.drop(columns=["TPH"])

In [None]:
# Stack the validation and training sets (validation rows FIRST) for the grid search
Entre_Vali_features=pd.concat([Xval,Xtrain])
Entre_Vali_target=pd.concat([Yval,Ytrain])

# Fold labels for PredefinedSplit, one per stacked row and in the same order as the
# concat above: 0 puts the row in the validation fold, -1 keeps it in training only.
# Lengths come from Xval / Xtrain: Xtrain is longer than `entrenamiento` after oversampling.
Vali_indice=np.full(len(Xval),0)
Entre_indice=np.full(len(Xtrain),-1)
Indices=np.append(Vali_indice,Entre_indice)
x=PredefinedSplit(Indices)

In [None]:
# "loss of TPH" flag of the test rows, as strings
Color_loss = test["loss of TPH"].astype(str)

# Feature selection

In [None]:
# Data preprocessing: min-max scale every training column, then an XGBoost regressor
preprocessing_transformer = ColumnTransformer(
    transformers=[("MinMax", MinMaxScaler(), list(Xtrain.columns))],
)

pipe = Pipeline(steps=[
    ("preprocesamiento", preprocessing_transformer),
    ("clf", xgb.XGBRegressor(seed=1, eval_metric="rmse")),
])

In [None]:
# Fit the baseline pipeline on the oversampled training data
pipe.fit(Xtrain, Ytrain)

**General**

In [None]:
Ypred = pipe.predict(Xtest)

# Metrics. RMSE is the square root of the MSE: the `squared=False` argument of
# mean_squared_error was removed in scikit-learn 1.6.
print('R2: ',r2_score(Ytest, Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest, Ypred)))

In [None]:
# Feature importances reported by the fitted XGBoost step, most important first
df_importance = (
    pd.DataFrame({
        "tag": Xtrain.columns,
        "importance": pipe["clf"].feature_importances_,
    })
    .sort_values(by="importance", ascending=False)
    .reset_index(drop=True)
)
df_importance.head()

In [None]:
# Histogram of the importance values; switches the pandas plotting backend to plotly
# for the rest of the session.
pd.options.plotting.backend = "plotly"
fig=df_importance["importance"].plot(kind='hist')
fig.update_layout(height=500, width=1200,title="Feature importance XGBoost (Gain)",) 
fig.show()

In [None]:
# Bar chart of the importance of every feature, in ranked order
fig = go.Figure([go.Bar(x=df_importance["tag"], y=df_importance["importance"])])
fig.update_layout(height=500, width=1200,title="Feature importance XGBoost (Gain)",) 
fig.show()

In [None]:
# Keep the 100 highest-ranked features
xgboost_fs=df_importance[:100].tag.to_list()
xgboost_fs

In [None]:
# Among the selected features, get the highly correlated tags (|r| > 0.9) and the table of correlated pairs
collinearity, table_cor=corrkill(Xtrain,xgboost_fs,corr_cut=0.9)

In [None]:
# Highly correlated tag pairs
table_cor

In [None]:
# Final selection: top-ranked features minus those flagged as collinear
tag_select = [i for i in xgboost_fs if i not in collinearity]
len(tag_select)

In [None]:
# Data preprocessing: min-max scale only the selected tags, then an XGBoost regressor.
# Columns not listed in tag_select are dropped by the ColumnTransformer.
preprocessing_transformer = ColumnTransformer(
    transformers=[("MinMax", MinMaxScaler(), tag_select)],
)

pipe = Pipeline(steps=[
    ("preprocesamiento", preprocessing_transformer),
    ("clf", xgb.XGBRegressor(seed=1, eval_metric="rmse")),
])

In [None]:
# Fit the reduced-feature pipeline on the oversampled training data
pipe.fit(Xtrain, Ytrain)

**General**

In [None]:
Ypred = pipe.predict(Xtest)

# Metrics. RMSE is the square root of the MSE: the `squared=False` argument of
# mean_squared_error was removed in scikit-learn 1.6.
print('R2: ',r2_score(Ytest, Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest, Ypred)))

# GridSearch

In [None]:
# Data preprocessing: min-max scale the selected tags

preprocessing_transformer = ColumnTransformer(
    transformers=[
        ('MinMax', MinMaxScaler(),
        tag_select
        #features.columns.to_list()
        ),
    ])

# Pipeline to tune; the regressor step is named "reg" so grid keys use the "reg__" prefix
pipe = Pipeline(
    [
        ("preprocesamiento", preprocessing_transformer), 
        #("Selection", SelectPercentile(f_classif, percentile=10)),
        #("PCA",PCA(n_components=150 )), #0.08
     ("reg", xgb.XGBRegressor(seed=1,eval_metric='rmse'))
    ]
)


# Search space: 10 learning rates x 5 gamma values x 3 ensemble sizes
param_grid_reg = {'reg__learning_rate': list(np.linspace(0.001,1,10)), # Boosting learning rate
                  'reg__gamma':list(np.linspace(0.0001,50,5)), # Minimum loss reduction required to make a further partition on a leaf node of the tree.
                  #'reg__max_depth':[5,10,12,15,20], # Maximum tree depth for base learners.
                  #'reg__min_child_weight':[10,5,1,2,3], # Minimum sum of instance weight(hessian) needed in a child.
                  #'reg__colsample_bytree':[0.5,0.8,1], # Subsample ratio of columns when constructing each tree.
                  'reg__n_estimators': [100,200,300], # Number of gradient boosted trees.   
                  #'reg__reg_alpha': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,200],
              #'reg__reg_lambda': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,200],
               #'Selection__percentile': [10,20,30,50,70,80,100]              
                 } 

# Regression search (successive halving), scored by negative MAE on the predefined
# train/validation split `x`; the best configuration is refitted at the end.
gs_reg = HalvingGridSearchCV(pipe, param_grid_reg, scoring='neg_mean_absolute_error',cv=x,refit=True,
                             verbose=10,
                             aggressive_elimination=True
                            )

# Fit on the stacked validation + training data
gs_reg.fit(Entre_Vali_features, Entre_Vali_target)


In [None]:
# Best validation score of the search (negative MAE, so closer to 0 is better)
gs_reg.best_score_

In [None]:
# Hyperparameters of the best configuration
gs_reg.best_params_

In [None]:
# Overall test metrics for the refitted best estimator
Ypred = gs_reg.predict(Xtest)
# RMSE is the square root of the MSE: the `squared=False` argument of
# mean_squared_error was removed in scikit-learn 1.6.
print('R2: ',r2_score(Ytest, Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest, Ypred)))

# Train model

In [None]:
# Display the computed feature selection
tag_select

In [None]:
# Hard-coded feature selection (32 features). It replaces the list computed from the
# importance ranking, so the final model does not depend on re-running that selection.
tag_select=['min_water_3',
 'min_solid percentage_10',
 'HH TPH',
 'max_delta LL charge cell_10',
 'power',
 'rms_delta LL charge cell_3',
 'var_delta HH charge cell_10',
 'max_covelin law_10',
 'LL charge cell_(t-2)',
 'min_granulometry_5',
 'max_bornite law_10',
 'min_charge cell_5',
 'chalcocite law_(t-2)',
 'max_sag power index_5',
 'min_speed_3',
 'var_bornite law_3',
 'var_speed_3',
 'min_pyrite law_10',
 'crusher index_(t-5)',
 'var_power_3',
 'var_chalcocite law_3',
 'var_bornite law_5',
 'var_solid percentage_3',
 'var_speed_10',
 'ball work index_(t-1)',
 'var_chalcocite law_5',
 'var_water_3',
 'chalcopyrite law_(t-5)',
 'var_crusher index_10',
 'var_chalcopyrite law_3',
 'var_granulometry_3',
 'var_delta HH charge cell_3']

In [None]:
# Hyperparameters found by the grid search, shown for reference next to the final model
gs_reg.best_params_

In [None]:
# Final model: min-max scale the selected tags, then an XGBoost regressor with
# fixed hyperparameters
preprocessing_transformer = ColumnTransformer(
    transformers=[("MinMax", MinMaxScaler(), tag_select)],
)

pipe = Pipeline(steps=[
    ("preprocesamiento", preprocessing_transformer),
    ("clf", xgb.XGBRegressor(
        seed=1,
        eval_metric="rmse",
        gamma=12.500074999999999,
        learning_rate=0.112,
        n_estimators=100,
    )),
])
pipe.fit(Xtrain, Ytrain)

**General**

In [None]:
# Test rows whose "loss of TPH" flag is 0
Ypred = pipe.predict(Xtest0)
# RMSE is the square root of the MSE: the `squared=False` argument of
# mean_squared_error was removed in scikit-learn 1.6.
print('R2: ',r2_score(Ytest0, Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest0, Ypred)))

In [None]:
# Test rows whose "loss of TPH" flag is 1
Ypred = pipe.predict(Xtest1)
# RMSE is the square root of the MSE: the `squared=False` argument of
# mean_squared_error was removed in scikit-learn 1.6.
print('R2: ',r2_score(Ytest1, Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest1, Ypred)))

In [None]:
# Full test set; Ypred from this cell is reused by the plots and error cells below
Ypred = pipe.predict(Xtest)
# RMSE is the square root of the MSE: the `squared=False` argument of
# mean_squared_error was removed in scikit-learn 1.6.
print('R2: ',r2_score(Ytest, Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest, Ypred)),'\n','MAPE :',mean_absolute_percentage_error(Ytest, Ypred)*100)

In [None]:
# Predicted vs. actual TPH for the first 10000 test rows
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=Ypred[:10000],
    y=Ytest[:10000],
    mode='markers',
    name='(Pred,Test)',
))

# Reference line y = x over the 2500-4699 range
diagonal = list(range(2500, 4700))
fig.add_trace(go.Scatter(x=diagonal, y=diagonal, mode='markers', name='Identidad'))

fig.update_layout(
    height=500,
    width=1200,
    title_text="Scatter predict/test",
    xaxis_title="Predict",
    yaxis_title="Test",
)
fig.show()

In [None]:
# Predicted and actual TPH for the first 10000 test rows, drawn against row position
# (no x values are passed, so timestamps are not used)
fig = go.Figure()
fig.add_trace(go.Scatter(y=Ypred[:10000], 
                    mode='lines',name="predict"
                    ))

fig.add_trace(go.Scatter(y=Ytest[:10000],
                    mode='lines',name="Test",#marker=dict(color=list(Color_loss[:1000]))
                    ))

fig.update_layout(height=500, width=1200, title_text="TPH vs TPH predict")
fig.show()

**Histograma del error de predicción, y media y desviación estándar del error absoluto**

In [None]:
# Standard deviation of the absolute prediction error on the test set
(Ytest-Ypred).abs().std()

In [None]:
# Mean absolute prediction error (MAE) on the test set
(Ytest-Ypred).abs().mean()

In [None]:
# Histogram of the signed prediction error (actual minus predicted)
pd.options.plotting.backend = "plotly"
df_dist_error = pd.DataFrame({"Error": Ytest - Ypred})
fig = df_dist_error["Error"].plot(kind='hist')
fig.update_layout(height=500, width=1200, title="Distribución Error RMSE")
fig.show()

**2020**

In [None]:
# Test rows from 2020. Row selection by date string needs .loc: Xtest["2020"] is
# treated as a column lookup and fails on pandas >= 2.0.
Ypred = pipe.predict(Xtest.loc["2020"])

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(Ytest.loc["2020"], Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest.loc["2020"], Ypred)))

**2021**

In [None]:
# Test rows from 2021. Row selection by date string needs .loc: Xtest["2021"] is
# treated as a column lookup and fails on pandas >= 2.0.
Ypred = pipe.predict(Xtest.loc["2021"])

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(Ytest.loc["2021"], Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest.loc["2021"], Ypred)))

**2022**

In [None]:
# Test rows from January 2022. Row selection by date string needs .loc:
# Xtest["2022-01"] is treated as a column lookup and fails on pandas >= 2.0.
Ypred = pipe.predict(Xtest.loc["2022-01"])

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(Ytest.loc["2022-01"], Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest.loc["2022-01"], Ypred)))

In [None]:
# Test rows from February 2022. Row selection by date string needs .loc:
# Xtest["2022-02"] is treated as a column lookup and fails on pandas >= 2.0.
Ypred = pipe.predict(Xtest.loc["2022-02"])

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(Ytest.loc["2022-02"], Ypred),'\n','RMSE: ',np.sqrt(mean_squared_error(Ytest.loc["2022-02"], Ypred)))

# Simulation time series predict

# 2020

In [None]:
# test 2020
# .loc selects the rows (Xtest["2020"] fails on pandas >= 2.0); .copy() makes the
# column assignments below act on an independent frame, not on a slice of Xtest.
test_time_serie = Xtest.loc["2020"].copy()
test_time_serie["TPH"] = Ytest.loc["2020"]
# The pipeline picks its input columns by name, so the extra "TPH" column is ignored
test_time_serie["TPH predict"] = pipe.predict(test_time_serie)

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(test_time_serie["TPH"], test_time_serie["TPH predict"]),'\n','RMSE: ',np.sqrt(mean_squared_error(test_time_serie["TPH"], test_time_serie["TPH predict"])))

# Move the target 10 rows down again, reversing the shift(-10) applied when the
# features were loaded. "TPH predict" is left where it is.
test_time_serie["TPH"] = test_time_serie["TPH"].shift(10)
test_time_serie = test_time_serie.dropna()
data = test_time_serie
data.head(1)

In [None]:
# Six stacked panels sharing the time axis
titulos = ("TPH & HH TPH", "charge cell", "granulometry", "SPI", "speed", "solid percentage")
fig = make_subplots(
    rows=6, cols=1,
    subplot_titles=titulos,
    shared_xaxes=True,
)

# (column of `data`, panel row). The legend entry is the column name, so the
# HH / LL charge cell limits are no longer all labelled "charge cell".
trazas = [
    ("TPH", 1),
    ("TPH predict", 1),
    ("HH TPH", 1),
    ("charge cell", 2),
    ("HH charge cell", 2),
    ("LL charge cell", 2),
    ("granulometry", 3),
    ("sag power index", 4),
    ("speed", 5),
    ("solid percentage", 6),
]
for columna, fila in trazas:
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data[columna],
            mode='lines',
            name=columna,
            line=dict(width=3),
            legendgroup='1',
        ),
        row=fila, col=1,
    )

fig.update_layout(height=1000, width=1500, title_text="Temporal signal analysis")
fig.update_layout(hovermode="x unified")

fig.show()

In [None]:
# Actual TPH, predicted TPH and HH TPH over time
fig = go.Figure()
for columna in ("TPH", "TPH predict", "HH TPH"):
    fig.add_trace(go.Scatter(x=data.index, y=data[columna], mode='lines', name=columna))

fig.update_layout(height=500, width=1200, title_text="TPH vs TPH predict")
fig.show()

In [None]:
# Export the most recently built figure as a standalone HTML file in the working directory
fig.write_html("file.html")

# 2021

In [None]:
# test 2021
# .loc selects the rows (Xtest["2021"] fails on pandas >= 2.0); .copy() makes the
# column assignments below act on an independent frame, not on a slice of Xtest.
test_time_serie = Xtest.loc["2021"].copy()
test_time_serie["TPH"] = Ytest.loc["2021"]
# The pipeline picks its input columns by name, so the extra "TPH" column is ignored
test_time_serie["TPH predict"] = pipe.predict(test_time_serie)

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(test_time_serie["TPH"], test_time_serie["TPH predict"]),'\n','RMSE: ',np.sqrt(mean_squared_error(test_time_serie["TPH"], test_time_serie["TPH predict"])))

# Move the target 10 rows down again, reversing the shift(-10) applied when the
# features were loaded. "TPH predict" is left where it is.
test_time_serie["TPH"] = test_time_serie["TPH"].shift(10)
test_time_serie = test_time_serie.dropna()
data = test_time_serie
data.head(1)

In [None]:
# Six stacked panels sharing the time axis
titulos = ("TPH & HH TPH", "charge cell", "granulometry", "SPI", "speed", "solid percentage")
fig = make_subplots(
    rows=6, cols=1,
    subplot_titles=titulos,
    shared_xaxes=True,
)

# (column of `data`, panel row). The legend entry is the column name, so the
# HH / LL charge cell limits are no longer all labelled "charge cell".
trazas = [
    ("TPH", 1),
    ("TPH predict", 1),
    ("HH TPH", 1),
    ("charge cell", 2),
    ("HH charge cell", 2),
    ("LL charge cell", 2),
    ("granulometry", 3),
    ("sag power index", 4),
    ("speed", 5),
    ("solid percentage", 6),
]
for columna, fila in trazas:
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data[columna],
            mode='lines',
            name=columna,
            line=dict(width=3),
            legendgroup='1',
        ),
        row=fila, col=1,
    )

fig.update_layout(height=1000, width=1500, title_text="Temporal signal analysis")
fig.update_layout(hovermode="x unified")

fig.show()

In [None]:
# Actual TPH, predicted TPH and HH TPH over time
fig = go.Figure()
for columna in ("TPH", "TPH predict", "HH TPH"):
    fig.add_trace(go.Scatter(x=data.index, y=data[columna], mode='lines', name=columna))

fig.update_layout(height=500, width=1200, title_text="TPH vs TPH predict")
fig.show()

# 2022

In [None]:
# test 2022-01
# .loc selects the rows (Xtest["2022-01"] fails on pandas >= 2.0); .copy() makes the
# column assignments below act on an independent frame, not on a slice of Xtest.
test_time_serie = Xtest.loc["2022-01"].copy()
test_time_serie["TPH"] = Ytest.loc["2022-01"]
# The pipeline picks its input columns by name, so the extra "TPH" column is ignored
test_time_serie["TPH predict"] = pipe.predict(test_time_serie)

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(test_time_serie["TPH"], test_time_serie["TPH predict"]),'\n','RMSE: ',np.sqrt(mean_squared_error(test_time_serie["TPH"], test_time_serie["TPH predict"])))

# Move the target 10 rows down again, reversing the shift(-10) applied when the
# features were loaded. "TPH predict" is left where it is.
test_time_serie["TPH"] = test_time_serie["TPH"].shift(10)
test_time_serie = test_time_serie.dropna()
data = test_time_serie
data.head(1)

In [None]:
# Six stacked panels sharing the time axis
titulos = ("TPH & HH TPH", "charge cell", "granulometry", "SPI", "speed", "solid percentage")
fig = make_subplots(
    rows=6, cols=1,
    subplot_titles=titulos,
    shared_xaxes=True,
)

# (column of `data`, panel row). The legend entry is the column name, so the
# HH / LL charge cell limits are no longer all labelled "charge cell".
trazas = [
    ("TPH", 1),
    ("TPH predict", 1),
    ("HH TPH", 1),
    ("charge cell", 2),
    ("HH charge cell", 2),
    ("LL charge cell", 2),
    ("granulometry", 3),
    ("sag power index", 4),
    ("speed", 5),
    ("solid percentage", 6),
]
for columna, fila in trazas:
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data[columna],
            mode='lines',
            name=columna,
            line=dict(width=3),
            legendgroup='1',
        ),
        row=fila, col=1,
    )

fig.update_layout(height=1000, width=1500, title_text="Temporal signal analysis")
fig.update_layout(hovermode="x unified")

fig.show()

In [None]:
# Actual TPH, predicted TPH and HH TPH over time
fig = go.Figure()
for columna in ("TPH", "TPH predict", "HH TPH"):
    fig.add_trace(go.Scatter(x=data.index, y=data[columna], mode='lines', name=columna))

fig.update_layout(height=500, width=1200, title_text="TPH vs TPH predict")
fig.show()

In [None]:
# test 2022-02
# .loc selects the rows (Xtest["2022-02"] fails on pandas >= 2.0); .copy() makes the
# column assignments below act on an independent frame, not on a slice of Xtest.
test_time_serie = Xtest.loc["2022-02"].copy()
test_time_serie["TPH"] = Ytest.loc["2022-02"]
# The pipeline picks its input columns by name, so the extra "TPH" column is ignored
test_time_serie["TPH predict"] = pipe.predict(test_time_serie)

# Metrics (RMSE as sqrt of MSE; `squared=False` was removed in scikit-learn 1.6)
print('R2: ',r2_score(test_time_serie["TPH"], test_time_serie["TPH predict"]),'\n','RMSE: ',np.sqrt(mean_squared_error(test_time_serie["TPH"], test_time_serie["TPH predict"])))

# Move the target 10 rows down again, reversing the shift(-10) applied when the
# features were loaded. "TPH predict" is left where it is.
test_time_serie["TPH"] = test_time_serie["TPH"].shift(10)
test_time_serie = test_time_serie.dropna()
data = test_time_serie
data.head(1)

In [None]:
# Six stacked panels sharing the time axis
titulos = ("TPH & HH TPH", "charge cell", "granulometry", "SPI", "speed", "solid percentage")
fig = make_subplots(
    rows=6, cols=1,
    subplot_titles=titulos,
    shared_xaxes=True,
)

# (column of `data`, panel row). The legend entry is the column name, so the
# HH / LL charge cell limits are no longer all labelled "charge cell".
trazas = [
    ("TPH", 1),
    ("TPH predict", 1),
    ("HH TPH", 1),
    ("charge cell", 2),
    ("HH charge cell", 2),
    ("LL charge cell", 2),
    ("granulometry", 3),
    ("sag power index", 4),
    ("speed", 5),
    ("solid percentage", 6),
]
for columna, fila in trazas:
    fig.add_trace(
        go.Scatter(
            x=data.index,
            y=data[columna],
            mode='lines',
            name=columna,
            line=dict(width=3),
            legendgroup='1',
        ),
        row=fila, col=1,
    )

fig.update_layout(height=1000, width=1500, title_text="Temporal signal analysis")
fig.update_layout(hovermode="x unified")

fig.show()

In [None]:
# Actual TPH, predicted TPH and HH TPH over time
fig = go.Figure()
for columna in ("TPH", "TPH predict", "HH TPH"):
    fig.add_trace(go.Scatter(x=data.index, y=data[columna], mode='lines', name=columna))

fig.update_layout(height=500, width=1200, title_text="TPH vs TPH predict")
fig.show()