# Visualization

In [1]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation warnings, so API changes
# only surface once they become hard errors.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
import xgboost as xgb

from numpy import mean,sqrt,square
from sklearn.metrics import mean_squared_error
import pickle
import missingno as msno
import auxiliar_functions as af
# Use seaborn's "darkgrid" style for every figure.
sns.set_style("darkgrid")
# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Show up to 100 characters per cell before the text is truncated.
pd.options.display.max_colwidth = 100

In [3]:
# Load the feature-engineering table with 'Timestamp' parsed as the index,
# discard rows containing any missing value and sort the rows by timestamp.
df_features = (
    pd.read_csv(
        '../../data/processed data/df_feature_engineering_18December2022.csv',
        parse_dates=['Timestamp'],
        index_col='Timestamp',
    )
    .dropna()
    .sort_index()
)
df_features.shape

(859335, 403)

# Test data

In [4]:
# Hold-out set: April 2020, September 2021 and all of 2022, selected through
# partial-string indexing on the DatetimeIndex.
# `.loc` is required: selecting rows with `df["2020-04"]` was deprecated in
# pandas 1.2 and removed in pandas 2.0, where the string is treated as a
# column label instead.
TEST_PERIODS = ("2020-04", "2021-09", "2022")
test = pd.concat([df_features.loc[period] for period in TEST_PERIODS])

# Regression target and feature matrix.
Ytest = test["TPH"]
Xtest = test.drop(columns=["TPH"])

In [5]:
# Independent copy of the test feature matrix (not referenced again in the
# cells visible here).
df1=Xtest.copy()

In [7]:
# Build per-row event flags on the test frame, then summarise every TPH-loss
# event (one row per event) into `df_tph_loss`.

N_LAGS = 5                                 # lagged copies created per flag
MAX_EVENT_DURATION = timedelta(hours=5)    # look-ahead used to find the end of a loss
PRE_EVENT_WINDOW = timedelta(minutes=5)    # context kept before the loss starts
EVENT_COLUMNS = [
    "inicio evento", "inicio perdida", "fin perdida", "variación HH TPH",
    "Actua sistema de control", "Supera recomendación del modelo",
    "Recomendación menor a HH CC", "CC menor a LL CC", "Causalidad",
]


def _add_lags(frame, column, n_lags=N_LAGS):
    """Add ``{column}_1`` .. ``{column}_{n_lags}`` to ``frame`` in place.

    Each new column is ``frame[column].shift(lag)``: the row at position ``t``
    receives the value found ``lag`` rows earlier. The shift is row-based, so
    it also runs across the gaps between the concatenated test periods.
    """
    for lag in range(1, n_lags + 1):
        frame[f"{column}_{lag}"] = frame[column].shift(lag)


# Gaps between the limits and the measured values.
test["dif_TPH_HH"] = test["HH TPH"] - test["TPH"]
test["dif_CC_HH"] = test["HH charge cell"] - test["charge cell"]
test["dif_CC_LL"] = test["charge cell"] - test["LL charge cell"]

# Lagged loss-of-TPH flags.
# NOTE(review): the original comment described these as the loss information
# for the *next* 5 minutes, but a positive shift brings values from *earlier*
# rows -- confirm the intended direction.
_add_lags(test, "loss of TPH")

# 1 unless the charge cell is more than 1 unit below its HH limit.
test["greater than HH CC"] = (~(test["dif_CC_HH"] > 1)).astype("int64")
_add_lags(test, "greater than HH CC")

# 1 unless the charge cell is more than 1 unit above its LL limit.
test["less than LL CC"] = (~(test["dif_CC_LL"] > 1)).astype("int64")
_add_lags(test, "less than LL CC")

# 1 when the charge cell reaches or exceeds "HH charge cell".
test["greater than Rec HH CC"] = (test["HH charge cell"] <= test["charge cell"]).astype("int64")
_add_lags(test, "greater than Rec HH CC")

# FIXME(review): this compares "HH charge cell" with itself, so the flag is
# always 0 and every filter on "Recomendación menor a HH CC" == True selects
# nothing. One side should presumably be the recommended HH limit; the
# behaviour is kept as-is until the correct column is confirmed.
test["Rec HH CC less than HH CC"] = (test["HH charge cell"] < test["HH charge cell"]).astype("int64")
_add_lags(test, "Rec HH CC less than HH CC")

# The shifts leave NaN in the leading rows; drop every incomplete row.
test.dropna(inplace=True)

# A row counts as "loss of TPH" when the flag is set on that row or on any of
# the N_LAGS rows before it.
loss_columns = ["loss of TPH"] + [f"loss of TPH_{lag}" for lag in range(1, N_LAGS + 1)]
test["loss of TPH"] = test[loss_columns].eq(1).any(axis=1).astype("int64")

# Project helper; the code below relies on it providing the "start loss tph"
# and "end loss tph" marker columns.
test = af.loss_tph(test, "loss of TPH")

df_full = test.copy()

event_records = []
skipped_starts = []   # loss starts with no end marker inside MAX_EVENT_DURATION
for loss_start in df_full.index[df_full["start loss tph"] == 1]:
    # Look for the first end-of-loss marker within the look-ahead window.
    # A separate name is used so the engineered `test` frame is not
    # overwritten and the cell stays re-runnable.
    window = df_full.loc[loss_start:loss_start + MAX_EVENT_DURATION]
    loss_ends = window.index[window["end loss tph"] == 1]
    if loss_ends.empty:
        # Previously swallowed by a bare `except`; now skipped explicitly so
        # any other error is no longer hidden.
        skipped_starts.append(loss_start)
        continue

    loss_end = loss_ends.min() + timedelta(minutes=1)
    event_start = loss_start - PRE_EVENT_WINDOW
    event_data = df_full.loc[event_start:loss_end]
    exceeded_at = event_data.index[event_data["greater than Rec HH CC"] == 1]

    event_records.append({
        "inicio evento": event_start,
        "inicio perdida": loss_start,
        "fin perdida": loss_end,
        "variación HH TPH": bool(event_data["HH TPH"].std() != 0),
        "Actua sistema de control": bool(event_data["greater than HH CC"].eq(1).any()),
        "Supera recomendación del modelo": bool(event_data["greater than Rec HH CC"].eq(1).any()),
        "Recomendación menor a HH CC": bool(event_data["Rec HH CC less than HH CC"].eq(1).any()),
        "CC menor a LL CC": bool(event_data["less than LL CC"].eq(1).any()),
        # True when "greater than Rec HH CC" was set at or before the loss start.
        "Causalidad": bool((exceeded_at <= loss_start).any()),
    })

# One row per event; the explicit column list keeps the schema even when no
# event was found.
df_tph_loss = pd.DataFrame(event_records, columns=EVENT_COLUMNS)


In [18]:
# Loss events for which none of the four flags below is set.
# NOTE(review): the original heading described this selection as losses caused
# by exceeding the recommendation [+], yet the filter keeps only events where
# "Supera recomendación del modelo" is False -- confirm the intent.
flags_unset = [
    "Actua sistema de control",
    "variación HH TPH",
    "CC menor a LL CC",
    "Supera recomendación del modelo",
]
evento1 = df_tph_loss[df_tph_loss[flags_unset].eq(False).all(axis=1)].reset_index(drop=True)
evento1

Unnamed: 0,inicio evento,inicio perdida,fin perdida,variación HH TPH,Actua sistema de control,Supera recomendación del modelo,Recomendación menor a HH CC,CC menor a LL CC,Causalidad
0,2020-04-01 15:33:00,2020-04-01 15:38:00,2020-04-01 15:47:00,False,False,False,False,False,False
1,2020-04-02 01:21:00,2020-04-02 01:26:00,2020-04-02 01:37:00,False,False,False,False,False,False
2,2020-04-02 04:41:00,2020-04-02 04:46:00,2020-04-02 05:09:00,False,False,False,False,False,False
3,2020-04-02 19:12:00,2020-04-02 19:17:00,2020-04-02 19:25:00,False,False,False,False,False,False
4,2020-04-02 19:31:00,2020-04-02 19:36:00,2020-04-02 20:02:00,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...
210,2022-02-27 04:51:00,2022-02-27 04:56:00,2022-02-27 05:21:00,False,False,False,False,False,False
211,2022-02-27 05:29:00,2022-02-27 05:34:00,2022-02-27 05:40:00,False,False,False,False,False,False
212,2022-02-27 05:46:00,2022-02-27 05:51:00,2022-02-27 06:48:00,False,False,False,False,False,False
213,2022-02-27 12:45:00,2022-02-27 12:50:00,2022-02-27 12:58:00,False,False,False,False,False,False


In [9]:
# TPH-loss events without exceeding the recommendation [-]: every flag listed
# below must be False.
flags_unset = [
    "Actua sistema de control",
    "variación HH TPH",
    "CC menor a LL CC",
    "Recomendación menor a HH CC",
    "Supera recomendación del modelo",
]
evento2 = df_tph_loss[df_tph_loss[flags_unset].eq(False).all(axis=1)].reset_index(drop=True)
evento2

Unnamed: 0,inicio evento,inicio perdida,fin perdida,variación HH TPH,Actua sistema de control,Supera recomendación del modelo,Recomendación menor a HH CC,CC menor a LL CC,Causalidad
0,2020-04-01 15:33:00,2020-04-01 15:38:00,2020-04-01 15:47:00,False,False,False,False,False,False
1,2020-04-02 01:21:00,2020-04-02 01:26:00,2020-04-02 01:37:00,False,False,False,False,False,False
2,2020-04-02 04:41:00,2020-04-02 04:46:00,2020-04-02 05:09:00,False,False,False,False,False,False
3,2020-04-02 19:12:00,2020-04-02 19:17:00,2020-04-02 19:25:00,False,False,False,False,False,False
4,2020-04-02 19:31:00,2020-04-02 19:36:00,2020-04-02 20:02:00,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...
210,2022-02-27 04:51:00,2022-02-27 04:56:00,2022-02-27 05:21:00,False,False,False,False,False,False
211,2022-02-27 05:29:00,2022-02-27 05:34:00,2022-02-27 05:40:00,False,False,False,False,False,False
212,2022-02-27 05:46:00,2022-02-27 05:51:00,2022-02-27 06:48:00,False,False,False,False,False,False
213,2022-02-27 12:45:00,2022-02-27 12:50:00,2022-02-27 12:58:00,False,False,False,False,False,False


In [None]:




def _sum_tph_gap(events, minute_data):
    """Return the total of ``dif_TPH_HH`` accumulated over a set of loss events.

    Parameters
    ----------
    events : DataFrame with "inicio perdida" and "fin perdida" columns.
    minute_data : time-indexed DataFrame with a "dif_TPH_HH" column.

    Returns
    -------
    Sum of ``dif_TPH_HH`` over the label slice [inicio perdida, fin perdida]
    of every event; ``0`` when ``events`` is empty.
    """
    return sum(
        minute_data.loc[start:end, "dif_TPH_HH"].sum()
        for start, end in zip(events["inicio perdida"], events["fin perdida"])
    )


# Conditions shared by both groups: the three flags below must all be False.
base_mask = (
    df_tph_loss[["Actua sistema de control", "variación HH TPH", "CC menor a LL CC"]]
    .eq(False)
    .all(axis=1)
)

# TPH loss when the recommendation was exceeded [+].
# NOTE(review): "Recomendación menor a HH CC" is derived upstream from a
# comparison of "HH charge cell" with itself, so it is never True and this
# group is empty (tph1 == 0) until that flag is corrected.
evento1 = df_tph_loss[
    base_mask
    & df_tph_loss["Recomendación menor a HH CC"].eq(True)
    & df_tph_loss["Supera recomendación del modelo"].eq(True)
    & df_tph_loss["Causalidad"].eq(True)
].reset_index(drop=True)
tph1 = _sum_tph_gap(evento1, df_full)

# TPH loss without exceeding the recommendation [-].
evento2 = df_tph_loss[
    base_mask
    & df_tph_loss["Recomendación menor a HH CC"].eq(False)
    & df_tph_loss["Supera recomendación del modelo"].eq(False)
].reset_index(drop=True)
tph2 = _sum_tph_gap(evento2, df_full)

# The original cell ended with `return metric_final`, which is a SyntaxError
# outside a function; the value is displayed as the cell output instead.
metric_final = tph1 - tph2
metric_final