# Generación de resultados

## Introducción

### Imports

In [1]:
# -*- coding: utf-8 -*-

# Creada por Maximiliano Jones

# Manejo de datos
from ast import parse
import pandas as pd

# Funcionalidades de la aplicación
import streamlit as st
import base64
import pandas_profiling
from streamlit_pandas_profiling import st_profile_report

# Manejo del tiempo/fechas
import pytz
import time

# Automated Classification
from pycaret import classification as supervised
# import pycaret.anomaly as unsupervised

import plotly.express as px

2022-09-19 22:48:23.834 INFO    visions.backends: Pandas backend loaded 1.3.5
2022-09-19 22:48:23.851 INFO    visions.backends: Numpy backend loaded 1.19.5
2022-09-19 22:48:23.853 INFO    visions.backends: Pyspark backend NOT loaded
2022-09-19 22:48:23.854 INFO    visions.backends: Python backend loaded


### Funciones auxiliares

In [4]:
# @st.cache(suppress_st_warning=True)
def load_data(path):
    '''
    Load a local CSV file and, when possible, index it by a timestamp column.

    The delimiter is auto-detected (``sep=None`` with the python engine) and
    the file is decoded as ``utf-8-sig``. The columns ``Date_Time`` and
    ``Datetime`` are tried in that order; the first one that is present and
    parses as datetimes becomes the index. Timestamps without a time zone are
    interpreted as UTC, and the resulting index is converted to the
    ``Chile/Continental`` time zone.

    ARGS: path to the local .csv file

    RETURNS: the loaded pd.DataFrame, indexed by the timestamp column when one
    was found; otherwise the frame exactly as read from disk.
    '''

    data = pd.read_csv(path, sep=None, engine='python', encoding='utf-8-sig', parse_dates=True)

    for column in ("Date_Time", "Datetime"):
        if column not in data.columns:
            continue

        try:
            # utc=True localizes naive values as UTC and converts aware values
            # to UTC, so both kinds of input end up on the same footing.
            stamps = pd.to_datetime(data[column], utc=True)
        except (ValueError, TypeError, OverflowError):
            # Column exists but does not hold parseable timestamps: leave the
            # frame untouched and try the next candidate name.
            continue

        data[column] = stamps
        data.set_index(column, inplace=True)
        data.index = data.index.tz_convert("Chile/Continental")
        return data

    # No usable timestamp column: best effort, return the data as loaded.
    return data

# @st.cache(allow_output_mutation=True,suppress_st_warning=True)
def entrenar_modelos(df, etiqueta, metrica, ensamble=True, debug=True):

    '''
    Train classification models with PyCaret and return the selected model(s).

    ARGS: df (pd.DataFrame) with the training data,
    etiqueta (str) name of the target column in df,
    metrica (str) metric used to sort the candidates when ensamble is False
    (e.g. 'f1', 'accuracy', 'recall'); it is not used in the ensemble branch,
    where tuning is optimized for 'F1',
    ensamble (boolean, default True) whether to build a stacked ensemble,
    debug (boolean) currently unused.

    RETURNS: a 3-tuple. With ensamble=True: (stacked model, grid, grid), where
    grid is the result of supervised.pull() taken right after compare_models.
    With ensamble=False: (list of the 3 best models, grid, grid).
    '''

    # setup() is called for its effect of initialising the PyCaret experiment;
    # its return value is not needed here.
    supervised.setup(df, target=etiqueta, session_id=123, silent=True, use_gpu=False, profile=False, log_experiment=False, fix_imbalance=True)

    if not ensamble:
        best = supervised.compare_models(sort=metrica, n_select=3)
        grid = supervised.pull()
        return (best, grid, grid)

    top10 = supervised.compare_models(n_select=10)
    # Grab the comparison grid before any further PyCaret call is made.
    grid_a = supervised.pull()

    # Keep the five best candidates (the previous slice [0:4] kept only four).
    top5 = top10[:5]

    # Tune each of the top 5 base models.
    tuned_top5 = [supervised.tune_model(i, fold=5, optimize='F1', search_library='scikit-optimize') for i in top5]

    # The best-ranked tuned model acts as meta-model; the rest are the base
    # estimators of the stack.
    stacker = supervised.stack_models(estimator_list=tuned_top5[1:], meta_model=tuned_top5[0])
    return (stacker, grid_a, grid_a)


def deteccion_no_supervisada(df, metrica, etiqueta=None, ensamble=True):
    '''
    Placeholder for unsupervised anomaly detection (not implemented).

    ARGS: df, metrica, etiqueta and ensamble are accepted but ignored.

    RETURNS: always None.
    '''

def cargar_modelo(df, modelo):
    '''
    Load the saved PyCaret model named 'stack inicial' and return it.

    ARGS: df and modelo are accepted but not used; the model name is fixed.
    NOTE(review): modelo looks like it was meant to select the model to load -
    confirm with the callers before changing it.

    RETURNS: whatever supervised.load_model returns for 'stack inicial'.
    '''
    return supervised.load_model('stack inicial')

In [3]:
# Generación de gráficos

def generar_distrib(loaded_df, etiqueta):
    '''
    Build a grouped histogram of one column, split by the 'Etiqueta' column.

    ARGS: loaded_df (pd.DataFrame) containing the column to plot and an
    'Etiqueta' column used for colouring,
    etiqueta (str) name of the column whose distribution is plotted.

    RETURNS: the plotly figure.

    Relies on the colour palettes colors_green, colors_blue and colors_dark
    being defined at module level.
    '''
    # Use the DataFrame received as argument (the body previously read a
    # name `df` that is not defined in this function).
    figura = px.histogram(loaded_df, x=etiqueta, y=loaded_df[etiqueta], color='Etiqueta', template='plotly_white',
                    marginal='box', opacity=0.7, nbins=100, color_discrete_sequence=[colors_green[3], colors_blue[3]],
                    barmode='group', histfunc='count')

    figura.update_layout(
        font_family='monospace',
        title=dict(text=etiqueta, x=0.53, y=0.95,
                font=dict(color=colors_dark[2], size=20)),
        xaxis_title_text=etiqueta,
        yaxis_title_text='Count',
        legend=dict(x=1, y=0.96, bordercolor=colors_dark[4], borderwidth=0, tracegroupgap=5),
        bargap=0.3,
    )
    return figura

def generar_graficos(selected_df, etiqueta):
    '''
    Render the exploratory charts for the loaded data in the Streamlit app.

    Shows, in order: a styled statistical description of selected_df, a donut
    chart with the share of each value of the 'Etiqueta' column, and one
    histogram per entry of selected_features.

    ARGS: selected_df (pd.DataFrame) with an 'Etiqueta' column whose values
    0 and 1 are displayed as 'normal' and 'anomalía',
    etiqueta is accepted but not used.

    RETURNS: None; everything is written to the Streamlit page. selected_df
    is not modified.

    Relies on module-level names defined outside this function: a2 (Streamlit
    container), selected_features and the colors_* palettes.
    '''
    describe = selected_df.describe().T.style.bar(subset=['mean'], color='#E68193')\
            .background_gradient(subset=['std'], cmap='mako_r')\
                .background_gradient(subset=['50%'], cmap='mako')
    a2.subheader("Descripción estadística de los datos cargados")
    a2.dataframe(describe)

    # Human-readable labels, computed once and without touching the caller's
    # DataFrame.
    etiquetas = selected_df['Etiqueta'].replace([0, 1], ['normal', 'anomalía'])

    # Build the counts table with explicit column names so that each slice is
    # named after the value it really counts (value_counts() orders by
    # frequency, so a fixed ['normal', 'anomalía'] list could swap labels).
    conteo = etiquetas.value_counts()
    d = pd.DataFrame({'label': conteo.index, 'etiqueta conjunta': conteo.values})

    fig = px.pie(d, values='etiqueta conjunta', names='label', hole=0.4, opacity=0.6,
                color_discrete_sequence=[colors_blue[3], colors_green[3]],
                labels={'label': 'etiqueta conjunta', 'etiqueta conjunta': 'No. Of Samples'})

    fig.add_annotation(text='Los resultados sugieren un set de datos desbalanceados',
                    x=1.3, y=0.9, showarrow=False, font_size=18, opacity=0.7, font_family='monospace')
    fig.add_annotation(text='Etiquetado <br> Experto',
                    x=0.5, y=0.5, showarrow=False, font_size=14, opacity=0.7, font_family='monospace')

    fig.update_layout(
        font_family='monospace',
        title=dict(text='. Cuántos datos corresponden a datos normales?', x=0.47, y=0.98,
                font=dict(color=colors_dark[2], size=28)),
        legend=dict(x=0.37, y=-0.05, orientation='h', traceorder='reversed'),
        hoverlabel=dict(bgcolor='white'))

    fig.update_traces(textposition='outside', textinfo='percent+label')
    st.subheader('Composición de etiquetas:')
    st.plotly_chart(fig, use_container_width=True)

    st.subheader('Distribuciones de las características:')
    # Work on a copy carrying the readable labels instead of replacing the
    # values in the caller's DataFrame in place.
    datos_etiquetados = selected_df.copy()
    datos_etiquetados['Etiqueta'] = etiquetas
    for label in selected_features:
        f = generar_distrib(datos_etiquetados, label)

        st.plotly_chart(f, use_container_width=True)



In [6]:
# Forward slash avoids the invalid '\H' escape sequence and works on both
# Windows and POSIX systems.
f = load_data('data/Horcon_1L_full.csv')

In [9]:
# Column labels of the loaded data, as a plain Python list.
column_names = list(f.columns)
selected_df = 