# funcion para obtener informacion sin limite de datos

In [9]:
import requests
import json
from requests.auth import HTTPBasicAuth
import time
import pandas as pd
import re

# Configuración warnings
import warnings
warnings.filterwarnings('ignore')

def elasticScroll(elasticParameters, query, pages):
    # parametros de salida
    # parametros del indice
    elasticURL = elasticParameters["elasticURL"]
    elasticIndex = elasticParameters["elasticIndex"]
    elasticUser = elasticParameters["elasticUser"]
    elasticPassword = elasticParameters["elasticPassword"]
    
    if(len(elasticURL)==0) or (len(elasticIndex)==0) or (len(elasticUser)==0) or (len(elasticPassword)==0):
        raise Exception("Revisa los parametros")
    # se define la url que apunta al indice de elastic
    url_search = f"{elasticURL}/{elasticIndex}/_search?scroll=1m"
    # se ejecuta la query
    response = requests.get(url_search, json=query, auth=HTTPBasicAuth(elasticUser, elasticPassword))
    # retorna una lista con el resultado de la query
    search = json.loads(response.text)
    # guardamos el scroll id correspondiente a la query
    scroll_id = search["_scroll_id"]
    # url scroll
    url_scroll = f"{elasticURL}/_search/scroll"
    scroll_query = {
                "scroll": "1m",
                "scroll_id": f"{scroll_id}"
            }

    # condiciones iniciales
    scroll_search = {"hits":{"hits":1}}
    if pages:
        # hay paginacion
        # condiciones iniciales
        from_ = pages["from"]
        size_ = pages["size"]
        count = len(search["hits"]["hits"])

        while scroll_search["hits"]["hits"] and count < from_ + size_:
            scroll_response = requests.get(url_scroll, json=scroll_query, auth=HTTPBasicAuth(elasticUser, elasticPassword))
            scroll_search = json.loads(scroll_response.text)
            if not scroll_search["hits"]["hits"]:
                continue
            else:
                search["hits"]["hits"].extend(scroll_search["hits"]["hits"])
            count += len(scroll_search["hits"]["hits"])

        search["hits"]["hits"] = search["hits"]["hits"][from_:from_+size_+1]

    else:
        # Se devuelven todos los resultados
        while scroll_search["hits"]["hits"]:
            scroll_response = requests.get(url_scroll, json=scroll_query,
                                           auth=HTTPBasicAuth(elasticUser, elasticPassword))
            scroll_search = json.loads(scroll_response.text)
            if not scroll_search["hits"]["hits"]:
                continue
            else:
                search["hits"]["hits"].extend(scroll_search["hits"]["hits"])
    # Se elina el campo scroll del scroll_body
    del scroll_query["scroll"]
    # Se elimina el scroll de elasticsearch para liberar memoria
    delete = requests.delete(url_scroll, json=scroll_query, auth=HTTPBasicAuth(elasticUser, elasticPassword))
    return search

In [10]:
# definiendo parametros de entrada

In [11]:
# datos de conexion a elastic
elasticParameters = {"elasticURL": "https://es-dev.e-contact.cl"
                    , "elasticIndex": "lea_sequences-events-banco_de_chile" 
                    , "elasticUser": "jcalderon"
                    , "elasticPassword": "jcalderon123"
                    }

# query custom para obtener fechas
query = {
  "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "match_all": {}
        },
        {
            
          "range": {
            "interactionData.dateTimeUTC": {
              "format": "strict_date_optional_time",
              "gte": "2022-05-26T00:00:00.000Z",
              "lte": "2022-05-30T00:00:00.000Z"
            }
          }
        }
      ],
      "should": [],
      "must_not": []
    }
  }
}

# total de paginas
pages =  {
    "from": 0,
    "size": 5000
}

# consulta a indice

In [16]:
response = elasticScroll(elasticParameters, query, pages)

# transformar response (dict) a dataframe

In [17]:
from pandas import DataFrame, json_normalize
import numpy as np

df = json_normalize(json.loads(json.dumps(response))["hits"]["hits"])

df.info()

df_00['interactionId'] =df['_source.interactionData.interactionId']
df_00['agentName']     =df['_source.interactionData.agentName']
df_00['duration_ID']   =df['_source.interactionData.duration']
df_00['duration_CS']   =df['_source.conversationSequence.duration']

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 49 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   _index                                                   5000 non-null   object 
 1   _type                                                    5000 non-null   object 
 2   _id                                                      5000 non-null   object 
 3   _score                                                   5000 non-null   float64
 4   _source.interactionData.interactionId                    5000 non-null   object 
 5   _source.interactionData.dateTimeUTC                      5000 non-null   object 
 6   _source.interactionData.channelType                      5000 non-null   object 
 7   _source.interactionData.serviceName                      5000 non-null   object 
 8   _source.interactionData.dire

In [23]:
#df['interactionId'] =df['_source.interactionData.interactionId']
#df['agentName']     =df['_source.interactionData.agentName']
#df['duration_ID']   =df['_source.interactionData.duration']
#df['duration_CS']   =df['_source.conversationSequence.duration']

df = df[['interactionId','agentName','duration_ID','duration_CS']]

df.to_csv("C:\\Users\\jcalderon\\Desktop\\Trafico\\Python_2021\\ProyectoBCH\\" + 'Ejecutivos'+'.csv', index = False)

In [8]:
from pandas import DataFrame, json_normalize
import numpy as np

df = json_normalize(json.loads(json.dumps(response))["hits"]["hits"])
df_00 = df [['_source.conversationSequence.topicName','_source.interactionData.interactionId','_source.interactionData.dateTimeUTC']]
df_00['topicName']= df_00['_source.conversationSequence.topicName']
df_00['interactionId']= df_00['_source.interactionData.interactionId']
df_00['Fec']= df_00['_source.interactionData.dateTimeUTC']
df_00['hora']= pd.to_datetime(df_00['Fec'].apply(str).str.slice(start = 0, stop = 13))
df_00['fecha'] = pd.to_datetime(df_00['Fec']).dt.date
df_00['hora'] = pd.to_datetime(df_00['hora']).dt.time
df_00['dia'] = pd.to_datetime(df_00['fecha'])
df_00['dia'] = df_00['dia'].dt.day_name()
df_00["topicName"] = df_00["topicName"].replace({ "": "NO IDENTIFICADO", " ": "NO IDENTIFICADO" })
df_00 = df_00[['topicName','interactionId','fecha','hora','dia']]
df_00['key'] =df_00['topicName'] + ',' + df_00['interactionId'] + ',' + str(df_00['fecha']) + ',' + str(df_00['hora'])+ ',' + df_00['dia']
df_00 = df_00.drop_duplicates()
df_grafo2 = pd.DataFrame({'topicName': ['Corte llamado', 'Red sucursal','Reclamación','Escalamiento de llamada','Reincidencia'], 'matched': True})

############################ AGRUPA SEGUN REQUERIMIENTO (FECHA),(FECHA,HORA),(FECHA,HORA,DIA) ############################

#agrupadoFecha = df_00.groupby(['topicName','fecha']).agg({'key': 'count'}).reset_index()
#agrupadoFecha['cantidad'] = agrupadoFecha['key']
#del agrupadoFecha['key']
#agrupadoFecha['total'] = agrupadoFecha['cantidad'].sum()
#agrupadoFecha = df_grafo2.merge(agrupadoFecha, how='right', on='topicName')
#agrupadoFecha['grafo']=agrupadoFecha['matched']
#del agrupadoFecha['matched']
#agrupadoFecha['grafo']=agrupadoFecha['grafo'].apply(lambda x: 1 if x!=1 else 0)
#agrupadoFecha['grafo']=agrupadoFecha['grafo'].replace({0:2})

#agrupadoFechaHora = df_00.groupby(['topicName','fecha','hora']).agg({'key': 'count'}).reset_index()
#agrupadoFechaHora['cantidad'] = agrupadoFechaHora['key']
#del agrupadoFechaHora['key']
#agrupadoFechaHora_A = agrupadoFechaHora.groupby(['fecha','hora']).agg({'cantidad': 'sum'}).reset_index()
#agrupadoFechaHora_A['total'] = agrupadoFechaHora_A['cantidad']
#del agrupadoFechaHora_A['cantidad']
#agrupadoFechaHora = agrupadoFechaHora.merge(agrupadoFechaHora_A, how='inner', on=['fecha','hora'])
#agrupadoFechaHora['final'] =((agrupadoFechaHora['cantidad']) / agrupadoFechaHora['total'])
#agrupadoFechaHora = df_grafo2.merge(agrupadoFechaHora, how='right', on='topicName')
#agrupadoFechaHora['grafo']=agrupadoFechaHora['matched']
#del agrupadoFechaHora['matched']
#agrupadoFechaHora['grafo']=agrupadoFechaHora['grafo'].apply(lambda x: 1 if x!=1 else 0)
#agrupadoFechaHora['grafo']=agrupadoFechaHora['grafo'].replace({0:2})


agrupadoFechaHoraDia = df_00.groupby(['topicName','fecha','hora','dia']).agg({'key': 'count'}).reset_index()
agrupadoFechaHoraDia['cantidad'] = agrupadoFechaHoraDia['key']
del agrupadoFechaHoraDia['key']
agrupadoFechaHoraDia_A = agrupadoFechaHoraDia.groupby(['fecha','hora','dia']).agg({'cantidad': 'sum'}).reset_index()
agrupadoFechaHoraDia_A['total'] = agrupadoFechaHoraDia_A['cantidad']
del agrupadoFechaHoraDia_A['cantidad']
agrupadoFechaHoraDia = agrupadoFechaHoraDia.merge(agrupadoFechaHoraDia_A, how='inner', on=['fecha','hora','dia'])
agrupadoFechaHoraDia['final'] =((agrupadoFechaHoraDia['cantidad']) / agrupadoFechaHoraDia['total'])
agrupadoFechaHoraDia = df_grafo2.merge(agrupadoFechaHoraDia, how='right', on='topicName')
agrupadoFechaHoraDia['grafo']=agrupadoFechaHoraDia['matched']
del agrupadoFechaHoraDia['matched']
agrupadoFechaHoraDia['grafo']=agrupadoFechaHoraDia['grafo'].apply(lambda x: 1 if x!=1 else 0)
agrupadoFechaHoraDia['grafo']=agrupadoFechaHoraDia['grafo'].replace({0:2})


agrupadoFechaHoraDia



Unnamed: 0,topicName,fecha,hora,dia,cantidad,total,final,grafo
0,Activación de productos,2022-05-27,15:00:00,Friday,1,113,0.00885,1
1,Baja de productos,2022-05-27,15:00:00,Friday,8,113,0.070796,1
2,Cambio de ejecutivo,2022-05-27,15:00:00,Friday,1,113,0.00885,1
3,Clave Digipass,2022-05-27,15:00:00,Friday,1,113,0.00885,1
4,Consulta compras TD y TC,2022-05-27,15:00:00,Friday,10,113,0.088496,1
5,Consulta ejecutivo de cuenta,2022-05-27,15:00:00,Friday,2,113,0.017699,1
6,Consulta estado de cuenta y facturación,2022-05-27,15:00:00,Friday,9,113,0.079646,1
7,Consulta por cheques,2022-05-27,15:00:00,Friday,1,113,0.00885,1
8,Corte llamado,2022-05-27,15:00:00,Friday,3,113,0.026549,2
9,Emergencias bancarias,2022-05-27,15:00:00,Friday,8,113,0.070796,1
