In [10]:
import numpy as np
import pandas as pd
import os
import pyodbc

from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import DateRange
from google.analytics.data_v1beta.types import Dimension
from google.analytics.data_v1beta.types import Metric
from google.analytics.data_v1beta.types import MetricType
from google.analytics.data_v1beta.types import Filter
from google.analytics.data_v1beta.types import FilterExpression
from google.analytics.data_v1beta.types import FilterExpressionList
from google.analytics.data_v1beta.types import QuotaStatus
from google.analytics.data_v1beta.types import GetMetadataRequest
from google.analytics.data_v1beta.types import RunReportRequest

import datetime
from datetime import datetime, timedelta, date
import warnings
warnings.simplefilter("ignore")

In [11]:
# Service-account key file for the Google client.  setdefault() respects a path
# that is already exported in the environment (another machine, a scheduler)
# and only falls back to this workstation-specific location when none is set.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"C:\DARIEL\01 - Credenciales\CREDENCIALES GOOGLE\API GA4 MAYA YANBAL-8a9c90e8802e.json",
)

property_id = '374438551'  # GA4 property (report) ID

In [12]:
#   FUNCIONES DE FORMATO

#   Diccionario
def sample_run_report(property_id):
    """Return the dimension/metric metadata catalogue of a GA4 property.

    Requests ``properties/<property_id>/metadata`` through the Data API client
    and builds a DataFrame with one row per dimension followed by one row per
    metric.  Columns: Type, API_Name, UI_Name, Description, Custom_definition
    and Metric_type.  Every value is stored as a string; Metric_type is "N/A"
    for dimensions and the ``MetricType`` enum member name for metrics.
    """

    def as_record(kind, field, metric_type):
        # str() gives the same text the f-string interpolation would produce.
        return {
            "Type": kind,
            "API_Name": str(field.api_name),
            "UI_Name": str(field.ui_name),
            "Description": str(field.description),
            "Custom_definition": str(field.custom_definition),
            "Metric_type": metric_type,
        }

    metadata = BetaAnalyticsDataClient().get_metadata(
        GetMetadataRequest(name='properties/' + property_id + '/metadata')
    )

    records = [as_record("Dimension", dim, "N/A") for dim in metadata.dimensions]
    records.extend(
        as_record("Metric", met, str(MetricType(met.type_).name))
        for met in metadata.metrics
    )

    return pd.DataFrame(records)

def ga4_response_to_df(response):
    """Convert a GA4 report response into a DataFrame.

    Parameters
    ----------
    response
        Object exposing ``dimension_headers``, ``metric_headers`` and ``rows``
        (each row with ``dimension_values`` and ``metric_values``), as returned
        by ``client.run_report``.

    Returns
    -------
    pandas.DataFrame
        One row per response row; columns are the dimension header names
        followed by the metric header names.  Cell values are the raw
        ``.value`` attributes (no type conversion is done here).  When the
        response has no rows the frame is empty but still carries the header
        columns, so callers can select columns without a KeyError.
    """
    dimension_names = [header.name for header in response.dimension_headers]
    metric_names = [header.name for header in response.metric_headers]
    # dict.fromkeys de-duplicates while keeping order, mirroring how repeated
    # names collapse into a single key in the per-row dictionaries below.
    column_names = list(dict.fromkeys(dimension_names + metric_names))

    records = []
    for row in response.rows:
        record = {name: cell.value for name, cell in zip(dimension_names, row.dimension_values)}
        record.update({name: cell.value for name, cell in zip(metric_names, row.metric_values)})
        records.append(record)

    # Passing the columns explicitly is what keeps the schema on empty results.
    return pd.DataFrame(records, columns=column_names)

def get_ga4_report_df(property_id, dimensions, metrics, start_date, end_date):
    """Download a GA4 report page by page and return it as a single DataFrame.

    Parameters
    ----------
    property_id : str
        GA4 property ID; it is appended to ``'properties/'`` to build the
        resource name.
    dimensions, metrics : iterable of str
        API names wrapped into ``Dimension`` / ``Metric`` objects.
    start_date, end_date : str
        Passed unchanged to ``DateRange``.

    Returns
    -------
    pandas.DataFrame
        Every page converted with ``ga4_response_to_df``; when more than one
        page was fetched the pages are concatenated with a fresh 0..n-1 index.

    Notes
    -----
    Every request carries the same hard-coded dimension filter (both
    conditions must hold, partial regular-expression match):
    ``customEvent:code_directora`` contains a digit and ``pagePath`` contains
    "/yanbi." literally.

    After each page the cumulative ``(rows, columns)`` and the page counter
    are printed, as progress output.
    """
    page_size = 250000

    dimensions_ga4 = [Dimension(name=name) for name in dimensions]
    metrics_ga4 = [Metric(name=name) for name in metrics]

    # The filter does not depend on the page, so it is built once.  Raw strings
    # keep the regex backslashes without relying on invalid escape sequences.
    partial_regexp = Filter.StringFilter.MatchType.PARTIAL_REGEXP
    dimension_filter = FilterExpression(
        and_group=FilterExpressionList(
            expressions=[
                FilterExpression(
                    filter=Filter(
                        field_name="customEvent:code_directora",
                        string_filter=Filter.StringFilter(match_type=partial_regexp, value=r"\d"),
                    )
                ),
                FilterExpression(
                    filter=Filter(
                        field_name="pagePath",
                        string_filter=Filter.StringFilter(match_type=partial_regexp, value=r"/yanbi\."),
                    )
                ),
            ],
        )
    )

    # One client is enough for all pages.
    client = BetaAnalyticsDataClient()

    pages = []
    total_rows = 0
    offset = 0

    while True:
        response = client.run_report(
            RunReportRequest(
                property='properties/' + property_id,
                return_property_quota=True,
                dimensions=dimensions_ga4,
                metrics=metrics_ga4,
                date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
                dimension_filter=dimension_filter,
                limit=page_size,
                offset=offset,
            )
        )

        # The first page is always kept (even when empty) so the caller gets a
        # DataFrame back; any later empty page just means we are done.
        if pages and not response.rows:
            break

        page_df = ga4_response_to_df(response)
        pages.append(page_df)
        total_rows += len(page_df)
        print((total_rows, page_df.shape[1]))
        print(len(pages))

        offset += page_size
        # row_count is the total size of the result, independent of
        # limit/offset, so once the next offset reaches it there is nothing
        # left to fetch and the extra (quota-consuming) request is skipped.
        if offset >= response.row_count:
            break

    if len(pages) == 1:
        return pages[0]
    # Single concat at the end instead of growing the frame inside the loop.
    return pd.concat(pages, ignore_index=True)

def cargarSQLSERVER(table_name, df):
    """Append every row of ``df`` to a SQL Server table in one transaction.

    Parameters
    ----------
    table_name : str
        Target table.  It is interpolated verbatim into the INSERT statement,
        so it must come from trusted code, never from user input.
    df : pandas.DataFrame
        Rows to insert.  Values are bound positionally (``INSERT ... VALUES``
        without a column list), so the frame's column order must match the
        table's.  Missing values (None / NaN / NaT) are sent as NULL.

    Notes
    -----
    Connects with the module-level ``ecua_cadena_coneccion`` connection string.
    All rows are committed together; if any insert fails the transaction is
    rolled back and the exception propagates.  The connection is always
    closed.  An empty frame is a no-op (no connection is opened).
    """
    if df.empty:
        return

    # astype(object) yields plain Python scalars for the driver; missing-value
    # markers are replaced with None so they are bound as NULL.
    rows = [
        [None if (pd.api.types.is_scalar(value) and pd.isna(value)) else value for value in record]
        for record in df.astype(object).values.tolist()
    ]

    placeholders = ', '.join(['?'] * len(df.columns))
    query = f"INSERT INTO {table_name} VALUES ({placeholders})"

    ecua_conn = pyodbc.connect(ecua_cadena_coneccion)
    try:
        with ecua_conn.cursor() as cursor:
            cursor.executemany(query, rows)
        ecua_conn.commit()
    except Exception:
        # Leave the table untouched rather than half-loaded.
        ecua_conn.rollback()
        raise
    finally:
        ecua_conn.close()

In [13]:
# Disabled cell: the whole body is wrapped in a triple-quoted string literal, so
# none of it runs (the cell just evaluates to that string).  With the quotes
# removed it would fetch the property's metadata via sample_run_report(),
# preview the first 25 rows and export the list to a dated .xlsx file.
'''
df_ReporteDiccionario = sample_run_report(property_id)

df_ReporteDiccionario.head(25)

## Se crea un archivo Excell con la lista de todas las métricas a exportar
df_ReporteDiccionario.to_excel(f'Lista de Métricas y Dimensiones a Exportar_{str(date.today())}.xlsx', sheet_name = 'GA4_report', engine = 'xlsxwriter', index=False, header=True)
'''

In [14]:
ecua_ds_srv = r'ECUSRVDW0\ECUA'
ecua_ds_db = r'DWUniqueYanbal'

ecua_cadena_coneccion = 'DRIVER={SQL Server Native Client 11.0};SERVER='+ecua_ds_srv+';DATABASE='+ecua_ds_db+';Trusted_Connection=yes'

#   LOOK UP THE CAMPAIGN WEEK OF THE REPORTED DAY
#
#   The GA4 cells below query YESTERDAY's data, so the campaign / week label
#   must be the one that contains yesterday, not today.  Looking it up with
#   GETDATE() mislabels the rows whenever the notebook runs on the first day of
#   a new week.  The date is bound as a parameter (a date object, i.e. midnight)
#   so the range check is inclusive on both the first and the last day.
fecha_infoCampania = date.today() - timedelta(days=1)

query_infoCampania = '''
SELECT
	intNumeroCampania,
	'S'+CAST(smlNumeroSemanaCampania AS VARCHAR) smlNumeroSemanaCampania,
	dtmFechaInicioSemana,
	dtmFechaFinSemana
FROM DWUniqueYanbal.dbo.Tiempo
WHERE dtmFechaInicioSemana <= ? AND dtmFechaFinSemana >= ?
'''

ecua_conn = pyodbc.connect(ecua_cadena_coneccion)
try:
    df_infoCampania = pd.read_sql_query(
        query_infoCampania,
        ecua_conn,
        params=[fecha_infoCampania, fecha_infoCampania],
    )
finally:
    # Release the connection even when the query fails.
    ecua_conn.close()

# The cells below read row 0 of this frame; fail here with a clear message
# instead of a KeyError later on.
if df_infoCampania.empty:
    raise ValueError(f"No campaign week found in dbo.Tiempo for {fecha_infoCampania}")

df_infoCampania['intNumeroCampania'] = df_infoCampania['intNumeroCampania'].astype(str)
df_infoCampania['dtmFechaInicioSemana'] = df_infoCampania['dtmFechaInicioSemana'].dt.strftime('%Y-%m-%d')
df_infoCampania['dtmFechaFinSemana'] = df_infoCampania['dtmFechaFinSemana'].dt.strftime('%Y-%m-%d')
df_infoCampania

Unnamed: 0,intNumeroCampania,smlNumeroSemanaCampania,dtmFechaInicioSemana,dtmFechaFinSemana
0,202307,S4,2023-07-08,2023-07-14


##  Cargar data

In [15]:
#   Query date: yesterday, as a 'YYYY-MM-DD' string
previous_datetime = (date.today() - timedelta(days=1)).isoformat()
#   Load timestamp: current local time, as a 'YYYY-MM-DD HH:MM:SS' string
current_datetime = datetime.now().isoformat(sep=' ', timespec='seconds')

In [None]:
# Disabled manual override: the body is a triple-quoted string literal, so it
# does not run.  With the quotes removed it would pin the campaign-week label to
# 'S3' and the query date to '2023-07-07', then print both values.
# NOTE(review): presumably kept for re-loading a past day by hand — confirm.
'''
df_infoCampania['smlNumeroSemanaCampania'] = 'S3'
previous_datetime = '2023-07-07'
print(df_infoCampania)
print(previous_datetime)
'''

In [18]:
#   SESSIONS

dimensions = ['date', 'customEvent:code_directora']
metrics = ['sessions', 'userEngagementDuration', 'averageSessionDuration']
start_date = end_date = previous_datetime

# Fetch the report, stamp it with campaign / week / load time, cast the metric
# columns (they arrive as text) and put the columns in load order.
df_GA4_Sesiones = (
    get_ga4_report_df(property_id, dimensions, metrics, start_date, end_date)
    .assign(
        CAMPANIA=df_infoCampania.loc[0, 'intNumeroCampania'],
        CAMPANIA_SEMANA=df_infoCampania.loc[0, 'smlNumeroSemanaCampania'],
        FECHA_CARGA=current_datetime,
    )
    .astype({
        'sessions': int,
        'userEngagementDuration': int,
        'averageSessionDuration': float,
    })
    .loc[:, [
        'CAMPANIA',
        'CAMPANIA_SEMANA',
        'date',
        'customEvent:code_directora',
        'sessions',
        'userEngagementDuration',
        'averageSessionDuration',
        'FECHA_CARGA',
    ]]
)

# Load step is currently disabled; uncomment to insert into SQL Server.
#cargarSQLSERVER('[DWCorporacion].dbo.GA_Piloto_GrupoPersonal_Sesiones', df_GA4_Sesiones)

(54, 5)
1


In [20]:
#   SECTIONS

dimensions = ['date','customEvent:code_directora','customEvent:cns_codigo','eventName','customEvent:path','customEvent:titulo','customEvent:elemento']
metrics = ['eventCount']
start_date = previous_datetime
end_date = previous_datetime

df_GA4_Secciones = get_ga4_report_df(property_id, dimensions, metrics, start_date, end_date)

df_GA4_Secciones['CAMPANIA'] = df_infoCampania.loc[0, 'intNumeroCampania']
df_GA4_Secciones['CAMPANIA_SEMANA'] = df_infoCampania.loc[0, 'smlNumeroSemanaCampania']
df_GA4_Secciones['FECHA_CARGA'] = current_datetime

df_GA4_Secciones['eventCount'] = df_GA4_Secciones['eventCount'].astype(int)
# Strip apostrophes from the titles.
df_GA4_Secciones['customEvent:titulo'] = df_GA4_Secciones['customEvent:titulo'].replace("'", '', regex=True)

df_GA4_Secciones = df_GA4_Secciones[['CAMPANIA','CAMPANIA_SEMANA','date','customEvent:code_directora','customEvent:cns_codigo','eventName','customEvent:path','customEvent:titulo','customEvent:elemento','eventCount','FECHA_CARGA']]


# ---- Title normalisation: the steps below are order-dependent ----
col_titulo = 'customEvent:titulo'
es_content_view = df_GA4_Secciones['eventName'] == 'content_view'

#   1. Blank titles become "Login" (content_view events only)
df_GA4_Secciones.loc[es_content_view & (df_GA4_Secciones[col_titulo] == ''), col_titulo] = 'Login'

#   2. Rename legacy section titles: Reactiva -> Reactivar, Repitentes -> Activas
df_GA4_Secciones[col_titulo] = df_GA4_Secciones[col_titulo].replace({'Reactiva': 'Reactivar', 'Repitentes': 'Activas'})

#   3. Any other content_view title is grouped under
#      "Grupo Personal - detalle consultora".
#      'Activas' has to be whitelisted: step 2 has already renamed 'Repitentes',
#      so without it every renamed row would be regrouped here.
items_list = ['Inicio', 'Iniciar Sesión', 'Deuda de Campaña', 'Retener', 'Reactivar', 'Primeros Pedidos', 'Repitentes', 'Activas', 'Grupo Personal', 'Términos y Condiciones', 'Inactivas', 'Login', 'Como leer mi estructura']
es_otro_titulo = ~df_GA4_Secciones[col_titulo].isin(items_list)
df_GA4_Secciones.loc[es_content_view & es_otro_titulo, col_titulo] = 'Grupo Personal - detalle consultora'

#   4. PDF download clicks get a fixed title
es_descarga_pdf = df_GA4_Secciones['eventName'] == 'event_click_descargar_pdf'
df_GA4_Secciones.loc[es_descarga_pdf, col_titulo] = 'Descargar Estructura'

#   5. The path wins over whatever title was derived above
titulo_por_path = {
    '/inicio/reactivar': 'Reactivar',
    '/inicio/repitentes': 'Activas',
    '/inicio/primeros_pedidos': 'Primeros Pedidos',
    '/inicio/retener': 'Retener',
    '/inicio/deuda_de_campaña': 'Deuda de Campaña',
}
for path_seccion, titulo_seccion in titulo_por_path.items():
    df_GA4_Secciones.loc[df_GA4_Secciones['customEvent:path'] == path_seccion, col_titulo] = titulo_seccion

#   6. /inicio/inactivas is relabelled only when its title is still "Inactivas"
es_inactivas = (df_GA4_Secciones['customEvent:path'] == '/inicio/inactivas') & (df_GA4_Secciones[col_titulo] == 'Inactivas')
df_GA4_Secciones.loc[es_inactivas, col_titulo] = 'Reactivar'

# Load step is currently disabled; uncomment to insert into SQL Server.
#cargarSQLSERVER('[DWCorporacion].dbo.GA_Piloto_GrupoPersonal_Secciones', df_GA4_Secciones)

(596, 8)
1


In [22]:
#   CONSULTANT (WhatsApp contact clicks)

dimensions = ['date', 'customEvent:cns_codigo','customEvent:nombre_cns_contactada','customEvent:numero_whatssap','eventName']
metrics = []
start_date = previous_datetime
end_date = previous_datetime

df_GA4_Consultora = get_ga4_report_df(property_id, dimensions, metrics, start_date, end_date)

#   Consolidation
df_GA4_Consultora['FECHA_CARGA'] = current_datetime

df_GA4_Consultora = (
    df_GA4_Consultora
    # keep only the WhatsApp click events
    .loc[df_GA4_Consultora['eventName'] == 'event_click_whatssap']
    # one row per date / consultant / contacted name; the phone number is not
    # part of the key, so the last number seen is the one kept
    .drop_duplicates(subset=['date', 'customEvent:cns_codigo','customEvent:nombre_cns_contactada','eventName'], keep='last')
    # drop rows without a consultant code
    .loc[lambda frame: frame['customEvent:cns_codigo'] != '(not set)']
    # reset last, so the final frame has a gap-free 0..n-1 index
    .reset_index(drop=True)
)

# Load step is currently disabled; uncomment to insert into SQL Server.
#cargarSQLSERVER('[DWCorporacion].dbo.GA_Piloto_GrupoPersonal_Consultora', df_GA4_Consultora)

(248, 5)
1
