## Para analizar y limpiar datos del JSON Growth

In [152]:
import json
import pandas as pd
import plotly.express as px

In [153]:
import json

with open('data_ciabatta.json', 'r') as f:
    datos_ciabatta = json.load(f)

with open('data_vicenta.json', 'r') as f:
    datos_vicenta = json.load(f)

# Merge the data
datos_merged = datos_ciabatta + datos_vicenta

# Save the merged data to a new JSON file
new_filename = 'nuevo.json'
with open(new_filename, 'w') as f:
    json.dump(datos_merged, f)

In [154]:
df = pd.read_json('nuevo.json')

In [155]:
df.to_excel('GRW.xlsx', index=False)

In [156]:
import pandas as pd
import json

# Lee el archivo XLSX y obtén los datos de la columna
df = pd.read_excel('GRW.xlsx')
columna = df['detail']

# Analiza las cadenas JSON y crea una lista de diccionarios
data = []
for item in columna:
    diccionarios = json.loads(item.replace("'", '"'))
    for diccionario in diccionarios.values():
        data.extend(diccionario)

# Crea un DataFrame a partir de la lista de diccionarios
df_resultado = pd.DataFrame(data)

# Obtiene la información del DataFrame
filas_totales = df_resultado.shape[0]
columnas_totales = df_resultado.shape[1]

# Imprime la información del DataFrame
print("Filas totales:", filas_totales)
print("Columnas totales:", columnas_totales)


Filas totales: 172
Columnas totales: 22


In [157]:
# Convierte la columna de fecha a formato datetime si no está en ese formato
df_resultado['month'] = pd.to_datetime(df_resultado['month'])

# Ordena el DataFrame por la columna de fecha en orden ascendente
df_resultado = df_resultado.sort_values('month')

# Obtiene la primera fecha del DataFrame
primera_fecha = df_resultado['month'].iloc[0]

print("La primera fecha en el conjunto de datos es:", primera_fecha)

La primera fecha en el conjunto de datos es: 2023-03-01 00:00:00


In [158]:
df_resultado = df_resultado.drop(['cityName', 'country', 'brandId'], axis=1)

In [159]:
num_filas_duplicadas = df_resultado.duplicated().sum()
print("Número de filas duplicadas: ", num_filas_duplicadas)

Número de filas duplicadas:  121


In [161]:
df_resultado.to_excel('OperadoresGRW.xlsx', index=False)

## Para extraer data de Platforms de Index 

In [35]:
import awswrangler as wr
import pandas as pd
import boto3
import datetime
import json
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('mode.chained_assignment', None)

In [36]:
L1_OPERATIONS_BUCKET = 'l1-operations'
L1_OPERATIONS_PREFIX = 'platforms-tables/last-snapshot/'
L1_OPERATIONS_STORES = 'platforms_store.csv.gz'
L1_OPERATIONS_BRANCHES = 'platforms_branch_full.csv.gz'
L1_OPERATIONS_BRANDS = 'platforms_brand.csv.gz'

# *Sales Channels
L1_BUCKET = 'l1-sales-channels'


def get_stores():
    """
        Read L1 data tables of operations app.
        
        Returns:
            df: DataFrame
    """
    s3_bucket = L1_OPERATIONS_BUCKET
    s3_prefix = L1_OPERATIONS_PREFIX.strip("/")
    s3_stores = L1_OPERATIONS_STORES
    path = f"s3://{s3_bucket}/{s3_prefix}/{s3_stores}"

    df_stores = wr.s3.read_csv(path=path, dataset=False)
    
    df_stores = (df_stores.rename(columns={
                                 "start_date": "store_start_date",
                                 "end_date": "store_end_date",
                                 "is_integrated": "store_is_integrated"}))
    df_stores["app_store_id"] = df_stores.app_store_id.astype(str)

    return df_stores


def get_branches():
    """
        Read L1 data tables of operations app.
        
        Returns:
            df: DataFrame
    """
    s3_bucket = L1_OPERATIONS_BUCKET
    s3_prefix = L1_OPERATIONS_PREFIX.strip("/")
    s3_branches = L1_OPERATIONS_BRANCHES
    path = f"s3://{s3_bucket}/{s3_prefix}/{s3_branches}"
    df_branches = wr.s3.read_csv(path=path, dataset=False)
    
    return df_branches
    

def get_brands():
    """
        Read L1 data tables of operations app.
        Returns:
            df: DataFrame
    """
    s3_bucket = L1_OPERATIONS_BUCKET
    s3_prefix = L1_OPERATIONS_PREFIX.strip("/")
    s3_brands = L1_OPERATIONS_BRANDS
    path = f"s3://{s3_bucket}/{s3_prefix}/{s3_brands}"

    df_brands = wr.s3.read_csv(path=path, dataset=False)
    df_brands = df_brands[["id", "name"]].rename(columns={"id": "brand_id", "name": "brand_name"})
    
    return df_brands


def get_platforms_data():
    """
        Merge tables of operations app.
        Returns:
            df: DataFrame
    """
    try:
        # Brands Definition
        df_brands = get_brands()
        df_brands = df_brands[['brand_id', 'brand_name']]

        # Branches Definition
        df_branches = get_branches()
        df_branches = df_branches[['id', 'name', 'address', 'is_own', 'operator_id', 'operator_name', 'lat', 'lng', 'city_country']]

        # Stores Definition
        df_stores = get_stores()
        df_stores = df_stores[['branch_id', 'brand_id', 'app', 'app_store_id', 'is_active', 'store_start_date', 'store_end_date']]

        # Merge Stores & Brands
        df_stores_brands = pd.merge(df_stores, df_brands, how="left", left_on='brand_id', right_on='brand_id')
        # Add Branches Information
        df = pd.merge(df_stores_brands, df_branches, how="left", left_on='branch_id', right_on='id')

        # Selection of Columns
        df = df[['app_store_id', 'branch_id', 'name', 'brand_id', 'brand_name', 'app', 
                 'store_start_date', 'store_end_date', 'is_active', 
                 'is_own', 'operator_id', 'operator_name',
                 'address', 'lat', 'lng', 'city_country']]
        df = df.rename(columns={'name': 'branch_name', 'city_country': 'country'})

        # Create MARGIN store_start_date to be use in order logic
        for index, row in df.iterrows():
            try:
                # If store_id was used before...
                idx =  df[(df['app_store_id'] == row['app_store_id']) & 
                          (df['store_start_date'] != row['store_start_date']) & 
                          (df['store_end_date'] < row['store_start_date'])].index.values.astype(int)[0]

                df.loc[index, 'store_margin_start_date'] = df.at[idx,'store_end_date']
            except:
                pass
            
        df['store_start_date'] = pd.to_datetime(df['store_start_date'])
        df['store_margin_start_date'] = pd.to_datetime(df['store_margin_start_date'])
        df['store_margin_start_date'] = np.where(df['store_margin_start_date'].isnull(), 
                                                 df['store_start_date'] - datetime.timedelta(days=90), 
                                                 df['store_margin_start_date'])
        
        # Restore to default type
        df['store_start_date'] = df['store_start_date'].dt.strftime('%Y-%m-%d')
        df['store_margin_start_date'] = df['store_margin_start_date'].dt.strftime('%Y-%m-%d')
        
        # Fill Franchise Features
        df["operator_name"].fillna("Kitchenita", inplace = True)

        return { "statusCode": 200, "df": df }
    except Exception as err:
        return { "statusCode": 500, "body": f"Failed while trying to get Platforms DATA: {err}" }

In [37]:
dresponse  = get_platforms_data()
dfStores   = dresponse['df']

dfStoresRP = dfStores[(dfStores['app'] == 'RP')]

dfStoresRP['app_store_id'] = dfStoresRP['app_store_id'].astype(int)

dfStoresRP

Unnamed: 0,app_store_id,branch_id,branch_name,brand_id,brand_name,app,store_start_date,store_end_date,is_active,is_own,operator_id,operator_name,address,lat,lng,country,store_margin_start_date
0,134906,3,Villa Urquiza - Quesada Kitchenita,6,Hiro Sushi Salad,RP,2021-06-01,2021-07-15,False,True,36,Kitchenita Argentina,Quesada 5050,-34.568001,-58.489101,AR,2021-05-31
1,180610,2,Almagro - Bustamante,39,Barrigón,RP,2021-08-02,2021-12-24,False,True,36,Kitchenita Argentina,Bustamante 875,-34.600340,-58.413653,AR,2021-05-04
2,138630,3,Villa Urquiza - Quesada Kitchenita,15,Mattinata,RP,2021-06-01,2021-09-01,False,True,36,Kitchenita Argentina,Quesada 5050,-34.568001,-58.489101,AR,2021-05-31
3,134947,2,Almagro - Bustamante,13,Santa Pizza,RP,2020-11-27,2021-06-16,False,True,36,Kitchenita Argentina,Bustamante 875,-34.600340,-58.413653,AR,2020-08-29
4,134953,2,Almagro - Bustamante,22,Milanesas Porteñas,RP,2020-12-17,2021-06-13,False,True,36,Kitchenita Argentina,Bustamante 875,-34.600340,-58.413653,AR,2020-09-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,900372571,92,Suba - Carrera 50,4,Planta Base,RP,2023-06-01,,True,False,67,Campo Trece Comida Rápida,Cra 50 # 142-68,4.726545,-74.054287,CO,2023-03-03
568,900372592,91,Santa Barbara - Carrera 11A,8,Vicenta,RP,2023-06-15,,True,False,66,La artesana del pan,Carrera 11A # 116-53,4.695728,-74.037830,CO,2023-03-17
587,211709,69,Belgrano - Mi Barrio,33,Juliana Lopez May,RP,2023-04-21,,True,False,47,Mi Barrio,La Pampa 2747,-34.566558,-58.457493,AR,2023-01-21
593,211940,73,Ibis Monserrat - Hipolito Yrigoyen,8,Vicenta,RP,2023-06-29,,True,False,51,Ibis Congreso,Hipolito Yrigoyen 1592,-34.610016,-58.388986,AR,2023-03-31


In [38]:
country = "AR"
city = "BUE"

from_date = datetime.date(2023,1,15)
to_date = datetime.date(2023,7,5)

df_index = pd.DataFrame()
prefix = "s3://l2-food-crawlers/rappi/index"
for days_offset in range((to_date - from_date).days + 1):
    date = from_date + datetime.timedelta(days=days_offset)
    path = f"{prefix}/country={country}/city={city}/year={date.year:04d}/month={date.month:02d}/day={date.day:02d}"
    
    # Si no están todos los días...
    try:
        df_date_index = wr.s3.read_parquet(path, dataset=True)

        df_date_index = pd.merge(df_date_index, dfStoresRP, left_on="store_id", right_on='app_store_id')
        #df_index = df_index.append(df_date_index, ignore_index=True)
        df_index = pd.concat([df_index, df_date_index])
    except Exception as err:
        print(f"No encontró {date}")

No encontró 2023-04-26
No encontró 2023-04-27
No encontró 2023-04-28
No encontró 2023-04-29
No encontró 2023-04-30
No encontró 2023-05-01
No encontró 2023-05-02
No encontró 2023-05-03
No encontró 2023-05-04


In [39]:
mask_lunch  = (datetime.time(8) < df_index["local_time"]) &  (datetime.time(17) > df_index["local_time"])
mask_dinner = (datetime.time(19) < df_index["local_time"]) &  (datetime.time(23,59,59) > df_index["local_time"])
df_index["slot_time"] = None

df_index.loc[mask_dinner, "slot_time"] = "dinner"
df_index.loc[mask_lunch,  "slot_time"] = "lunch"

In [40]:
df_index

Unnamed: 0,country_x,city,timestamp,local_date,local_time,query_id,query_lat,query_lng,store_id,index_paid,index_position,store_distance,delivery_price,has_delivery_price,status,is_available,eta,has_saturation_cooking_time,saturation_cooking_time,has_saturation_distance,saturation_distance,city_level_1,city_level_2,city_level_3,slot_time,year,month,day,app_store_id,branch_id,branch_name,brand_id,brand_name,app,store_start_date,store_end_date,is_active,is_own,operator_id,operator_name,address,lat,lng,country_y,store_margin_start_date
0,AR,BUE,2023-01-15 13:31:16.452251,2023-01-15,10:31:16,d7b7d3767cfc3a5f8dfb776b7a55c19de75ddf17,-34.596012,-58.473905,130531,False,581,3.409149,329.0,True,OFF,False,,False,,False,,CABA,15,Paternal,lunch,2023,01,15,130531,3,Villa Urquiza - Quesada Kitchenita,3,Jardín,RP,2021-06-01,2022-06-30,False,True,36,Kitchenita Argentina,Quesada 5050,-34.568001,-58.489101,AR,2021-05-31
1,AR,BUE,2023-01-15 13:31:16.452251,2023-01-15,10:31:16,d7b7d3767cfc3a5f8dfb776b7a55c19de75ddf17,-34.596012,-58.473905,130531,False,581,3.409149,329.0,True,OFF,False,,False,,False,,CABA,15,Paternal,lunch,2023,01,15,130531,1,Belgrano - Moldes,3,Jardín,RP,2020-07-22,2021-05-31,False,True,36,Kitchenita Argentina,Moldes 2499,-34.560118,-58.461640,AR,2020-04-23
2,AR,BUE,2023-01-15 13:31:16.452251,2023-01-15,10:31:16,d7b7d3767cfc3a5f8dfb776b7a55c19de75ddf17,-34.596012,-58.473905,130531,False,581,3.409149,329.0,True,OFF,False,,False,,False,,CABA,15,Paternal,lunch,2023,01,15,130531,26,Villa Urquiza - Quesada,3,Jardín,RP,2022-07-01,2023-04-13,False,False,17,Descom Distribución,Quesada 5050,-34.568001,-58.489101,AR,2022-06-30
3,AR,BUE,2023-01-15 13:34:18.323112,2023-01-15,10:34:18,a9e6c52f52b7f199253b7c4d779c0c76f7293fe6,-34.534803,-58.484447,130531,False,305,3.688750,329.0,True,OFF,False,,False,,False,,Zona Norte AMBA,Vicente López,,lunch,2023,01,15,130531,3,Villa Urquiza - Quesada Kitchenita,3,Jardín,RP,2021-06-01,2022-06-30,False,True,36,Kitchenita Argentina,Quesada 5050,-34.568001,-58.489101,AR,2021-05-31
4,AR,BUE,2023-01-15 13:34:18.323112,2023-01-15,10:34:18,a9e6c52f52b7f199253b7c4d779c0c76f7293fe6,-34.534803,-58.484447,130531,False,305,3.688750,329.0,True,OFF,False,,False,,False,,Zona Norte AMBA,Vicente López,,lunch,2023,01,15,130531,1,Belgrano - Moldes,3,Jardín,RP,2020-07-22,2021-05-31,False,True,36,Kitchenita Argentina,Moldes 2499,-34.560118,-58.461640,AR,2020-04-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,AR,BUE,2023-07-05 22:30:56.832397,2023-07-05,19:30:56,e0ffe5fcf1a456ac4345dfd791830f097151a8f2,-34.553274,-58.436781,187877,False,533,2.237508,459.0,True,TEMPORARILY_UNAVAILABLE_OFF,False,,False,,False,,CABA,13,Belgrano,dinner,2023,07,05,187877,8,Belgrano - Juramento,8,Vicenta,RP,2023-02-06,2023-05-30,False,False,3,Orno Pizzería,"Juramento 2527, Belgrano",-34.562000,-58.458010,AR,2022-10-24
363,AR,BUE,2023-07-05 22:30:36.311521,2023-07-05,19:30:36,9425d0c817006e1f3658edbc1ac8f30cf1ac83b7,-34.540170,-58.474614,187877,False,522,2.930331,150.0,True,TEMPORARILY_UNAVAILABLE_OFF,False,,False,,False,,CABA,13,Nuñez,dinner,2023,07,05,187877,8,Belgrano - Juramento,8,Vicenta,RP,2022-02-05,2022-10-24,False,False,3,Orno Pizzería,"Juramento 2527, Belgrano",-34.562000,-58.458010,AR,2021-11-07
364,AR,BUE,2023-07-05 22:30:36.311521,2023-07-05,19:30:36,9425d0c817006e1f3658edbc1ac8f30cf1ac83b7,-34.540170,-58.474614,187877,False,522,2.930331,150.0,True,TEMPORARILY_UNAVAILABLE_OFF,False,,False,,False,,CABA,13,Nuñez,dinner,2023,07,05,187877,8,Belgrano - Juramento,8,Vicenta,RP,2023-02-06,2023-05-30,False,False,3,Orno Pizzería,"Juramento 2527, Belgrano",-34.562000,-58.458010,AR,2022-10-24
365,AR,BUE,2023-07-05 22:32:01.684690,2023-07-05,19:32:01,fdc63d2a23f9bf85affbc8cda1e641929fd4da8d,-34.541947,-58.463140,187877,False,553,2.364496,499.0,True,TEMPORARILY_UNAVAILABLE_OFF,False,,False,,False,,CABA,13,Nuñez,dinner,2023,07,05,187877,8,Belgrano - Juramento,8,Vicenta,RP,2022-02-05,2022-10-24,False,False,3,Orno Pizzería,"Juramento 2527, Belgrano",-34.562000,-58.458010,AR,2021-11-07


In [54]:
df_index.to_excel('OperadoresIND.xlsx', index=False)

## Para extraer data de Platforms de OPS 

In [102]:
# Esta es la libreria que nos permite levantar datos de AWS como dataframes de Pandas
import awswrangler as wr

import pandas as pd
import numpy as np

import datetime

pd.options.display.max_columns = None
pd.options.display.max_rows = 100

In [103]:
# Leemos datos del operador (calculo diario)
df_daily = wr.s3.read_parquet("s3://data-prod-l2-metrics/internal_use/operators/daily/snapshot/", dataset=True)

# Leemos los datos de 
df_calendar_month = wr.s3.read_csv("s3://data-prod-l2-metrics/internal_use/operators/calendar_month/snapshot/", dataset=True)

# Convertimos en formato fecha a la columna fecha
df_daily["date"] = pd.to_datetime(df_daily["date"])

df_calendar_month["day"] = 1
df_calendar_month["date"] = pd.to_datetime(df_calendar_month[["year", "month", "day"]])

# Rellenamos una columna
df_daily["app"] = df_daily["app"].fillna("All")
df_calendar_month["app"] = df_calendar_month["app"].fillna("All")

# Ordenamos los datos con las fechas (de mayor a menor), pais, marca y cocina.
df_daily.sort_values(by=["date", "country", "brand_id", "branch_id"], ascending=[False, True, True, True], inplace=True)
df_calendar_month.sort_values(by=["date", "country", "brand_id", "branch_id"], ascending=[False, True, True, True], inplace=True)

# Corregimos algunas inconsistencias por cambios
df_daily["is_in_warning"] = df_daily["is_in_warning"].fillna(df_daily["is_regular"])
df_daily["is_regular"] = df_daily["is_over_performing"].fillna(df_daily["is_regular"])
df_daily.drop(columns=["is_over_performing"], inplace=True)

df_daily["buy_back_percent"] = df_daily["buy_back_percent"].fillna(0)
df_calendar_month["buy_back_percent"] = df_calendar_month["buy_back_percent"].fillna(0)

In [104]:
df_daily

Unnamed: 0,country,branch_id,brand_id,availability_prop_raw,availability_percent_raw,availability_percent,date,buy_back_clients,total_users,buy_back_ratio,buy_back_percent,cancelled_percent,cancelled_ratio_raw,cancelled_num_raw,orders_total_from_header_raw,cooking_time_raw,complete_delivery_time_raw,cooking_time,complete_delivery_time,courier_not_wait_sum_raw,courier_waiting_sum_raw,orders_total_with_courier_info_raw,on_time_ratio_raw,on_time_ratio_percent,orders,ticket_amount,perfomance,is_zombie,is_regular,num_review,mean_score_review_raw,sum_score_review_raw,mean_score_review,city_id,city_name,trunk_id,trunk_name,branch_name,brand_name,company_id,company_name,year,month,day,orders_with_delivery_info,is_in_warning,app,inactive,availability,prod_avail_prop,prod_avail_percent,start_date
20537,AR,2,3,0.219048,21.904762,22.0,2023-07-11,,,,0.0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,BUE,Buenos Aires,BUE,Buenos Aires,Almagro - Bustamante,Jardín,36,Kitchenita Argentina,2023,07,11,,False,RP,False,,,,2020-11-11
20542,AR,2,3,0.000000,0.000000,0.0,2023-07-11,,,,0.0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,BUE,Buenos Aires,BUE,Buenos Aires,Almagro - Bustamante,Jardín,36,Kitchenita Argentina,2023,07,11,,False,PY,False,,,,2020-11-11
20642,AR,2,3,0.109524,10.952381,11.0,2023-07-11,,,,0.0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,BUE,Buenos Aires,BUE,Buenos Aires,Almagro - Bustamante,Jardín,36,Kitchenita Argentina,2023,07,11,,False,All,False,,,,2020-11-11
20568,AR,69,4,0.760019,76.001886,76.0,2023-07-11,6.0,30.0,0.20,20.0,8.0,0.083333,3.0,36.0,38.044141,52.447187,38.0,52.0,32.0,1.0,33.0,0.969697,97.0,1.100000,4672.600000,0.087302,True,False,2.0,2.500000,5.0,,BUE,Buenos Aires,BUE,Buenos Aires,Belgrano - Mi Barrio,Planta Base,47,Mi Barrio,2023,07,11,33.0,False,RP,False,,,,2023-04-21
20570,AR,69,4,0.742810,74.281000,74.0,2023-07-11,9.0,36.0,0.25,25.0,0.0,0.000000,0.0,49.0,22.277279,31.926607,22.0,32.0,,,,,,1.633333,6052.266667,0.129630,False,False,3.0,3.666667,11.0,,BUE,Buenos Aires,BUE,Buenos Aires,Belgrano - Mi Barrio,Planta Base,47,Mi Barrio,2023,07,11,48.0,True,PY,False,,,,2023-04-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,MX,45,32,,,,2023-01-17,,,,0.0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,Hipódromo - Iztaccihuatl,Ciabatta,40,Ilan Steiner Bejar,2023,01,17,,False,All,,,,,NaT
75,MX,46,32,,,,2023-01-17,,,,0.0,,,,,,,,,,,,,,0.000000,,,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,Santa Fe - Agraz,Ciabatta,37,Kitchenita México,2023,01,17,,False,All,,,,,NaT
76,MX,47,32,,,,2023-01-17,,,,0.0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,Los Manzanos - Lago Como,Ciabatta,42,"Cocinas Unidas, SAPI de CV",2023,01,17,,False,All,,,,,NaT
77,MX,48,32,,,,2023-01-17,,,,0.0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,San Ángel - Cracovia,Ciabatta,41,Edgar Ordaz Sanchez,2023,01,17,,False,All,,,,,NaT


In [106]:
df_daily.to_excel('OperadoresOPS.xlsx', index=False)

## Juntar todos los dataset

In [193]:
import pandas as pd
import numpy as np

In [194]:
# Read the Excel file
OPS = pd.read_excel("OperadoresOPS.xlsx")

# Display the contents of the DataFrame
OPS

Unnamed: 0,country,branch_id,brand_id,availability_prop_raw,availability_percent_raw,availability_percent,date,buy_back_clients,total_users,buy_back_ratio,buy_back_percent,cancelled_percent,cancelled_ratio_raw,cancelled_num_raw,orders_total_from_header_raw,cooking_time_raw,complete_delivery_time_raw,cooking_time,complete_delivery_time,courier_not_wait_sum_raw,courier_waiting_sum_raw,orders_total_with_courier_info_raw,on_time_ratio_raw,on_time_ratio_percent,orders,ticket_amount,perfomance,is_zombie,is_regular,num_review,mean_score_review_raw,sum_score_review_raw,mean_score_review,city_id,city_name,trunk_id,trunk_name,branch_name,brand_name,company_id,company_name,year,month,day,orders_with_delivery_info,is_in_warning,app,inactive,availability,prod_avail_prop,prod_avail_percent,start_date
0,AR,2,3,0.219048,21.904762,22.0,2023-07-11,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,BUE,Buenos Aires,BUE,Buenos Aires,Almagro - Bustamante,Jardín,36,Kitchenita Argentina,2023,7,11,,False,RP,0.0,,,,2020-11-11
1,AR,2,3,0.000000,0.000000,0.0,2023-07-11,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,BUE,Buenos Aires,BUE,Buenos Aires,Almagro - Bustamante,Jardín,36,Kitchenita Argentina,2023,7,11,,False,PY,0.0,,,,2020-11-11
2,AR,2,3,0.109524,10.952381,11.0,2023-07-11,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,BUE,Buenos Aires,BUE,Buenos Aires,Almagro - Bustamante,Jardín,36,Kitchenita Argentina,2023,7,11,,False,All,0.0,,,,2020-11-11
3,AR,69,4,0.760019,76.001886,76.0,2023-07-11,6.0,30.0,0.20,20,8.0,0.083333,3.0,36.0,38.044141,52.447187,38.0,52.0,32.0,1.0,33.0,0.969697,97.0,1.100000,4672.600000,0.087302,True,False,2.0,2.500000,5.0,,BUE,Buenos Aires,BUE,Buenos Aires,Belgrano - Mi Barrio,Planta Base,47,Mi Barrio,2023,7,11,33.0,False,RP,0.0,,,,2023-04-21
4,AR,69,4,0.742810,74.281000,74.0,2023-07-11,9.0,36.0,0.25,25,0.0,0.000000,0.0,49.0,22.277279,31.926607,22.0,32.0,,,,,,1.633333,6052.266667,0.129630,False,False,3.0,3.666667,11.0,,BUE,Buenos Aires,BUE,Buenos Aires,Belgrano - Mi Barrio,Planta Base,47,Mi Barrio,2023,7,11,48.0,True,PY,0.0,,,,2023-04-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20699,MX,45,32,,,,2023-01-17,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,Hipódromo - Iztaccihuatl,Ciabatta,40,Ilan Steiner Bejar,2023,1,17,,False,All,,,,,NaT
20700,MX,46,32,,,,2023-01-17,,,,0,,,,,,,,,,,,,,0.000000,,,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,Santa Fe - Agraz,Ciabatta,37,Kitchenita México,2023,1,17,,False,All,,,,,NaT
20701,MX,47,32,,,,2023-01-17,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,Los Manzanos - Lago Como,Ciabatta,42,"Cocinas Unidas, SAPI de CV",2023,1,17,,False,All,,,,,NaT
20702,MX,48,32,,,,2023-01-17,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,MEX,Ciudad de México,MEX,Ciudad de México,San Ángel - Cracovia,Ciabatta,41,Edgar Ordaz Sanchez,2023,1,17,,False,All,,,,,NaT


In [196]:
OPS = OPS[(OPS['branch_name'] == 'Palermo - Santa Fe') |
          (OPS['branch_name'] == 'Martinez - Av Santa Fe') |
          (OPS['branch_name'] == 'Almagro - Rivadavia') |
          (OPS['branch_name'] == 'Monte Castro - Lope de Vega') |
          (OPS['branch_name'] == 'Flores - Av Eva Perón')]

OPS = OPS[OPS['brand_name'] == 'Ciabatta']

OPS = OPS[OPS['app'] == 'RP']


In [197]:
OPS = OPS.sort_values(by='date')

In [198]:
OPS = OPS.dropna(axis=1, how='all')

In [199]:
column_names = OPS.columns.tolist()

# Display the column names
print(column_names)



In [200]:
columns_to_drop = ['country', 'branch_id', 'brand_id', 'year', 'month', 'trunk_id', 'trunk_name', 'branch_name', 'brand_name'
                   , 'company_id', 'app','city_id', 'city_name']

# Drop the specified columns from the DataFrame
OPS = OPS.drop(columns=columns_to_drop)

Unnamed: 0,availability_prop_raw,availability_percent_raw,availability_percent,date,buy_back_clients,total_users,buy_back_ratio,buy_back_percent,cancelled_percent,cancelled_ratio_raw,cancelled_num_raw,orders_total_from_header_raw,cooking_time_raw,complete_delivery_time_raw,cooking_time,complete_delivery_time,courier_not_wait_sum_raw,courier_waiting_sum_raw,orders_total_with_courier_info_raw,on_time_ratio_raw,on_time_ratio_percent,orders,ticket_amount,perfomance,is_zombie,is_regular,num_review,mean_score_review_raw,sum_score_review_raw,mean_score_review,company_name,day,orders_with_delivery_info,is_in_warning,inactive,start_date
16533,,,,2023-03-22,,,,0,,,,,,,,,,,,,,0.000000,,,True,False,,,,,La Fábrica,22,,False,0.0,NaT
16530,0.937607,93.760713,94.0,2023-03-22,7.0,57.0,0.122807,12,9.0,0.089744,7.0,78.0,32.794601,43.724883,33.0,44.0,14.0,12.0,26.0,0.538462,53.8,2.366667,4287.633333,0.176617,False,False,18.0,4.166667,75.0,4.2,La Fábrica,22,71.0,True,0.0,NaT
16528,0.855863,85.586258,86.0,2023-03-22,27.0,130.0,0.207692,21,1.0,0.013158,2.0,152.0,26.022333,39.130333,26.0,39.0,44.0,16.0,60.0,0.733333,73.3,5.000000,11585.700000,0.337838,False,True,22.0,3.954545,87.0,4.0,La Fábrica,22,150.0,False,0.0,NaT
16378,0.866118,86.611818,87.0,2023-03-23,28.0,131.0,0.213740,21,1.0,0.012821,2.0,156.0,26.518398,39.896970,27.0,40.0,44.0,14.0,58.0,0.758621,75.9,5.133333,11762.766667,0.346847,False,True,21.0,4.095238,86.0,4.1,La Fábrica,23,154.0,False,0.0,NaT
16383,0.208333,20.833333,21.0,2023-03-23,,,,0,,,,,,,,,,,,,,0.000000,0.000000,0.000000,True,False,,,,,La Fábrica,23,,False,0.0,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,0.704556,70.455647,70.0,2023-07-11,5.0,57.0,0.087719,9,5.0,0.047619,3.0,63.0,37.699754,49.744717,38.0,50.0,46.0,14.0,60.0,0.766667,76.7,2.000000,6486.700000,0.281690,False,False,9.0,3.111111,28.0,3.1,La Fabrica Peron y Cabildo,11,60.0,True,0.0,2023-05-10
39,0.958071,95.807077,96.0,2023-07-11,14.0,87.0,0.160920,16,0.0,0.000000,0.0,102.0,19.156756,32.158708,19.0,32.0,84.0,18.0,102.0,0.823529,82.4,3.400000,12608.733333,,False,False,16.0,3.625000,58.0,3.6,La Fabrica Martinez y San Isidro,11,102.0,True,0.0,2023-05-12
36,0.972806,97.280622,97.0,2023-07-11,8.0,48.0,0.166667,17,2.0,0.019608,1.0,51.0,24.910198,37.780381,25.0,38.0,47.0,3.0,50.0,0.940000,94.0,1.666667,5979.800000,0.183150,False,False,7.0,3.857143,27.0,3.9,La Fábrica,11,50.0,True,0.0,2023-03-23
33,0.845235,84.523493,85.0,2023-07-11,14.0,102.0,0.137255,14,2.0,0.016667,2.0,120.0,30.159706,45.736719,30.0,46.0,102.0,16.0,118.0,0.864407,86.4,3.933333,12811.233333,0.293532,False,False,21.0,4.095238,86.0,4.1,La Fábrica,11,118.0,True,0.0,2023-01-18


In [218]:
# Read the Excel file
GRW = pd.read_excel("OperadoresGRW.xlsx")

# Display the contents of the DataFrame
GRW

Unnamed: 0,adDays,brandSegmentation,campaignDays,clickPenetration,clicks,cluster,coverageOrders,coverageUsers,month,orders,ordersShare,retentionRate,salesMl,salesUsd,storeId,storeName,userPenetration,users,usersThatClicked
0,4,6. BRAND DEVELOPMENT,29,0.003661,1342,AR 4,950461,252093,2023-03-01,169,0.000177,0.125000,644071.3,2800.31000,141821,Ciabatta Sándwiches - Bustamante,0.000602,152,923
1,0,6. BRAND DEVELOPMENT,0,0.002977,342,,241600,78599,2023-03-01,32,0.000132,0.000000,87841.6,381.92000,210064,Ciabatta Sandwiches By Kitchenita - Lope de Vega,0.000407,32,234
2,0,6. BRAND DEVELOPMENT,0,0.002766,621,,597766,160850,2023-03-01,86,0.000143,0.082191,228392.3,993.01000,207898,Ciabatta Sandwiches - Palermo,0.000453,73,445
3,0,6. BRAND DEVELOPMENT,0,0.004507,1588,,864642,234053,2023-03-01,181,0.000209,0.132075,569114.3,2474.41000,202549,Ciabatta Sándwiches - Rivadavia,0.000679,159,1055
4,11,5. SIDEKICK,29,0.005910,2166,AR 2,950461,252093,2023-03-01,432,0.000454,0.188010,2053090.4,8926.48000,134275,Vicenta Empanadas - Bustamante,0.001455,367,1490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,0,6. BRAND DEVELOPMENT,9,0.004323,874,,536868,146398,2023-06-01,117,0.000217,0.000000,315497.5,1371.72826,207898,Ciabatta Sandwiches - Palermo,0.000737,108,633
168,0,6. BRAND DEVELOPMENT,9,0.002832,849,,713203,195566,2023-06-01,82,0.000114,0.000000,216135.5,939.71957,202549,Ciabatta Sándwiches - Rivadavia,0.000398,78,554
169,0,6. BRAND DEVELOPMENT,9,0.005416,386,AR (petite),135509,45045,2023-06-01,22,0.000162,0.000000,66585.5,289.50217,188593,Ciabatta Sándwiches - Martin Garcia,0.000466,21,244
170,0,6. BRAND DEVELOPMENT,9,0.004323,874,,536868,146398,2023-06-01,117,0.000217,0.000000,315497.5,1371.72826,207898,Ciabatta Sandwiches - Palermo,0.000737,108,633


In [219]:
GRW = GRW[(GRW['storeId'] == 212406)|
          (GRW['storeId'] == 202549)|
          (GRW['storeId'] == 210064)|
          (GRW['storeId'] == 212443) |
          (GRW['storeId'] == 207898) ]


In [252]:
# Read the Excel file
IND = pd.read_excel("operadoresIND.xlsx")

# Display the contents of the DataFrame
IND

In [248]:
IND = IND[(IND['branch_name'] == 'Palermo - Santa Fe') |
          (IND['branch_name'] == 'Martinez - Av Santa Fe') |
          (IND['branch_name'] == 'Almagro - Rivadavia') |
          (IND['branch_name'] == 'Monte Castro - Lope de Vega') |
          (IND['branch_name'] == 'Flores - Av Eva Perón')]

IND = IND[IND['brand_name'] == 'Ciabatta']

IND = IND[IND['app'] == 'RP']


In [243]:
# Convert the "local_date" column to datetime format
IND['local_date'] = pd.to_datetime(IND['local_date'])

# Extract the year and month components from the "local_date" column
IND['year'] = IND['local_date'].dt.year
IND['month'] = IND['local_date'].dt.month

# Create the "month" column with the desired format
IND['month'] = pd.to_datetime(IND[['year', 'month']].assign(day=1))

In [None]:
# Convert 'date' column in OPS to datetime format
OPS['date'] = pd.to_datetime(OPS['date'])

# Convert 'month' column in GRW to datetime format
GRW['month'] = pd.to_datetime(GRW['month'])

# Convert 'month' column in IND to datetime format
IND['month'] = pd.to_datetime(IND['month'])

# Perform the merge between GRW and OPS
df = pd.merge(GRW, OPS, left_on='month', right_on='date')

# Perform the merge between the result and IND
df = pd.merge(df, IND, on='month')

df


Unnamed: 0,adDays,brandSegmentation,campaignDays,clickPenetration,clicks,cluster,coverageOrders,coverageUsers,month,orders_x,ordersShare,retentionRate,salesMl,salesUsd,storeId,storeName,userPenetration,users,usersThatClicked,availability_prop_raw,availability_percent_raw,availability_percent,date,buy_back_clients,total_users,buy_back_ratio,buy_back_percent,cancelled_percent,cancelled_ratio_raw,cancelled_num_raw,orders_total_from_header_raw,cooking_time_raw,complete_delivery_time_raw,cooking_time,complete_delivery_time,courier_not_wait_sum_raw,courier_waiting_sum_raw,orders_total_with_courier_info_raw,on_time_ratio_raw,on_time_ratio_percent,orders_y,ticket_amount,perfomance,is_zombie,is_regular,num_review,mean_score_review_raw,sum_score_review_raw,mean_score_review,company_name,day_x,orders_with_delivery_info,is_in_warning,inactive,start_date,query_lat,query_lng,store_id,index_paid,index_position,store_distance,delivery_price,has_delivery_price,is_available,eta,has_saturation_cooking_time,saturation_cooking_time,has_saturation_distance,saturation_distance,year,day_y,app_store_id,branch_id,brand_id,is_active,is_own,operator_id,lat,lng
0,0,6. BRAND DEVELOPMENT,0,0.002604,659,,660307,182342,2023-05-01,103,0.000155,0.177777,237858.0,1034.16522,207898,Ciabatta Sandwiches - Palermo,0.000493,90,475,0.911221,91.122104,91.0,2023-05-01,9.0,90.0,0.1,10,3.0,0.028302,3.0,106.0,25.184857,38.85099,25.0,39.0,78.0,22.0,100.0,0.78,78.0,3.433333,7369.766667,0.256219,False,False,23.0,4.565217,105.0,4.6,La Fábrica,1,102.0,True,0.0,2023-01-18,-34.571994,-58.472636,163598.28394,0.00027,340.10268,2.154918,296.217107,1.0,0.427125,35.153846,0.0,,0.081212,2937.975894,2023.0,16.820496,163598.28394,27.154729,20.731452,0.301559,0.468845,33.881928,-34.571723,-58.465987
1,0,6. BRAND DEVELOPMENT,0,0.002604,659,,660307,182342,2023-05-01,103,0.000155,0.177777,237858.0,1034.16522,207898,Ciabatta Sandwiches - Palermo,0.000493,90,475,0.911221,91.122104,91.0,2023-05-01,9.0,90.0,0.1,10,3.0,0.028302,3.0,106.0,25.184857,38.85099,25.0,39.0,78.0,22.0,100.0,0.78,78.0,3.433333,7369.766667,0.256219,False,False,23.0,4.565217,105.0,4.6,La Fábrica,1,102.0,True,0.0,2023-01-18,-34.571994,-58.472636,163598.28394,0.00027,340.10268,2.154918,296.217107,1.0,0.427125,35.153846,0.0,,0.081212,2937.975894,2023.0,16.820496,163598.28394,27.154729,20.731452,0.301559,0.468845,33.881928,-34.571723,-58.465987
2,0,6. BRAND DEVELOPMENT,0,0.002604,659,,660307,182342,2023-05-01,103,0.000155,0.177777,237858.0,1034.16522,207898,Ciabatta Sandwiches - Palermo,0.000493,90,475,0.911221,91.122104,91.0,2023-05-01,9.0,90.0,0.1,10,3.0,0.028302,3.0,106.0,25.184857,38.85099,25.0,39.0,78.0,22.0,100.0,0.78,78.0,3.433333,7369.766667,0.256219,False,False,23.0,4.565217,105.0,4.6,La Fábrica,1,102.0,True,0.0,2023-01-18,-34.571994,-58.472636,163598.28394,0.00027,340.10268,2.154918,296.217107,1.0,0.427125,35.153846,0.0,,0.081212,2937.975894,2023.0,16.820496,163598.28394,27.154729,20.731452,0.301559,0.468845,33.881928,-34.571723,-58.465987
3,0,6. BRAND DEVELOPMENT,0,0.002604,659,,660307,182342,2023-05-01,103,0.000155,0.177777,237858.0,1034.16522,207898,Ciabatta Sandwiches - Palermo,0.000493,90,475,0.911221,91.122104,91.0,2023-05-01,9.0,90.0,0.1,10,3.0,0.028302,3.0,106.0,25.184857,38.85099,25.0,39.0,78.0,22.0,100.0,0.78,78.0,3.433333,7369.766667,0.256219,False,False,23.0,4.565217,105.0,4.6,La Fábrica,1,102.0,True,0.0,2023-01-18,-34.571994,-58.472636,163598.28394,0.00027,340.10268,2.154918,296.217107,1.0,0.427125,35.153846,0.0,,0.081212,2937.975894,2023.0,16.820496,163598.28394,27.154729,20.731452,0.301559,0.468845,33.881928,-34.571723,-58.465987
4,0,6. BRAND DEVELOPMENT,9,0.004323,874,,536868,146398,2023-06-01,117,0.000217,0.0,315497.5,1371.72826,207898,Ciabatta Sandwiches - Palermo,0.000737,108,633,0.841228,84.122784,84.0,2023-06-01,17.0,86.0,0.197674,20,0.0,0.0,0.0,100.0,30.594927,45.266275,31.0,45.0,88.0,12.0,100.0,0.88,88.0,3.333333,8036.066667,0.248756,False,False,22.0,3.863636,85.0,3.9,La Fábrica,1,100.0,True,0.0,2023-01-18,-34.571987,-58.475341,166832.818375,0.009927,312.716711,2.139417,367.422537,1.0,0.412473,35.090371,0.0,,0.113986,2810.0997,2023.0,15.261176,166832.818375,31.484562,21.767098,0.363456,0.419183,34.777709,-34.572162,-58.468096
5,0,6. BRAND DEVELOPMENT,9,0.004323,874,,536868,146398,2023-06-01,117,0.000217,0.0,315497.5,1371.72826,207898,Ciabatta Sandwiches - Palermo,0.000737,108,633,0.841228,84.122784,84.0,2023-06-01,17.0,86.0,0.197674,20,0.0,0.0,0.0,100.0,30.594927,45.266275,31.0,45.0,88.0,12.0,100.0,0.88,88.0,3.333333,8036.066667,0.248756,False,False,22.0,3.863636,85.0,3.9,La Fábrica,1,100.0,True,0.0,2023-01-18,-34.571987,-58.475341,166832.818375,0.009927,312.716711,2.139417,367.422537,1.0,0.412473,35.090371,0.0,,0.113986,2810.0997,2023.0,15.261176,166832.818375,31.484562,21.767098,0.363456,0.419183,34.777709,-34.572162,-58.468096


In [None]:
column_names = df.columns.tolist()

# Display the column names
print(column_names)



In [None]:
# Calcular la matriz de correlación
correlation_matrix = df.corr()

# Filtrar las correlaciones con 'Ventas QS (Ordenes)'
ventas_qs_correlations = correlation_matrix['orders_x']

# Ordenar las correlaciones de mayor a menor
ventas_qs_correlations = ventas_qs_correlations.sort_values(ascending=False)

# Mostrar las correlaciones
print(ventas_qs_correlations)

campaignDays                          1.0
clickPenetration                      1.0
clicks                                1.0
salesMl                               1.0
complete_delivery_time_raw            1.0
buy_back_clients                      1.0
courier_not_wait_sum_raw              1.0
on_time_ratio_percent                 1.0
userPenetration                       1.0
orders_x                              1.0
salesUsd                              1.0
cooking_time_raw                      1.0
index_paid                            1.0
cooking_time                          1.0
buy_back_ratio                        1.0
buy_back_percent                      1.0
has_saturation_distance               1.0
complete_delivery_time                1.0
brand_id                              1.0
is_active                             1.0
on_time_ratio_raw                     1.0
ordersShare                           1.0
users                                 1.0
store_id                          

  correlation_matrix = df.corr()


In [None]:
ventas_qs_correlations.to_excel('ventascorrelacion.xlsx', index=True)

In [None]:
columns_to_drop = [
    'users',
    'orders_with_delivery_info',
    'courier_not_wait_sum_raw',
    'sum_score_review_raw',
    'orders_total_with_courier_info_raw',
    'total_users',
    'orders_total_from_header_raw',
    'orders_y',
    'ticket_amount',
    'is_in_warning',
    'num_review',
    'usersThatClicked',
    'salesMl',
    'salesUsd',
    'mean_score_review_raw',
    'courier_waiting_sum_raw',
    'buy_back_ratio',
    'buy_back_percent',
    'complete_delivery_time',
    'complete_delivery_time_raw',
    'on_time_ratio_raw',
    'on_time_ratio_percent',
    'cooking_time',
    'cooking_time_raw',
    'n_queries',
    'availability_percent',
    'availability_percent_raw',
    'availability_prop_raw',
    'cancelled_num_raw',
    'position_q1',
    'q_max_distance',
    'position_q3',
    'position_max',
    'cancelled_ratio_raw',
    'is_zombie',
    'adDays',
    'campaignDays',
    'storeId',
    'is_regular',
    'inactive',
    'day',
    'store_id',
    'n_ad',
    'perfomance',
    'ordersShare',
    'position_min'
    ]

# Drop the specified columns from the DataFrame
df = df.drop(columns=columns_to_drop)

KeyError: "['n_queries', 'position_q1', 'q_max_distance', 'position_q3', 'position_max', 'day', 'n_ad', 'position_min'] not found in axis"

In [None]:
# Calcular la matriz de correlación
correlation_matrix = df.corr()

# Filtrar las correlaciones con 'Ventas QS (Ordenes)'
ventas_qs_correlations = correlation_matrix['orders_x']

# Ordenar las correlaciones de mayor a menor
ventas_qs_correlations = ventas_qs_correlations.sort_values(ascending=False)

# Mostrar las correlaciones
print(ventas_qs_correlations)

orders_x             1.000000
position_median      0.914265
userPenetration      0.912427
clicks               0.890161
mean_score_review    0.859188
buy_back_clients     0.833321
clickPenetration     0.751383
coverageOrders       0.743955
coverageUsers        0.719050
retentionRate        0.619635
cancelled_percent   -0.810849
Name: orders_x, dtype: float64


  correlation_matrix = df.corr()


In [None]:
df

Unnamed: 0,brandSegmentation,clickPenetration,clicks,cluster,coverageOrders,coverageUsers,month,orders_x,retentionRate,storeName,userPenetration,buy_back_clients,cancelled_percent,mean_score_review,company_name,date,position_median
0,6. BRAND DEVELOPMENT,0.001677,330,,427563,127561,2023-02-01,44,0.026315,Ciabatta Sandwiches - Palermo,0.000297,3.0,8.0,3.2,La Fábrica,2023-02-01,206.2
1,6. BRAND DEVELOPMENT,0.002766,621,,597766,160850,2023-03-01,86,0.082191,Ciabatta Sandwiches - Palermo,0.000453,9.0,7.0,4.2,La Fábrica,2023-03-01,400.0
2,6. BRAND DEVELOPMENT,0.003631,860,,547277,150899,2023-04-01,101,0.068181,Ciabatta Sandwiches - Palermo,0.000583,10.0,3.0,4.5,La Fábrica,2023-04-01,378.75
3,6. BRAND DEVELOPMENT,0.003844,785,,486514,142555,2023-04-01,89,0.068181,Ciabatta Sandwiches - Palermo,0.000547,10.0,3.0,4.5,La Fábrica,2023-04-01,378.75
4,6. BRAND DEVELOPMENT,0.003903,860,,547277,150899,2023-04-01,101,0.068181,Ciabatta Sandwiches - Palermo,0.000583,10.0,3.0,4.5,La Fábrica,2023-04-01,378.75
5,6. BRAND DEVELOPMENT,0.002604,659,,660307,182342,2023-05-01,103,0.177777,Ciabatta Sandwiches - Palermo,0.000493,17.0,0.0,4.0,La Fábrica,2023-05-01,376.710526


In [None]:
import plotly.express as px

# Filtrar las correlaciones con 'Ventas QS (Ordenes)'
ventas_qs_correlations = df.corr()['orders_x'].drop('orders_x')

# Crear el gráfico de dispersión con un tamaño de figura mayor
fig = px.scatter(ventas_qs_correlations, x=ventas_qs_correlations.values, y=ventas_qs_correlations.index,
                 color=ventas_qs_correlations.values, color_continuous_scale='RdBu')

# Configurar los ejes y el título del gráfico
fig.update_layout(xaxis_title='Coeficiente de correlación Lunch', yaxis_title='Variables',
                  title='Correlación con Ordenes')

# Ajustar el tamaño de la figura
fig.update_layout(height=600, width=1000)

# Ajustar el espaciado entre los ticks del eje y y la orientación del texto
fig.update_yaxes(tickmode='array', tickvals=ventas_qs_correlations.index, ticktext=ventas_qs_correlations.index,
                 tickfont=dict(size=10))

# Mostrar el gráfico
fig.show()

  ventas_qs_correlations = df.corr()['orders_x'].drop('orders_x')
