In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import calendar

In [28]:
'''
Tabla de ventas con año y mes
Este script carga un archivo CSV con datos de ventas mensuales, extrae el año y el mes de una columna de fecha,
'''
df_sales_month = pd.read_csv('sales.csv')
df_sales_month['calmonth'] = pd.to_datetime(df_sales_month['calmonth'], format='%Y%m')
df_sales_month['year'] = df_sales_month['calmonth'].dt.year
df_sales_month['month'] = df_sales_month['calmonth'].dt.month
# df_sales_month = df_sales_month.drop(columns=['calmonth'])
df_sales_month

Unnamed: 0,cooler_id,customer_id,calmonth,amount,year,month
0,42d9b14c937541b8f1287ce7251d1e29f1f8eb58dbcd7e...,e21b830bc395b5a355f68a698c55ad95474f929d8e2521...,2025-03-01,76.8887,2025,3
1,42d9b14c937541b8f1287ce7251d1e29f1f8eb58dbcd7e...,e21b830bc395b5a355f68a698c55ad95474f929d8e2521...,2024-12-01,131.0627,2024,12
2,42d9b14c937541b8f1287ce7251d1e29f1f8eb58dbcd7e...,e21b830bc395b5a355f68a698c55ad95474f929d8e2521...,2024-07-01,180.8420,2024,7
3,42d9b14c937541b8f1287ce7251d1e29f1f8eb58dbcd7e...,e21b830bc395b5a355f68a698c55ad95474f929d8e2521...,2025-02-01,82.3243,2025,2
4,42d9b14c937541b8f1287ce7251d1e29f1f8eb58dbcd7e...,e21b830bc395b5a355f68a698c55ad95474f929d8e2521...,2023-10-01,201.7541,2023,10
...,...,...,...,...,...,...
204731,661cc6a46677515e7801eda42d84b6242778a9d77f6062...,021ce98d216fdee3533129eb75d956978858eade78d6a4...,2025-04-01,1244.2283,2025,4
204732,661cc6a46677515e7801eda42d84b6242778a9d77f6062...,021ce98d216fdee3533129eb75d956978858eade78d6a4...,2023-06-01,1829.9476,2023,6
204733,661cc6a46677515e7801eda42d84b6242778a9d77f6062...,021ce98d216fdee3533129eb75d956978858eade78d6a4...,2023-04-01,396.8060,2023,4
204734,661cc6a46677515e7801eda42d84b6242778a9d77f6062...,021ce98d216fdee3533129eb75d956978858eade78d6a4...,2024-06-01,1818.3142,2024,6


In [29]:
'''
Tabla de ventas por cliente
Este script agrupa los datos de ventas por cliente, calculando el total de ventas y el número de enfriadores únicos por cliente.
'''
import pandas as pd

df_sales_month = pd.read_csv('sales.csv')
df_sales_month['calmonth'] = pd.to_datetime(df_sales_month['calmonth'], format='%Y%m')
df_sales_month['year'] = df_sales_month['calmonth'].dt.year
df_sales_month['month'] = df_sales_month['calmonth'].dt.month

# Agrupar por customer_id
df_customers = df_sales_month.groupby('customer_id').agg(
    total_amount=('amount', 'sum'),
    coolers_per_customer=('cooler_id', 'nunique')
).reset_index()

# Ordenar por total_amount de mayor a menor
df_customers = df_customers.sort_values(by='total_amount', ascending=False)

df_customers.head()

Unnamed: 0,customer_id,total_amount,coolers_per_customer
3181,798b848f4d378ec7b852731242ff87992d887953149726...,1270383.0,1
5654,dccbe339a93d0edf234e7a2e9f666c51e8e98f79a02f83...,1129673.0,2
4055,9cdd7f590855940073cc14d4e14e82f0deb97c487cf73c...,990947.1,4
5021,c41c6aa34bbf063215dcca7858fd1cc6a62c01072ea567...,794303.6,6
3432,8307133d45165ffe43f4dee589fffaf6fdbf1993e63a5f...,565812.2,2


In [30]:
'''
Tabla de enfriadores con advertencias
Este script carga un archivo CSV con datos de advertencias de enfriadores y los asocia con los clientes que los compraron.
'''
# Cargar los datos
df_warnings = pd.read_csv('warnings.csv')          # Contiene solo cooler_id
df_customers = pd.read_csv('sales.csv')            # Contiene cooler_id y customer_id

# Asociar los cooler_id con customer_id
coolers_with_warning = pd.merge(
    df_warnings,
    df_customers[['cooler_id', 'customer_id']],
    on='cooler_id',
    how='left'
)

# Eliminar duplicados en caso de múltiples ventas por cooler
coolers_with_warning = coolers_with_warning.drop_duplicates()

# Mostrar los resultados
df_coolers_damaged = coolers_with_warning.copy()
df_coolers_damaged


Unnamed: 0,cooler_id,customer_id
0,483fe5b436f370171a15a80520839049bd7c4b8416c28e...,3544a9c44aff0b0f4350dafe509cc3dc5866cc5b4a00ad...
28,51acd7bdf1c17f571df58541440547e2bf6e30e322cd1c...,77eeef8c6c0cf443f317fd6235cdb0de6bfa90bffe82cf...
56,b6edb73e7212f812ffe4b38eae69e06f9f2940d76238f4...,14f33a4ef530da4fc534c603ef8d9aadb001c45c049372...
84,8d3260b5f7e49fff02b4743037a52406b19279d9cd0144...,b9a30d64d6b7d901fae363c19b1b23cabada0973532aca...
112,75e325ac11632c676a299c5d28f3dca83bd628deb8b62f...,69d7dfc1a92fba5676cf72010d37f8dee0ba981843374e...
140,12ef32f2133e68b37554f01de6a72e38c1fa1985585a71...,2a72c2e329e7c8651cd04a6571f81c6c5d778603ef2b4a...
168,ab25aa1218e09fb535191fcff9a3c3d2907b1617275d0d...,710c393ab7b9bfd577b3f984eadae2ae5dec3cfb55787e...
196,0fb620ffe915a5255914b611cfae5a075274893fab35bd...,7f3092fede8553071d9a6bc85c851978266aabe0e5ec9a...
224,72980eb969bf38742a27b57becd625f4a3f03ae6392fbf...,fc0a9bf1a4e7448d3a9a558a0b60372d699f8c81fbfce5...
252,09e1f4de2bbe2ba0773df2e7ab64757ad18a358d20000d...,b086b58d1380ad9cfbb36aceda557cef68e755d30fe093...


In [31]:
df_coolers = pd.read_csv('coolers_cleaned.csv')  # Contiene cooler_id y otros detalles
df_cooler_with_warnings = df_coolers[df_coolers['cooler_id'].isin(df_coolers_damaged['cooler_id'])]
df_filtro_sin_warnings = df_coolers[~df_coolers['cooler_id'].isin(df_coolers_damaged['cooler_id'])]

# Mostrar el resultado filtrado
df_cooler_with_warnings
#promedios = df_cooler_with_warnings.mean(numeric_only=True)
promedios = df_filtro_sin_warnings.mean(numeric_only=True)
print(type(promedios))
df_filtro_sin_warnings

diferencia = df_cooler_with_warnings - promedios



<class 'pandas.core.series.Series'>
