In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from datetime import datetime
from utiles import json_loader, obtener_df


# Lift pandas' caps on displayed rows and columns so tables are shown in full.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Load the input through the project helpers from `utiles` and build the
# DataFrame every later cell works on.
# NOTE(review): 'jsons' is presumably a folder of JSON files — confirm in utiles.
json_list = json_loader('jsons')
df = obtener_df(json_list)


In [2]:

# Per-type aggregates: the number of rows of each 'type' and the mean of the
# 'qualification' column within each type.
by_type = df.groupby('type')
df_type_counts = by_type.size().reset_index(name='counts')
df_type_qualification = by_type['qualification'].mean().reset_index(name='qualification')

# Pie chart: share of each type among all rows.
type_share_fig = px.pie(
    df_type_counts,
    names='type',
    values='counts',
    title='Proporción de establecimientos por tipo de local:',
    labels={'type': 'Tipo', 'counts': 'Cantidad'},
    color='type',
    height=400,
)
type_share_fig.show()

# The y-axis is limited to 0.1 below the lowest mean and 0.1 above the
# highest one, so the bars are not drawn from zero.
mean_qualification = df_type_qualification['qualification']
y_limits = (mean_qualification.min() - 0.1, mean_qualification.max() + 0.1)

# Kept as the cell's last expression so the notebook renders the figure.
px.bar(
    df_type_qualification,
    x='type',
    y='qualification',
    title='Calificación promedio por tipo de local:',
    labels={'type': 'Tipo', 'qualification': 'Calificación'},
    color='type',
    height=400,
    range_y=y_limits,
)

In [3]:
def contact_counts(frame, column):
    """Count the rows of ``frame`` for each value of a boolean contact column.

    Args:
        frame: DataFrame holding ``column`` and a 'name' column.
        column: Name of the contact flag column (e.g. 'contact.facebook').

    Returns:
        DataFrame with one row per distinct value of ``column``. The flag is
        replaced by the text 'Tienen' (True) or 'No tienen' (False); every
        other column holds the number of non-null values in that group.
    """
    counts = frame.groupby(column).count().reset_index()
    counts[column] = counts[column].map({True: 'Tienen', False: 'No tienen'})
    return counts


def contact_bar(counts, column, label):
    """Build the bar chart of how many places have / lack a contact channel.

    Args:
        counts: Frame produced by ``contact_counts`` for ``column``.
        column: Name of the contact flag column used on the x-axis.
        label: Display name of the channel, used in the axis label and title.

    Returns:
        The plotly figure (not yet shown).
    """
    return px.bar(
        counts,
        x=column,
        # 'name' holds the number of non-null names per group, which is used
        # as the number of places in that group.
        y='name',
        labels={column: label, 'name': 'Cantidad de locales'},
        color=column,
        title=f'Cantidad de locales que poseen {label}',
        height=300,
    )


# One count table per contact channel (names kept for any later cell).
df_type_facebook = contact_counts(df, 'contact.facebook')
df_type_instagram = contact_counts(df, 'contact.instagram')
df_type_website = contact_counts(df, 'contact.website')
df_type_phone = contact_counts(df, 'contact.phone')

# Same chart for every channel; only the column and the display name change.
contact_charts = (
    (df_type_facebook, 'contact.facebook', 'Facebook'),
    (df_type_instagram, 'contact.instagram', 'Instagram'),
    (df_type_website, 'contact.website', 'Website'),
    (df_type_phone, 'contact.phone', 'Telefono'),
)
for channel_counts, channel_column, channel_label in contact_charts:
    contact_bar(channel_counts, channel_column, channel_label).show()

In [4]:
def speciality_counts(frame, local_type):
    """Count how often each speciality appears among places of one type.

    Args:
        frame: DataFrame with a 'type' column and a 'speciality' column
            (exploded here, so each cell may hold several specialities).
        local_type: Value of 'type' to keep (e.g. 'bar').

    Returns:
        DataFrame with columns 'speciality' and 'count', most frequent first.
    """
    counts = (
        frame.loc[frame['type'] == local_type, 'speciality']
        .explode()
        .value_counts()
        .reset_index()
    )
    # Name the columns explicitly: what reset_index() calls them after
    # value_counts() differs between pandas 1.x and 2.x, and the charts below
    # need exactly 'speciality' and 'count'.
    counts.columns = ['speciality', 'count']
    return counts


def speciality_bar(counts, title):
    """Build the bar chart of speciality frequencies with the given title."""
    return px.bar(
        counts,
        x='speciality',
        y='count',
        title=title,
        labels={'speciality': 'Especialidad', 'count': 'Cantidad'},
        color='speciality',
        height=550,
    )


# Speciality frequency tables, one per type of place.
df_bar = speciality_counts(df, 'bar')
df_restaurant = speciality_counts(df, 'restaurant')
df_barrestaurant = speciality_counts(df, 'barrestaurant')

fig_bar = speciality_bar(df_bar, 'Especialidades más comunes en los Bares:')
fig_restaurant = speciality_bar(df_restaurant, 'Especialidades más comunes en los Restaurantes:')
fig_barrestaurant = speciality_bar(df_barrestaurant, 'Especialidades más comunes en los Bares-restaurantes:')

fig_bar.show()
fig_restaurant.show()
fig_barrestaurant.show()

In [5]:
# Keep only the rows whose type is 'barrestaurant'.
is_barrestaurant = df['type'].eq('barrestaurant')
df_barrestaurant = df.loc[is_barrestaurant]

# Explode the 'services' column so every service is counted on its own,
# then tabulate how many times each one appears.
service_frequency = df_barrestaurant['services'].explode().value_counts()
df_services = service_frequency.reset_index()
df_services.columns = ['service', 'count']

# Bar chart with one bar per service, coloured by service.
services_chart_options = {
    'x': 'service',
    'y': 'count',
    'title': 'Servicios más comunes y menos comunes en los Bar Restaurant',
    'labels': {'service': 'Servicio', 'count': 'Cantidad'},
    'color': 'service',
    'height': 700,
}
fig = px.bar(df_services, **services_chart_options)

fig.show()

In [6]:
# Menu columns of `df` whose entries are counted below.
menu_categories = [
    'menu.starters',
    'menu.salads',
    'menu.vegetables',
    'menu.meats',
    'menu.beef',
    'menu.seafood',
    'menu.burgers',
    'menu.desserts',
    'menu.cocktails',
    'menu.twist_cocktails',
    'menu.classic_cocktails',
    'menu.tiki_cocktails',
    'menu.tiki_author_cocktails',
    'menu.others',
    'menu.drinks',
]

# For each category, explode the column into individual entries and count
# the non-null ones.
menu_counts = {}
for category in menu_categories:
    entries = df[category].explode()
    menu_counts[category] = entries.notna().sum()

# Two-column table (category, count) used for both the display and the chart.
count_rows = list(menu_counts.items())
df_menu_counts = pd.DataFrame(count_rows, columns=['category', 'count'])
display(df_menu_counts)

menu_chart_options = {
    'x': 'category',
    'y': 'count',
    'title': 'Cantidad de platos en cada categoría del menú',
    'labels': {'category': 'Categoría', 'count': 'Cantidad'},
    'color': 'category',
    'height': 550,
}
fig = px.bar(df_menu_counts, **menu_chart_options)

fig.show()

Unnamed: 0,category,count
0,menu.starters,1892
1,menu.salads,77
2,menu.vegetables,58
3,menu.meats,768
4,menu.beef,151
5,menu.seafood,583
6,menu.burgers,125
7,menu.desserts,488
8,menu.cocktails,413
9,menu.twist_cocktails,14


In [7]:
# Average of the 'menu_starters_mean' column for each municipality.
df2 = df.groupby('municipality', as_index=False)['menu_starters_mean'].mean()
display(df2)

# NOTE(review): unlike the other charts in this notebook, this one has no
# title or axis labels — consider adding them.
# Kept as the cell's last expression so the notebook renders the figure.
px.bar(df2, x='municipality', y='menu_starters_mean')

Unnamed: 0,municipality,menu_starters_mean
0,Arroyo Naranjo,832.841424
1,Boyeros,1069.910435
2,Centro Habana,1109.281573
3,Cerro,673.635185
4,Cotorro,812.905576
5,Diez de Octubre,1523.209665
6,Guanabacoa,718.759019
7,Habana Vieja,1863.626955
8,Habana del Este,771.061557
9,La Lisa,714.783333


In [8]:
# Price distribution of national vs. imported drinks.
#
# Only the two drink-price columns are reshaped. Melting the whole frame
# turned every column of `df` into a "drink", and since the origin rule was
# "name contains 'nacional', otherwise imported", all unrelated columns were
# labelled 'Importada' and distorted both the violins and the percentages.
# NOTE(review): assumes `df` has the 'drinks.nacional' and 'drinks.importada'
# columns that the rename map targets — confirm against obtener_df. If they
# are missing this now fails with a KeyError instead of plotting wrong data.
drink_columns = {'drinks.nacional': 'nacional', 'drinks.importada': 'importada'}
origin_labels = {'nacional': 'Nacional', 'importada': 'Importada'}

df_drinks_prices = (
    df[list(drink_columns)]
    .rename(columns=drink_columns)
    .melt(var_name='drink', value_name='price')
)
df_drinks_prices['origin'] = df_drinks_prices['drink'].map(origin_labels)

# Convert before dropping: values that cannot be parsed become NaN and must
# not be plotted or counted in the percentages.
df_drinks_prices['price'] = pd.to_numeric(df_drinks_prices['price'], errors='coerce')
df_drinks_prices = df_drinks_prices.dropna(subset=['price'])

# Share of priced entries per origin, in percent. reindex() guarantees both
# labels exist (0.0 when an origin has no prices) so the title never raises
# a KeyError.
origin_counts = df_drinks_prices['origin'].value_counts()
total_drinks = origin_counts.sum()
percentages = (origin_counts / total_drinks * 100).reindex(
    ['Nacional', 'Importada'], fill_value=0.0
)

# The title is set once, directly with the computed percentages.
fig = px.violin(
    df_drinks_prices,
    x='origin',
    y='price',
    color='origin',
    box=True,
    points='all',
    title=f'Distribución de precios de las bebidas (Nacionales: {percentages["Nacional"]:.2f}%, Importadas: {percentages["Importada"]:.2f}%)',
    labels={'origin': 'Origen', 'price': 'Precio'}
)

fig.show()