# ETL Business Yelp


Primero cargamos las dependencias que usaremos para el ETL y el archivo de Business yelp.

In [183]:
import numpy as np
import pandas as pd
import json
import ast
from ast import literal_eval

file_path = '/content/drive/MyDrive/yelp/Copia de business.pkl'

df = pd.read_pickle(file_path)
df.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,state.1,postal_code.1,latitude.1,longitude.1,stars.1,review_count.1,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,,93101,34.426679,-119.711197,5.0,7,...,,,,,,,,,,
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,,63123,38.551126,-90.335695,3.0,15,...,,,,,,,,,,
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,,85711,32.223236,-110.880452,3.5,22,...,,,,,,,,,,


Observamos la primera fila y las columnas.
Aqui encontramos columnas duplicadas.

In [184]:
df.iloc[0]

business_id                                Pns2l4eNsfO8kk83dixA6A
name                                     Abby Rappoport, LAC, CMQ
address                                    1616 Chapala St, Ste 2
city                                                Santa Barbara
state                                                         NaN
postal_code                                                 93101
latitude                                                34.426679
longitude                                             -119.711197
stars                                                         5.0
review_count                                                    7
is_open                                                         0
attributes                          {'ByAppointmentOnly': 'True'}
categories      Doctors, Traditional Chinese Medicine, Naturop...
hours                                                        None
business_id                                                   NaN
name      

Dropeamos las columnas duplicadas.

In [185]:
df_no_duplicate_cols = df.loc[:, ~df.columns.duplicated()].copy()
df_no_duplicate_cols.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."


Verificamos las columnas.

In [186]:
df_no_duplicate_cols.iloc[0]

business_id                                Pns2l4eNsfO8kk83dixA6A
name                                     Abby Rappoport, LAC, CMQ
address                                    1616 Chapala St, Ste 2
city                                                Santa Barbara
state                                                         NaN
postal_code                                                 93101
latitude                                                34.426679
longitude                                             -119.711197
stars                                                         5.0
review_count                                                    7
is_open                                                         0
attributes                          {'ByAppointmentOnly': 'True'}
categories      Doctors, Traditional Chinese Medicine, Naturop...
hours                                                        None
Name: 0, dtype: object

Observamos un valor.

In [187]:
df_no_duplicate_cols['categories'].iloc[0]

'Doctors, Traditional Chinese Medicine, Naturopathic/Holistic, Acupuncture, Health & Medical, Nutritionists'

### De la ultima verificacion vemos que tendremos que desanidar las siguientes columnas:

A. Column attributes.

B. column categories.

C. Column hours.

Decidimos centrarnos en Florida!!!


### Pasos Previos a desanidar:

Filtramos primero por codigo postal, luego filtramos solo ciudades, corregimos la columna 'state' y filtramos las categorias y subcategorias mas importantes.

In [188]:
# FILTRO POR CODIGO POSTAL

# Convertimos los codigos postal a numerico para filtrar mejor
df_no_duplicate_cols['postal_code'] = pd.to_numeric(df_no_duplicate_cols['postal_code'], errors='coerce')

# Filtramos las fila por codigos postale entre 32003 y 39819
df_filtered_by_code_postal_FLORIDA = df_no_duplicate_cols[(df_no_duplicate_cols['postal_code'] >= 32003) & (df_no_duplicate_cols['postal_code'] <= 34997)].copy()
# Fuente codigo postal https://www.zipcode.com.ng/2022/06/list-of-florida-zip-codes-.html

In [189]:
# FILTRO POR CIUDADES FLORIDA

df_florida_cities = pd.read_json('/content/drive/MyDrive/Otros/us-cities-table-for-florida.json')
# Fuente Json https://worldpopulationreview.com/states/cities/florida

florida_names_list = df_florida_cities['name'].tolist()

florida_cities_postal_code = df_filtered_by_code_postal_FLORIDA[df_filtered_by_code_postal_FLORIDA['city'].isin(florida_names_list)]



In [190]:
# CORREGIMOS COLUMNA STATE

florida_cities_postal_code.loc[:, 'state'] = 'FL'

florida_cities_postal_code = florida_cities_postal_code.copy()
florida_cities_postal_code.to_csv('/content/drive/MyDrive/Otros/borrador_florida_cities_postal_code.csv', index=False)
florida_cities_postal_code.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  florida_cities_postal_code.loc[:, 'state'] = 'FL'


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707.0,27.76659,-82.732983,3.5,5,1,,"Synagogues, Religious Organizations","{'Monday': '9:0-17:0', 'Tuesday': '9:0-17:0', ..."
13,jaxMSoInw8Poo3XeMJt8lQ,Adams Dental,15 N Missouri Ave,Clearwater,FL,33755.0,27.966235,-82.787412,5.0,10,1,{'ByAppointmentOnly': 'True'},"General Dentistry, Dentists, Health & Medical,...","{'Monday': '7:30-15:30', 'Tuesday': '7:30-15:3..."
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771.0,27.916116,-82.760461,4.5,100,0,"{'OutdoorSeating': 'False', 'RestaurantsGoodFo...","Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'..."


In [191]:
# 10 CATEGORIAS MAS IMPORTANTES EN YELP
# Dividir las categorías en subcategorías y contarlas
subcategories = florida_cities_postal_code['categories'].str.split(', ').explode()

# INcluded categories
categories = [ 'Restaurants','Afghan','African','Senegalese','South African','American (New)','American (Traditional)','Andalusian','Arabian', 'Arab Pizza','Argentine',
              'Armenian','Asian Fusion','Asturian','Australian','Austrian','Baguettes','Bangladeshi','Barbeque','Basque','Bavarian','Beer Garden','Beer Hall','Beisl','Belgian',
              'Flemish','Bistros','Black Sea','Brasseries','Brazilian','Brazilian Empanadas','Central Brazilian','Northeastern Brazilian','Northern Brazilian','Rodizios',
              'Breakfast & Brunch','Pancakes','British','Buffets','Bulgarian','Burgers','Burmese','Cafes','Themed Cafes','Cafeteria','Cajun/Creole','Cambodian','Canadian (New)',
              'Canteen','Caribbean','Dominican','Haitian','Puerto Rican','Trinidadian','Catalan','Cheesesteaks','Chicken Shop','Chicken Wings','Chilean','Chinese','Cantonese',
              'Congee','Dim Sum','Fuzhou','Hainan','Hakka','Henghwa','Hokkien','Hunan','Pekinese','Shanghainese','Szechuan','Teochew','Comfort Food','Corsican','Creperies',
              'Cuban','Curry Sausage','Cypriot','Czech','Czech/Slovakian','Danish','Delis','Diners','Dinner Theater','Dumplings','Eastern European','Eritrean','Ethiopian',
              'Fast Food','Filipino','Fischbroetchen','Fish & Chips','Flatbread','Fondue','Food Court','Food Stands','Freiduria','French','Alsatian','Auvergnat','Berrichon',
              'Bourguignon','Mauritius','Nicoise','Provencal','Reunion','French Southwest','Galician','Game Meat','Gastropubs','Georgian','German','Baden','Eastern German',
              'Franconian','Hessian','Northern German','Palatine','Rhinelandian','Giblets','Gluten-Free','Greek','Guamanian','Halal','Hawaiian','Heuriger','Himalayan/Nepalese',
              'Honduran','Hong Kong Style Cafe','Hot Dogs','Hot Pot','Hungarian','Iberian','Indian','Indonesian','International','Irish','Island Pub','Israeli','Italian',
              'Abruzzese','Altoatesine','Apulian','Calabrian','Cucina Campana','Emilian','Friulan','Ligurian','Lumbard','Napoletana','Piemonte','Roman','Sardinian','Sicilian',
              'Tuscan','Venetian','Japanese','Blowfish','Conveyor Belt Sushi','Donburi','Gyudon','Oyakodon','Hand Rolls','Horumon','Izakaya','Japanese Curry','Kaiseki',
              'Kushikatsu','Oden','Okinawan','Okonomiyaki','Onigiri','Ramen','Robatayaki','Soba','Sukiyaki','Takoyaki','Tempura','Teppanyaki','Tonkatsu','Udon','Unagi',
              'Western Style Japanese Food','Yakiniku','Yakitori','Jewish','Kebab','Kopitiam','Korean','Kosher','Kurdish','Laos','Laotian','Latin American','Colombian',
              'Salvadoran','Venezuelan','Live/Raw Food','Lyonnais','Malaysian','Mamak','Nyonya','Meatballs','Mediterranean','Falafel','Mexican','Eastern Mexican','Jaliscan',
              'Northern Mexican','Oaxacan','Pueblan','Tacos','Tamales','Yucatan','Middle Eastern','Egyptian','Lebanese','Milk Bars','Modern Australian','Modern European',
              'Mongolian','Moroccan','New Mexican Cuisine','New Zealand','Nicaraguan','Night Food','Nikkei','Noodles','Norcinerie','Open Sandwiches','Oriental','Pakistani',
              'Pan Asian','Parent Cafes','Parma','Persian/Iranian','Peruvian','PF/Comercial','Pita','Pizza','Polish','Pierogis','Polynesian','Pop-Up Restaurants','Portuguese',
              'Alentejo','Algarve','Azores','Beira','Fado Houses','Madeira','Minho','Ribatejo','Tras-os-Montes','Potatoes','Poutineries','Pub Food','Rice','Romanian',
              'Rotisserie Chicken','Russian','Salad','Sandwiches','Scandinavian','Schnitzel','Scottish','Seafood','Serbo Croatian','Signature Cuisine','Singaporean',
              'Slovakian','Somali','Soul Food','Soup','Southern','Spanish','Arroceria/Paella','Sri Lankan','Steakhouses','Supper Clubs','Sushi Bars','Swabian','Swedish',
              'Swiss Food','Syrian','Tabernas','Taiwanese','Tapas Bars','Tapas/Small Plates','Tavola Calda','Tex-Mex','Thai','Traditional Norwegian','Traditional Swedish',
              'Trattorie','Turkish','Chee Kufta','Gozleme','Homemade Food','Lahmacun','Ottoman','Cuisine','Turkish','Ravioli','Ukrainian','Uzbek','Vegan','Vegetarian','Venison',
              'Vietnamese','Waffles','Wok','Wraps','Yugoslav']

#Fuente: https://blog.yelp.com/businesses/yelp_category_list/

category_counts = subcategories[subcategories.isin(categories)].value_counts() #/ len(florida_cities_postal_code)

top_category_names = category_counts.head(12).index.tolist()

print(top_category_names)

category_counts.head(11)

['Restaurants', 'American (Traditional)', 'Sandwiches', 'Breakfast & Brunch', 'Fast Food', 'American (New)', 'Pizza', 'Burgers', 'Seafood', 'Italian', 'Mexican', 'Cafes']


Restaurants               6044
American (Traditional)    1005
Sandwiches                1003
Breakfast & Brunch         753
Fast Food                  708
American (New)             697
Pizza                      679
Burgers                    601
Seafood                    577
Italian                    492
Mexican                    485
Name: categories, dtype: int64

## 1. Columna attributes

In [192]:
print(florida_cities_postal_code.iloc[5]['attributes'])
print(type(florida_cities_postal_code.iloc[5]['attributes']))

{'BusinessParking': "{'garage': False, 'street': False, 'validated': False, 'lot': True, 'valet': False}", 'BikeParking': 'False', 'OutdoorSeating': 'True', 'RestaurantsDelivery': 'False', 'GoodForKids': 'True', 'RestaurantsTakeOut': 'True', 'BusinessAcceptsCreditCards': 'True', 'Caters': 'True', 'NoiseLevel': "u'quiet'", 'WiFi': "'no'", 'HasTV': 'True', 'Ambience': "{'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'divey': False, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': True}", 'RestaurantsAttire': "u'casual'", 'RestaurantsReservations': 'True', 'Alcohol': "u'full_bar'", 'RestaurantsGoodForGroups': 'True', 'RestaurantsPriceRange2': '2'}
<class 'dict'>


## 1.1 Listamos los atributos que vamos a analizar, despues de hecharle un vistazo a varios valores y coordinar con el equipo:

'RestaurantsPriceRange2'

'BusinessAcceptsCreditCards'

'RestaurantsReservations'

'Ambience'

'GoodForKids'

'RestaurantsDelivery'

'OutdoorSeating'

'Alcohol'

'RestaurantsGoodForGroups'


## 1.2 Transformamos teniendo en cuenta la lista de arriba.

In [193]:
desired_keys = ['RestaurantsPriceRange2','BusinessAcceptsCreditCards','RestaurantsReservations','Ambience','GoodForKids','RestaurantsDelivery','OutdoorSeating','Alcohol','RestaurantsGoodForGroups']

# Esta funcion verifica si el valor de la celda en la fila tiene formato diccionario y luego extrae la llave del diccionario.
def format_attributes_dict(attributes_dict):
    if isinstance(attributes_dict, dict):
        filtered_dict = {k: attributes_dict[k] for k in desired_keys if k in attributes_dict}
        return filtered_dict
    else:
        return attributes_dict

# Aplicamos la funcion de arriba y creamos las nuevas columnas.
florida_cities_postal_code['attributes'] = florida_cities_postal_code['attributes'].apply(format_attributes_dict)

# Concatenamos las nuevas columnas al DataFrame original.
florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)


  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.concat([florida_cities_postal_code, florida_cities_postal_code['attributes'].apply(pd.Series)], axis=1)
  florida_cities_postal_code = pd.

In [194]:
florida_cities_postal_code.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,hours,RestaurantsPriceRange2,BusinessAcceptsCreditCards,RestaurantsReservations,Ambience,GoodForKids,RestaurantsDelivery,OutdoorSeating,Alcohol,RestaurantsGoodForGroups
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707.0,27.76659,-82.732983,3.5,5,...,"{'Monday': '9:0-17:0', 'Tuesday': '9:0-17:0', ...",,,,,,,,,
13,jaxMSoInw8Poo3XeMJt8lQ,Adams Dental,15 N Missouri Ave,Clearwater,FL,33755.0,27.966235,-82.787412,5.0,10,...,"{'Monday': '7:30-15:30', 'Tuesday': '7:30-15:3...",,,,,,,,,
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771.0,27.916116,-82.760461,4.5,100,...,"{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'...",1.0,True,False,"{'romantic': False, 'intimate': False, 'touris...",True,True,False,u'none',False


In [195]:
florida_cities_postal_code.iloc[0]

business_id                                              qkRM_2X51Yqxk3btlwAQIg
name                                                             Temple Beth-El
address                                                      400 Pasadena Ave S
city                                                             St. Petersburg
state                                                                        FL
postal_code                                                             33707.0
latitude                                                               27.76659
longitude                                                            -82.732983
stars                                                                       3.5
review_count                                                                  5
is_open                                                                       1
attributes                                                                 None
categories                              

## 1.3 Encontramos que la columna Ambience tiene mas datos a desanidar y son importantes para nuestro analisis.

In [196]:
print(type(florida_cities_postal_code['Ambience'].iloc[15]))
florida_cities_postal_code['Ambience'].iloc[15]

<class 'str'>


"{'touristy': False, 'hipster': False, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': False, 'upscale': False, 'classy': False, 'casual': False}"

In [197]:
import ast
import numpy as np
import pandas as pd

# Este paso es crucial porque el dato figura con formato "str" pero es realidad es un diccionario, la funcion lambda transforma de str a dict el dato para luego crear nuevas columnas.
florida_cities_postal_code['Ambience'] = florida_cities_postal_code['Ambience'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else {})

desired_keys = ['touristy', 'hipster', 'romantic', 'divey', 'intimate', 'trendy', 'upscale', 'classy', 'casual']

for key in desired_keys:
    florida_cities_postal_code[key] = florida_cities_postal_code['Ambience'].apply(lambda x: x.get(key, None) if isinstance(x, dict) else None)

florida_cities_postal_code.drop(columns=['Ambience'], inplace=True)

florida_cities_postal_code.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,RestaurantsGoodForGroups,touristy,hipster,romantic,divey,intimate,trendy,upscale,classy,casual
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707.0,27.76659,-82.732983,3.5,5,...,,,,,,,,,,
13,jaxMSoInw8Poo3XeMJt8lQ,Adams Dental,15 N Missouri Ave,Clearwater,FL,33755.0,27.966235,-82.787412,5.0,10,...,,,,,,,,,,
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771.0,27.916116,-82.760461,4.5,100,...,False,False,False,False,False,False,False,False,False,True


In [198]:
florida_cities_postal_code.iloc[4]

business_id                                              0qNpTGTcqPwOLi2hADx4Xw
name                                                           Charlie's Market
address                                                        2815 E Sligh Ave
city                                                                      Tampa
state                                                                        FL
postal_code                                                             33610.0
latitude                                                               28.01036
longitude                                                            -82.430042
stars                                                                       3.0
review_count                                                                  9
is_open                                                                       1
attributes                    {'RestaurantsPriceRange2': '1', 'BusinessAccep...
categories                              

## Atributos:

## RestaurantsPriceRange2, BusinessAcceptsCreditCards, RestaurantsReservations, GoodForKids, RestaurantsDelivery, OutdoorSeating, Alcohol, RestaurantsGoodForGroups, Ambience

Ambience:
touristy, hipster, romantic, divey, intimate, trendy, upscale, classy, casual

## 2. Columna categories

In [199]:
print(florida_cities_postal_code.iloc[17970]['categories'])
print(type(florida_cities_postal_code.iloc[17970]['categories']))

Packing Services, Home Services, Movers, Local Services, Self Storage
<class 'str'>


In [200]:
df1 = florida_cities_postal_code.iloc[:30]
df2 = florida_cities_postal_code.iloc[4491:8982]
df2 = florida_cities_postal_code.iloc[8982:]

In [201]:
# Creamos lista de categorias.
florida_cities_postal_code.loc[:, 'categories_list'] = florida_cities_postal_code['categories'].str.split(', ')

# Expandimos la lista de categorias creada arriba.
df_expanded = florida_cities_postal_code.explode('categories_list')

# Filtramos de la df_expanded los solo las categorias de restaurants listadas en "top_category_names"
df_expanded_filtered = df_expanded[df_expanded['categories_list'].isin(top_category_names)]

#Creamos columnas en las que asignamos 1 a la fila que posee o tiene uno de las categorias filtradas en caso contrario le asigna 0.
category_columns = pd.crosstab(df_expanded_filtered.index, df_expanded_filtered['categories_list']).reset_index()

# Concatenamos las nuevas columnas al DataFrame original.
df_with_categories = pd.concat([florida_cities_postal_code.reset_index(drop=True), category_columns], axis=1)

# Dropeamos las columnas categories y categories_list.
columns_to_drop = ['categories', 'categories_list']
df_with_categories = df_with_categories.drop(columns=columns_to_drop)

florida_cities_postal_code.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,touristy,hipster,romantic,divey,intimate,trendy,upscale,classy,casual,categories_list
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707.0,27.76659,-82.732983,3.5,5,...,,,,,,,,,,"[Synagogues, Religious Organizations]"
13,jaxMSoInw8Poo3XeMJt8lQ,Adams Dental,15 N Missouri Ave,Clearwater,FL,33755.0,27.966235,-82.787412,5.0,10,...,,,,,,,,,,"[General Dentistry, Dentists, Health & Medical..."
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771.0,27.916116,-82.760461,4.5,100,...,False,False,False,False,False,False,False,False,True,"[Food, Delis, Italian, Bakeries, Restaurants]"


In [202]:
florida_cities_postal_code.iloc[17970]

business_id                                              Fck8i0fNQCa22ERz5Fa21w
name                                                          Thoughtful Moving
address                                                       5004 E Fowler Ave
city                                                                      Tampa
state                                                                        FL
postal_code                                                             33617.0
latitude                                                              28.054934
longitude                                                            -82.400832
stars                                                                       2.0
review_count                                                                 27
is_open                                                                       1
attributes                               {'BusinessAcceptsCreditCards': 'True'}
categories                    Packing Se

### 3. Columna hours

In [203]:
print(florida_cities_postal_code.iloc[2]['hours'])
print(type(florida_cities_postal_code.iloc[2]['hours']))

{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0', 'Wednesday': '10:0-20:0', 'Thursday': '10:0-20:0', 'Friday': '10:0-20:0', 'Saturday': '10:0-20:0'}
<class 'dict'>


In [204]:
flattened_datah = pd.json_normalize(florida_cities_postal_code['hours'])
df_unnestedh = pd.concat([florida_cities_postal_code, flattened_datah], axis=1)
df_unnestedh = df_unnestedh.drop(['hours'], axis=1)

In [205]:
florida_cities_postal_code = df_unnestedh.copy()
florida_cities_postal_code.head(3)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,classy,casual,categories_list,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707.0,27.76659,-82.732983,3.5,5,...,,,"[Synagogues, Religious Organizations]",,,,,,,
13,jaxMSoInw8Poo3XeMJt8lQ,Adams Dental,15 N Missouri Ave,Clearwater,FL,33755.0,27.966235,-82.787412,5.0,10,...,,,"[General Dentistry, Dentists, Health & Medical...",8:0-20:0,8:0-20:0,8:0-20:0,8:0-20:0,8:0-20:0,10:0-18:0,10:0-18:0
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771.0,27.916116,-82.760461,4.5,100,...,False,True,"[Food, Delis, Italian, Bakeries, Restaurants]",,16:0-21:0,16:0-21:0,16:0-21:0,12:0-22:30,12:0-22:30,12:0-19:0


In [206]:
florida_cities_postal_code.iloc[4]

business_id                                              0qNpTGTcqPwOLi2hADx4Xw
name                                                           Charlie's Market
address                                                        2815 E Sligh Ave
city                                                                      Tampa
state                                                                        FL
postal_code                                                             33610.0
latitude                                                               28.01036
longitude                                                            -82.430042
stars                                                                       3.0
review_count                                                                  9
is_open                                                                       1
attributes                    {'RestaurantsPriceRange2': '1', 'BusinessAccep...
categories                              

In [207]:
florida_cities_postal_code.fillna("sin datos", inplace=True)

In [208]:
florida_cities_postal_code.drop(columns=['attributes','categories','categories_list'])

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,upscale,classy,casual,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
7,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,FL,33707.0,27.76659,-82.732983,3.5,5,...,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos
13,jaxMSoInw8Poo3XeMJt8lQ,Adams Dental,15 N Missouri Ave,Clearwater,FL,33755.0,27.966235,-82.787412,5.0,10,...,sin datos,sin datos,sin datos,8:0-20:0,8:0-20:0,8:0-20:0,8:0-20:0,8:0-20:0,10:0-18:0,10:0-18:0
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771.0,27.916116,-82.760461,4.5,100,...,False,False,True,sin datos,16:0-21:0,16:0-21:0,16:0-21:0,12:0-22:30,12:0-22:30,12:0-19:0
50,Hwt3_mOEmU-t--ywcemnMg,Gold's Gym,203 - 38th Ave N,St. Petersburg,FL,33704.0,27.808339,-82.636168,3.0,17,...,sin datos,sin datos,sin datos,7:30-15:0,7:30-15:0,7:30-15:0,7:30-15:0,7:30-15:0,sin datos,sin datos
54,0qNpTGTcqPwOLi2hADx4Xw,Charlie's Market,2815 E Sligh Ave,Tampa,FL,33610.0,28.01036,-82.430042,3.0,9,...,sin datos,sin datos,sin datos,8:0-18:0,8:0-18:0,8:0-18:0,8:0-18:0,8:0-18:0,sin datos,sin datos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17967,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,...,sin datos,sin datos,sin datos,11:0-23:0,11:0-23:0,11:0-23:0,11:0-23:0,11:0-23:0,11:0-23:0,11:0-23:0
17968,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,...,sin datos,sin datos,sin datos,9:0-21:0,9:0-21:0,9:0-21:0,9:0-21:0,9:0-21:0,9:0-21:0,9:0-21:0
17970,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,...,sin datos,sin datos,sin datos,22:0-22:30,8:0-19:0,8:0-19:0,8:0-19:0,22:0-22:30,9:0-16:0,22:0-22:30
17971,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,sin datos,...,sin datos,sin datos,sin datos,11:30-23:0,11:30-23:0,11:30-23:0,11:30-23:0,11:30-0:30,11:30-0:30,11:30-23:0


In [209]:
florida_cities_postal_code.iloc[4]

business_id                                              0qNpTGTcqPwOLi2hADx4Xw
name                                                           Charlie's Market
address                                                        2815 E Sligh Ave
city                                                                      Tampa
state                                                                        FL
postal_code                                                             33610.0
latitude                                                               28.01036
longitude                                                            -82.430042
stars                                                                       3.0
review_count                                                                  9
is_open                                                                       1
attributes                    {'RestaurantsPriceRange2': '1', 'BusinessAccep...
categories                              

In [210]:
florida_cities_postal_code['GoodForKids'].value_counts()

sin datos    27650
True          5242
False          862
None             2
Name: GoodForKids, dtype: int64

In [211]:
florida_cities_postal_code.to_csv('/content/drive/MyDrive/Otros/yelp_business_florida_final.csv', index=False)