In [261]:
import pandas as pd
import numpy as np
import ast

# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [262]:
# Load the raw dataset; the first CSV column is used as the row index.
data = pd.read_csv(filepath_or_buffer="spain.csv", index_col=0)
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_detailed,popularity_generic,top_tags,price_level,price_range,meals,cuisines,special_diets,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,default_language,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value,atmosphere,keywords
320900,g10021880-d13763192,Taberna La Sacristia,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 19, 41870 Aznalcollar Spain",37.51928,-6.26885,Unclaimed,,#4 of 5 Restaurants in Aznalcollar,#4 of 6 places to eat in Aznalcollar,Spanish,,,,Spanish,,,N,N,N,,,,,3.0,1.0,English,1.0,0.0,0.0,1.0,0.0,0.0,,,,,
320901,g10021880-d15758746,Tasca el Capricho,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 9, 41870 Aznalcollar S...",37.52065,-6.26822,Unclaimed,,#3 of 5 Restaurants in Aznalcollar,#3 of 6 places to eat in Aznalcollar,Spanish,,,,Spanish,,,N,N,N,,,,,5.0,2.0,All languages,2.0,2.0,0.0,0.0,0.0,0.0,,,,,
320902,g10021880-d19332558,Bar Las Adelfas,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle Perdon N° 23 Capilla de La Cruz, 41870 A...",37.52428,-6.27144,Claimed,,#5 of 5 Restaurants in Aznalcollar,#5 of 6 places to eat in Aznalcollar,"Mediterranean, Spanish, Grill, Diner",,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Mediterranean, Spanish, Grill, Diner, Dining bars",,,N,N,N,"{""Mon"": [], ""Tue"": [""19:30-23:45""], ""Wed"": [""1...",6.0,51.0,6.0,3.0,2.0,All languages,2.0,1.0,0.0,0.0,0.0,1.0,,,,,
320903,g10021880-d19468788,El Rincon nº 7,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del Concejo 60, 41870 Aznalcollar Spain",37.51714,-6.2686,Claimed,,#1 of 5 Restaurants in Aznalcollar,#1 of 6 places to eat in Aznalcollar,"Mid-range, Steakhouse, Cafe, Spanish",€€-€€€,€2-€18,"Lunch, Dinner, Drinks","Steakhouse, Cafe, Dining bars, Spanish",,,N,N,N,"{""Mon"": [], ""Tue"": [], ""Wed"": [], ""Thu"": [""20:...",4.0,23.0,7.0,5.0,18.0,All languages,18.0,17.0,1.0,0.0,0.0,0.0,,,,,
320904,g10021880-d19847377,Nuevo jacaranda,"[""Europe"", ""Spain"", ""Andalucia"", ""Province of ...",Spain,Andalucia,Province of Seville,,"Calle del 28 de Febrero 2, 41870 Aznalcollar S...",37.52088,-6.26844,Claimed,,,,,,,,,,Reservations,N,N,N,,,,,,,,,,,,,,,,,,


In [263]:
# Work on an independent (deep) copy of the loaded frame.
df = data.copy(deep=True)

In [264]:
# Number of missing 'city' values before the fill-in helper is applied.
pd.isna(df["city"]).sum()

102884

In [265]:
# Parse the stringified lists in 'original_location' into real Python lists.
# Non-string values (NaN, or lists left behind by a previous run of this cell)
# are passed through unchanged: ast.literal_eval raises ValueError on anything
# that is not a string, which would otherwise make this cell fail on re-run.
df['original_location'] = df['original_location'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [266]:
# Helper that fills in a missing city from the parsed location hierarchy
def actualizar_city_si_nan(row, min_levels=4):
    """
    Return the city for one row, falling back to 'original_location' when 'city' is missing.

    If 'city' is missing (NaN/None) and 'original_location' is a list with at least
    `min_levels` entries, the last entry of that list is returned. The default of 4
    keeps the original threshold (`len(...) > 3`): shorter lists such as
    ['Europe', 'Spain'] are skipped, so their last entry is never used as a city.

    Parameters:
    row (pd.Series): A row supplied by `DataFrame.apply(..., axis=1)`. Must provide
        the 'city' and 'original_location' entries.
    min_levels (int): Minimum number of entries 'original_location' must have before
        its last entry is used. Values below 1 are treated as 1 so an empty list is
        never indexed.

    Returns:
    The current 'city' value when it is not missing; otherwise the last entry of
    'original_location' when that is a long-enough list; otherwise the original
    (missing) 'city' value, unchanged.
    """
    city = row['city']
    if not pd.isna(city):
        return city

    location = row['original_location']
    # Only trust the list when it has enough entries for its last one to be used.
    if isinstance(location, list) and len(location) >= max(min_levels, 1):
        return location[-1]

    # Nothing usable: keep the value missing.
    return city

# Run the helper over every row and store the result back in 'city'.
df['city'] = df.apply(func=actualizar_city_si_nan, axis='columns')

In [267]:
# Number of missing 'city' values after the fill-in helper was applied.
pd.isna(df["city"]).sum()

520

In [268]:
# Number of missing 'province' values before the fill-in helper is applied.
pd.isna(df["province"]).sum()

29570

In [269]:
def actualizar_provincia_si_nan(row):
    """
    Return the province for one row, falling back to 'region' when 'province' is missing.

    Parameters:
    row (pd.Series): A row supplied by `DataFrame.apply(..., axis=1)`. Must provide
        'province'; 'region' is only read when 'province' is missing.

    Returns:
    The value of 'region' when 'province' is missing (NaN/None), otherwise the
    current 'province' value. If both are missing, the missing 'region' value is returned.
    """
    provincia = row['province']
    return row['region'] if pd.isna(provincia) else provincia

# Run the helper over every row and store the result back in 'province'.
df['province'] = df.apply(func=actualizar_provincia_si_nan, axis='columns')

In [270]:
# Number of missing 'province' values after the fill-in helper was applied.
pd.isna(df["province"]).sum()

2

In [273]:
# Show one randomly chosen row that still has no 'province'.
sin_provincia = df["province"].isna()
df.loc[sin_provincia].sample(n=1)

Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,claimed,awards,popularity_detailed,popularity_generic,top_tags,price_level,price_range,meals,cuisines,special_diets,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week,avg_rating,total_reviews_count,default_language,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value,atmosphere,keywords
354203,g187427-d21293370,Café Bar Los Luises,"[Europe, Spain]",Spain,,,,"Calle Antonio Andres Gonzalez, 17, 04431 Spain",40.4107,-3.727419,Unclaimed,,,,Spanish,,,,Spanish,,,N,N,N,,,,,4.0,1.0,English,1.0,0.0,1.0,0.0,0.0,0.0,,,,,


In [272]:
# Print the complete 'address' string for the row whose index label is 474188
# (a label lookup, not the first row of the frame).
print(df.loc[474188, 'address'])


Paseo Brusco 33, 39180 Noja Spain
