In [3]:
# import gmapsdatalib as gmd
# import geopy.point
# import pickle

# #Creating the grid
# center = geopy.point.Point(41.37697984106034, 2.1632463314138177)
# tl = geopy.point.Point(41.3786301374207, 2.160993240829185)
# br = geopy.point.Point(41.37852548131351, 2.1653276904578296)
# x_step = 100
# y_step = 100
# grid = gmd.gridMaker(center, tl, br, x_step, y_step)

# #Save grid in a file
# with open('ids_list.pkl', 'wb') as file:
#     pickle.dump(l, file)

In [3]:
import gmapsdatalib as gmd
import geopy.point
import pickle

# Load the list of restaurants (one dictionary per restaurant) from 'ids_full_info'.
# NOTE(review): judging by its name the helper unpickles files — confirm in gmapsdatalib,
# and only point it at pickle files from a trusted source.
restaurants_dict = gmd.pkl_files_to_list_of_dicts('ids_full_info')

In [4]:
import math
import pandas as pd


def get_postal_code_in_barcelona(restaurants_dict: list[dict]) -> list[str]:
    """Extract the postal code of every restaurant.

    For each restaurant the 'address_components' list is scanned for the first
    component whose 'types' contain 'postal_code'; that component's 'long_name'
    is the postal code.

    Args:
        restaurants_dict (list[dict]): list of restaurants (as dictionaries).

    Returns:
        list[str]: one entry per restaurant, in input order. The entry is the
        postal code (as a string) or ``math.nan`` when the restaurant has no
        'address_components', no postal-code component, or no 'long_name' on it.
    """
    postal_codes = []

    for restaurant in restaurants_dict:
        #NaN unless a postal-code component is found below
        postal_code = math.nan
        #'address_components' may be missing (or None); treat that as "no components"
        for component in restaurant.get('address_components') or []:
            #A component can carry several types, so test membership rather than
            #only looking at the first one (which also fails on an empty list)
            if 'postal_code' in (component.get('types') or []):
                postal_code = component.get('long_name', math.nan)
                break
        postal_codes.append(postal_code)

    return postal_codes

def get_simple_category(restaurants_dict: list[dict],
                        category: str) -> list[str]:
    """Collect the value of one "simple category" for every restaurant.

       A "simple category" is a key in a restaurant's dictionary whose value is a
       single variable (that is, neither a list nor a dictionary). "Simple categories" are:

       'adr_address', 'business_status', 'dine_in', 'formatted_address', 'formatted_phone_number', 'icon',
       'icon_background_color', 'icon_mask_base_uri', 'international_phone_number', 'name', 'place_id',
       'rating', 'reference', 'reservable', 'serves_beer', 'serves_breakfast', 'serves_brunch', 'serves_dinner',
       'serves_lunch', 'serves_vegetarian_food', 'serves_wine', 'takeout', 'url', 'user_ratings_total', 'utc_offset',
       'vicinity', 'website' and 'wheelchair_accessible_entrance'

       The function builds a list with the value of the requested category for each
       restaurant, or a NaN entry when the value is not available.

    Args:
        restaurants_dict (list[dict]): list of restaurants (as dictionaries).
        category (str): name of the category of interest.
    Returns:
        list[str]: one entry per restaurant, in input order. Entries keep whatever
        type the restaurant dictionary holds (not only strings) and are ``math.nan``
        when the key is missing or the entry cannot be indexed.
    """
    #List to return
    category_info = []

    for restaurant in restaurants_dict:
        try:
            value = restaurant[category]
        except (KeyError, TypeError):
            #KeyError: the restaurant has no such key.
            #TypeError: the entry is not subscriptable (e.g. None).
            #Anything else (KeyboardInterrupt included) is left to propagate.
            value = math.nan
        category_info.append(value)

    return category_info

def _postal_code_key(postal_code):
    """Normalise a postal code to the zero-padded 5-character lookup key.

    Returns None when the value cannot stand for a postal code
    (NaN, None, a non-integral number, ...).
    """
    if isinstance(postal_code, str):
        return postal_code.strip().zfill(5)
    try:
        as_int = int(postal_code)
    except (TypeError, ValueError, OverflowError):
        #None -> TypeError, NaN -> ValueError, infinity -> OverflowError
        return None
    if as_int != postal_code:
        #Non-integral number such as 8001.5
        return None
    return f'{as_int:05d}'


def get_nbh_distr_from_pc(df:pd.DataFrame) -> pd.DataFrame:
    """Add 'Neighbourhood' and 'District' columns derived from the 'Postal Code' column.

       Postal codes are matched as zero-padded 5-character strings; integer-like
       values (e.g. 8001, as obtained when the column is read back from a CSV) and
       unpadded strings are padded before the lookup.

       If the 'Postal Code' entry is missing or not a known postal code, both the
       neighbourhood and the district are set to NaN for that row.

    Args:
        df (DataFrame): DataFrame of restaurants with a 'Postal Code' column.

    Returns:
        DataFrame: the same DataFrame object that was passed in (it is modified
        in place), with two additional columns: "Neighbourhood" and "District".

    Raises:
        KeyError: if ``df`` has no 'Postal Code' column.
    """
    neighbourhoods = {
        '08001' : 'El Raval',
        '08002' : 'Barri Gotic',
        '08003' : 'Barceloneta',
        '08004' : 'Poble Sec',
        '08005' : 'Poblenou',
        '08006' : 'El Farro',
        '08007' : 'Antiga Esquerra Eixample',
        '08008' : 'Antiga Esquerra Eixample',
        '08009' : 'Dreta Eixample',
        '08010' : 'Dreta Eixample',
        '08011' : 'Esquerra Eixample',
        '08012' : 'Vila de Gracia',
        '08013' : 'El Fort Pienc',
        '08014' : 'La Bordeta - Les Corts',
        '08015' : 'Nova Esquerra Exiample - Sant Antoni',
        '08016' : 'Porta-La Prosperitat',
        '08017' : 'Vallvidrera',
        '08018' : 'El Clot',
        '08019' : 'El Besos - Sant Marti',
        '08020' : 'La Verneda - Sant Marti de Provençals',
        '08021' : 'Les Corts - Sarria',
        '08022' : 'Sarria - La Bonanova',
        '08023' : 'El Coll',
        '08024' : 'Gracia - Can Baro',
        '08025' : "Camps d'en Grassot i Gracia Nova",
        '08026' : "Camp de l'arpa",
        '08027' : "El Congres - Navas",
        '08028' : "Les Corts",
        '08029' : "Sarria",
        '08030' : "San Andres Palomar",
        '08031' : "Horta",
        '08032' : "La Font d'en Fargues",
        '08033' : "Trinitat - Torre Baro",
        '08034' : "Pedralbes",
        '08035' : "Sant Genis dels Agudells",
        '08036' : "Esquerra Eixample",
        '08037' : "Dreta Eixample",
        '08038' : "Montjuic",
        '08039' : "Port de Barcelona",
        '08040' : "Zona Franca",
        '08041' : "El Guinardo",
        '08042' : "Canyelles - Roquetes"
    }
    districts = {
        'El Raval' : 'Ciutat Vella',
        'Barri Gotic' : 'Ciutat Vella',
        'Barceloneta' : 'Ciutat Vella',
        'Poble Sec' : 'Sants - Montjuic',
        'Poblenou' : 'Sant Marti',
        'El Farro' : 'Sarria - Sant Gervasi',
        'Antiga Esquerra Eixample' : 'Eixample',
        'Dreta Eixample' : 'Eixample',
        'Esquerra Eixample' : 'Eixample',
        'Vila de Gracia' : 'Gracia',
        'El Fort Pienc' : 'Eixample',
        'La Bordeta - Les Corts' : 'Sants - Montjuic',
        'Nova Esquerra Exiample - Sant Antoni' : 'Eixample',
        'Porta-La Prosperitat' : 'Nou Barris',
        'Vallvidrera' : 'Sarria - Sant Gervasi',
        'El Clot' : 'Sant Marti',
        'El Besos - Sant Marti' : 'Sant Marti',
        'La Verneda - Sant Marti de Provençals' : 'Sant Marti',
        'Les Corts - Sarria' : 'Les Corts',
        #NOTE(review): 'Sarria - La Bonanova' looks like it belongs to 'Sarria - Sant Gervasi'
        #rather than 'Les Corts' — left unchanged, confirm against the official district map.
        'Sarria - La Bonanova' : 'Les Corts',
        'El Coll' : 'Gracia',
        'Gracia - Can Baro' : 'Gracia',
        "Camps d'en Grassot i Gracia Nova" : 'Gracia',
        "Camp de l'arpa" : 'Sant Marti',
        "El Congres - Navas" : 'Sant Andreu',
        "Les Corts" : 'Les Corts',
        "Sarria" : 'Sarria - Sant Gervasi',
        "San Andres Palomar" : 'Sant Andreu',
        "Horta" : 'Horta - Guinardo',
        "La Font d'en Fargues" : 'Horta - Guinardo',
        "Trinitat - Torre Baro" : 'Nou Barris',
        "Pedralbes" : 'Les Corts',
        "Sant Genis dels Agudells" : 'Horta - Guinardo',
        "Montjuic" : 'Sants - Montjuic',
        "Port de Barcelona" : 'Sants - Montjuic',
        "Zona Franca" : "Sants - Montjuic",
        "El Guinardo" : 'Horta - Guinardo',
        "Canyelles - Roquetes" : 'Nou Barris'
    }

    nbh_list = []
    dis_list = []

    for postal_code in df['Postal Code']:
        #The key is a string or None, so the lookup itself cannot raise
        nbh = neighbourhoods.get(_postal_code_key(postal_code))
        if nbh is None:
            nbh_list.append(math.nan)
            dis_list.append(math.nan)
        else:
            nbh_list.append(nbh)
            #Every neighbourhood above has a district; a KeyError here would
            #mean the two tables are out of sync and should not be hidden
            dis_list.append(districts[nbh])

    df['Neighbourhood'] = nbh_list
    df['District'] = dis_list

    return df

In [5]:
#Keep only the restaurants located in Barcelona
#NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
import restaurantManaging as rM

barcelona_restaurants = rM.get_restaurants_in_barcelona(restaurants_dict)


In [7]:
#Sanity check: how many restaurants remain after the Barcelona filter
len(barcelona_restaurants)

10577

In [15]:
#Restaurant fields that each become one column of the DataFrame
categories_of_interest = [
    'name', 'formatted_address', 'rating',
    'delivery', 'dine_in', 'reservable',
    'serves_beer', 'serves_brunch', 'serves_dinner', 'serves_lunch',
    'serves_vegetarian_food', 'serves_wine',
    'takeout', 'user_ratings_total', 'vicinity',
    'wheelchair_accessible_entrance',
]

#Column name -> list of values (NaN where a restaurant lacks the field)
dict_to_df = {
    field: get_simple_category(barcelona_restaurants, field)
    for field in categories_of_interest
}

In [19]:
#Coordinates of every restaurant, read from its 'geometry' -> 'location' entry
locations = [restaurant['geometry']['location'] for restaurant in barcelona_restaurants]

lat = [location['lat'] for location in locations]
lng = [location['lng'] for location in locations]

In [22]:
#Add the coordinate lists as two more future columns
dict_to_df['Latitude'] = lat
dict_to_df['Longitude'] = lng

In [24]:
#Add the postal code of each restaurant (NaN where none is found)
dict_to_df['Postal Code'] = get_postal_code_in_barcelona(barcelona_restaurants)

In [27]:
#Build the DataFrame: one column per key of dict_to_df
df = pd.DataFrame(dict_to_df)

In [28]:
#Preview the first rows of the freshly built DataFrame
df.head()

Unnamed: 0,name,formatted_address,rating,delivery,dine_in,reservable,serves_beer,serves_brunch,serves_dinner,serves_lunch,serves_vegetarian_food,serves_wine,takeout,user_ratings_total,vicinity,wheelchair_accessible_entrance,Latitude,Longitude,Postal Code
0,LA CHULA GASTROBAR,"Carrer d'Espalter, 4, 08001 Barcelona, Spain",4.4,False,True,True,True,True,True,True,True,True,True,62.0,"Carrer d'Espalter, 4, Barcelona",True,41.378554,2.170644,8001
1,Tokyo-Ya,"C/ de Balmes, 197, 08006 Barcelona, Spain",4.4,,,,,,,,,,,335.0,"Carrer de Balmes, 197, Barcelona",True,41.397146,2.151969,8006
2,M.A.M.I. Italian Gastrobar,"Av. Diagonal, 366, 08013 Barcelona, Spain",4.9,,True,True,True,True,True,True,True,True,True,162.0,"Avinguda Diagonal, 366, Barcelona",True,41.39961,2.17111,8013
3,Maska GastroPub,"C/ de Pau Claris, 117, 08009 Barcelona, Spain",4.9,True,True,True,True,,True,True,True,True,True,140.0,"Carrer de Pau Claris, 117, Barcelona",True,41.392141,2.167801,8009
4,Restaurant Núria Pous,"C. de Santander, 16, 08020 Barcelona, Spain",4.3,True,True,,True,True,,True,False,True,True,52.0,"Carrer de Santander, 16, Barcelona",True,41.423839,2.19818,8020


In [30]:
#Drop the rows whose 'Postal Code' is missing
df = df.dropna(subset=['Postal Code'])

In [32]:
#Add the 'Neighbourhood' and 'District' columns derived from 'Postal Code'
df = get_nbh_distr_from_pc(df)

In [34]:
#Write the table to 'nbhoods.csv' without the index column
#NOTE(review): in cell order this runs before the 0/1 conversion of the boolean columns
#further down, so the CSV would hold the unconverted values — confirm that is intended.
df.to_csv('nbhoods.csv', index=False)

In [38]:
#Columns to turn into 0/1 integer flags
boolean_columns = [
    'delivery', 'dine_in', 'reservable',
    'serves_beer', 'serves_brunch', 'serves_dinner', 'serves_lunch',
    'serves_vegetarian_food', 'serves_wine',
    'takeout',
]

for flag_column in boolean_columns:
    #Go through float first so missing entries survive as NaN, then count them as 0
    as_float = df[flag_column].astype(float)
    df[flag_column] = as_float.fillna(0).astype(int)

In [39]:
#Preview the first rows after the boolean columns were converted to 0/1
df.head()

Unnamed: 0,name,formatted_address,rating,delivery,dine_in,reservable,serves_beer,serves_brunch,serves_dinner,serves_lunch,...,serves_wine,takeout,user_ratings_total,vicinity,wheelchair_accessible_entrance,Latitude,Longitude,Postal Code,Neighbourhood,District
0,LA CHULA GASTROBAR,"Carrer d'Espalter, 4, 08001 Barcelona, Spain",4.4,0,1,1,1,1,1,1,...,1,1,62.0,"Carrer d'Espalter, 4, Barcelona",True,41.378554,2.170644,8001,El Raval,Ciutat Vella
1,Tokyo-Ya,"C/ de Balmes, 197, 08006 Barcelona, Spain",4.4,0,0,0,0,0,0,0,...,0,0,335.0,"Carrer de Balmes, 197, Barcelona",True,41.397146,2.151969,8006,El Farro,Sarria - Sant Gervasi
2,M.A.M.I. Italian Gastrobar,"Av. Diagonal, 366, 08013 Barcelona, Spain",4.9,0,1,1,1,1,1,1,...,1,1,162.0,"Avinguda Diagonal, 366, Barcelona",True,41.39961,2.17111,8013,El Fort Pienc,Eixample
3,Maska GastroPub,"C/ de Pau Claris, 117, 08009 Barcelona, Spain",4.9,1,1,1,1,0,1,1,...,1,1,140.0,"Carrer de Pau Claris, 117, Barcelona",True,41.392141,2.167801,8009,Dreta Eixample,Eixample
4,Restaurant Núria Pous,"C. de Santander, 16, 08020 Barcelona, Spain",4.3,1,1,0,1,1,0,1,...,1,1,52.0,"Carrer de Santander, 16, Barcelona",True,41.423839,2.19818,8020,La Verneda - Sant Marti de Provençals,Sant Marti


In [37]:
#Number of restaurants per neighbourhood (groupby sorts the neighbourhood names)
df.groupby('Neighbourhood').size()

Neighbourhood
Antiga Esquerra Eixample                 358
Barceloneta                              402
Barri Gotic                              328
Camp de l'arpa                           112
Camps d'en Grassot i Gracia Nova         230
Canyelles - Roquetes                      62
Dreta Eixample                           389
El Besos - Sant Marti                    141
El Clot                                  216
El Coll                                   39
El Congres - Navas                       129
El Farro                                 181
El Fort Pienc                            215
El Guinardo                               68
El Raval                                 278
Esquerra Eixample                        481
Gracia - Can Baro                         97
Horta                                     60
La Bordeta - Les Corts                   277
La Font d'en Fargues                      47
La Verneda - Sant Marti de Provençals     97
Les Corts                                