In [1]:
import os
import json
import pandas as pd
from shapely.geometry import Polygon, Point

In [2]:
# Load the raw points of interest and keep only the first record for each
# (name, type, address, freguesia) combination.
POI_CSV = "data_in/porto_poi.csv"
DEDUP_KEYS = ["name", "type", "address", "freguesia"]

data = pd.read_csv(POI_CSV).drop_duplicates(subset=DEDUP_KEYS)

# Preview the first rows.
data.head()

Unnamed: 0,name,type,address,latitude,longitude,freguesia,opening_hours
0,Carli Borrachas e Plásticos,Borrachas_e_Plasticos,Rua do Almada,41.150637,-8.612184,,
1,Casa Hortícola,agrarian,,41.148854,-8.607671,,
2,Cafe hotel linha 22,alcohol,Rua dos Clérigos 23,41.145868,-8.612787,,
3,Vinoteca,alcohol,,41.142329,-8.614073,,
4,Agarrafeira Wine House,alcohol,,41.149906,-8.613833,,


In [3]:
# Education-related subsets of the POI table.
# Rows typed "school" are split by matching on their name; rows with a missing
# name never match (na=False).
schools = data[data["type"] == "school"]
schoolNames = schools["name"]

# Note: "Escola Básica" is a substring of "Escola Básica e Secundária", so
# those schools are selected as primary AND as secondary schools.
isPrimary = schoolNames.str.contains("Escola Básica", na=False)
primarySchools = schools[isPrimary]

isSecondary = schoolNames.str.contains("Escola Secundária", na=False)
isBasicAndSecondary = schoolNames.str.contains("Escola Básica e Secundária", na=False)
secondarySchool = schools[isSecondary | isBasicAndSecondary]

# Higher education is identified by type rather than by name.
universities = data[data["type"].isin(["college", "university"])]

In [4]:
# Work places grouped by economic sector, selected through the POI "type" column.
# Note: 'hardware', 'industrial' and 'shoe_repair' are listed for both the
# secondary and the tertiary sector, so those POIs belong to both subsets.
poiType = data["type"]

thirdSectorWorkPlacesTypes = [
    'animal_shelter', 'antiques', 'art', 'art_gallery', 'bank', 'batteries',
    'beauty', 'charity', 'chemist', 'civic', 'clinic', 'college', 'commercial',
    'community_centre', 'concert_hall', 'convenience', 'cosmetics', 'courthouse',
    'coworking_space', 'dancing_school', 'deli', 'dentist', 'department_store',
    'doctors', 'dojo', 'driving_school', 'drugstore', 'electronics',
    'estate_agent', 'fast_food', 'fire_station', 'government', 'grocery',
    'hairdresser', 'hardware', 'health_food', 'herbalist', 'hospital', 'hotel',
    'ice_cream', 'industrial', 'interior_decoration', 'jewelry', 'kindergarten',
    'language_school', 'library', 'mall', 'marketplace', 'museum', 'music',
    'office', 'optician', 'perfumery', 'pet', 'pharmacy', 'police', 'pottery',
    'research_institute', 'restaurant', 'retail', 'school', 'service', 'shelter',
    'shoe_repair', 'shoes', 'sport_club', 'sports', 'sports_centre', 'stable',
    'supermarket', 'veterinary',
]
secondSectorWorkPlacesTypes = [
    'construction', 'factory', 'hardware', 'industrial', 'recycling', 'sewing',
    'shed', 'shoe_repair', 'warehouse',
]
firstSectorWorkPlacesTypes = ['agrarian', 'farm']

thirdSectorWorkPlaces = data.loc[poiType.isin(thirdSectorWorkPlacesTypes)]
secondSectorWorkPlaces = data.loc[poiType.isin(secondSectorWorkPlacesTypes)]
firstSectorWorkPlaces = data.loc[poiType.isin(firstSectorWorkPlacesTypes)]


In [5]:
# Consumer-facing subsets of the POI table, selected through the "type" column.
# A type may appear in more than one list (e.g. 'cafe' and 'fast_food' are both
# groceries and leisure), so the resulting subsets can overlap.
shopTypes = [
    'beauty', 'books', 'clothes', 'commercial', 'department_store',
    'electronics', 'fashion_accessories', 'florist', 'interior_decoration',
    'mall', 'marketplace', 'perfumery', 'shoes', 'toys',
]
groceriesTypes = [
    'bakery', 'cafe', 'cheese', 'fast_food', 'grocery', 'health_food',
    'herbalist', 'pastry', 'seafood', 'supermarket', 'tea',
]
leisureTypes = [
    'art', 'art_gallery', 'arts_centre', 'bar', 'cafe', 'cinema', 'coffee',
    'dojo', 'fast_food', 'games', 'ice_cream', 'museum', 'music', 'nightclub',
    'restaurant', 'sport_club', 'sports', 'sports_centre', 'stadium',
    'swimming_pool', 'tattoo', 'video_games',
]

placeType = data["type"]
shops = data.loc[placeType.isin(shopTypes)]
groceries = data.loc[placeType.isin(groceriesTypes)]
leisurePlaces = data.loc[placeType.isin(leisureTypes)]

In [6]:
# Build one shapely polygon per neighbourhood from the files in the shapes
# directory; the dictionary key is the file name up to its first ".".
directory = "data_in/shapes"
neighbourhoods = {}

for shapeFile in os.listdir(directory):
    shapePath = os.path.join(directory, shapeFile)
    if not os.path.isfile(shapePath):
        continue  # skip sub-directories
    with open(shapePath) as handle:
        geometry = json.load(handle)
    # Only coordinates[0][0] is used -- presumably the outer ring of the first
    # polygon of a MultiPolygon geometry; TODO confirm against the shape files.
    neighbourhoods[shapeFile.split(".")[0]] = Polygon(geometry["coordinates"][0][0])

# Some freguesias are treated as a single union: merge their polygons under the
# union's name, then drop the individual members.
mergedFreguesias = {
    "UniaodasfreguesiasdeAldoarFozdoDouroeNevogilde": ("Aldoar", "FozDoDouro", "Nevogilde"),
    "UniaodasfreguesiasdeLordelodoOuroeMassarelos": ("LordeloDeOuro", "Massarelos"),
}

for unionName, members in mergedFreguesias.items():
    merged = neighbourhoods[members[0]]
    for member in members[1:]:
        merged = merged.union(neighbourhoods[member])
    neighbourhoods[unionName] = merged

for members in mergedFreguesias.values():
    for member in members:
        del neighbourhoods[member]

In [7]:
# Assign every categorised POI to the neighbourhood polygon that contains it
# and write the result to data_out/places.csv.
#
# Output columns: category, latitude, longitude, neighbourhood.
# POIs that fall inside no polygon are reported on stdout and skipped.
places = []

# The sector type lists overlap ('hardware', 'industrial' and 'shoe_repair' are
# both secondary and tertiary), so a plain concat would list those POIs twice
# under "workplace_all". Keep only the first occurrence of every source row;
# this relies on the sector frames sharing the unique row labels of `data`.
allWorkPlaces = pd.concat([firstSectorWorkPlaces, secondSectorWorkPlaces, thirdSectorWorkPlaces])
allWorkPlaces = allWorkPlaces[~allWorkPlaces.index.duplicated(keep="first")]

categories = {
    "primary_school":primarySchools,
    "secondary_school":secondarySchool,
    "university":universities,
    "workplace_1st_sec":firstSectorWorkPlaces,
    "workplace_2nd_sec":secondSectorWorkPlaces,
    "workplace_3rd_sec":thirdSectorWorkPlaces,
    "workplace_all":allWorkPlaces,
    "shop":shops,
    "groceries":groceries,
    "leisure":leisurePlaces
}

for category, df in categories.items():
    for _, row in df.iterrows():
        y,x = row["latitude"], row["longitude"]

        # Build the point once per POI instead of once per polygon test.
        # shapely expects (x, y), i.e. (longitude, latitude).
        location = Point(x,y)

        neighbourhood = None
        for name, shape in neighbourhoods.items():
            if shape.contains(location):
                neighbourhood = name
                break

        if neighbourhood is None:
            print(category,"Out of bounds")
            continue

        # Use the explicit match rather than the leaked loop variable `name`.
        places.append((category, y, x, neighbourhood))

placesDF = pd.DataFrame(places, columns=["category", "latitude", "longitude", "neighbourhood"])

# Create the output directory so a fresh checkout survives "Restart & Run All".
os.makedirs("data_out", exist_ok=True)
placesDF.to_csv("data_out/places.csv", index=False)