In [1]:
import os
import json
import pyproj
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.ops import transform
from shapely.geometry import Polygon
from shapely import to_geojson

In [2]:
# Read the census sections layer from the GeoPackage and preview its first rows.
census_path = "data_in/C2021_SECCOES_1312.gpkg"
data = gpd.read_file(census_path)
data.head()

Unnamed: 0,DT21,DTMN21,DTMNFR21,DTMNFRSEC21,SECNUM21,N_SS,NUTS1_15,NUTS2_15,NUTS3_15,N_EDIFICIOS_CLASSICOS,...,N_INDIVIDUOS_DOMESTICOS_M,N_INDIVIDUOS_REFORMADOS_M,N_INDIVIDUOS_EMPREG_SECT_PRIM_M,N_INDIVIDUOS_EMPREG_SECT_SEC_M,N_INDIVIDUOS_EMPREG_SECT_TERC_M,N_INDIVIDUOS_NAC_ESTRANGEIRA_M,N_INDIVIDUOS_RESID_FORA_PAIS_M,Shape_Length,Shape_Area,geometry
0,13,1312,131217,131217050,50,5,1,11,11A,173.0,...,18.0,165.0,0.0,18.0,231.0,43.0,142.0,1157.177049,82504.503024,"MULTIPOLYGON (((-40251.220 165968.917, -40385...."
1,13,1312,131217,131217049,49,10,1,11,11A,192.0,...,20.0,138.0,0.0,15.0,158.0,44.0,131.0,1416.668485,89771.452029,"MULTIPOLYGON (((-40628.367 166322.358, -40627...."
2,13,1312,131203,131203025,25,9,1,11,11A,316.0,...,25.0,146.0,0.0,15.0,139.0,3.0,47.0,4653.023374,500204.920751,"MULTIPOLYGON (((-36034.760 164495.326, -36034...."
3,13,1312,131203,131203017,17,12,1,11,11A,306.0,...,27.0,146.0,0.0,14.0,144.0,15.0,51.0,3929.423593,417943.606612,"MULTIPOLYGON (((-37556.365 165662.597, -37506...."
4,13,1312,131217,131217028,28,3,1,11,11A,162.0,...,36.0,207.0,0.0,21.0,283.0,76.0,172.0,1360.397149,91336.858127,"MULTIPOLYGON (((-41192.849 166127.603, -41230...."


In [3]:
# Build two tables from the census sections held in `data`:
#   * pop        - long-format counts, one row per (section, gender, category)
#   * geometries - one geometry per section, reprojected to EPSG:4326
pop = []
geometries = []
# always_xy=True forces (x, y) axis order on both sides of the transformation.
transformer = pyproj.Transformer.from_crs(data.crs.srs, "epsg:4326", always_xy=True)

for index, row in data.iterrows():
    section = row["DTMNFRSEC21"]
    geometry = transform(transformer.transform, row["geometry"])
    geometries.append((section, geometry))

    # Gender codes are the column-name suffixes used by the census table
    # (presumably M = women, H = men - confirm against the data dictionary).
    for gender in ["M", "H"]:
        employed1Sec = row[f"N_INDIVIDUOS_EMPREG_SECT_PRIM_{gender}"]
        employed2Sec = row[f"N_INDIVIDUOS_EMPREG_SECT_SEC_{gender}"]
        employed3Sec = row[f"N_INDIVIDUOS_EMPREG_SECT_TERC_{gender}"]

        # Economically active people not accounted for by the three sector counts.
        autonomousOrEmployer = row[f"N_INDIVIDUOS_COM_ATIVIDADE_ECONOMICA_{gender}"] - (
            employed1Sec + employed2Sec + employed3Sec
        )
        if autonomousOrEmployer < 0:
            # Report inconsistent rows, then clamp so no negative count is exported.
            print(index, f"active {gender}")
        autonomousOrEmployer = max(autonomousOrEmployer, 0)

        kids = row[f"N_INDIVIDUOS_10A14_{gender}"]
        teens = row[f"N_INDIVIDUOS_15A19_{gender}"]
        young = row[f"N_INDIVIDUOS_20A24_{gender}"]
        youth = kids + teens + young

        # Students are split across the three age bands proportionally to the
        # number of residents in each band.
        students = row[f"N_INDIVIDUOS_ESTUDANTES_{gender}"]
        if youth > 0:
            kidsToYouth = kids / youth
            teensToYouth = teens / youth
            studentsKids = round(students * kidsToYouth)
            # round() rounds halves to even, so two x.5 shares can both round up
            # and overshoot the total; cap the second share at what is left.
            studentsTeens = min(round(students * teensToYouth), students - studentsKids)
        else:
            # No residents aged 10-24: avoid dividing by zero; any students in
            # this section fall through to the 20-24 bucket below.
            studentsKids = 0
            studentsTeens = 0
        studentsYoung = students - (studentsKids + studentsTeens)

        housestay = row[f"N_INDIVIDUOS_DOMESTICOS_{gender}"]
        reformed = row[f"N_INDIVIDUOS_REFORMADOS_{gender}"]
        childrenUnder10 = row[f"N_INDIVIDUOS_0A4_{gender}"] + row[f"N_INDIVIDUOS_5A9_{gender}"]

        # Economically inactive people not covered by the categories above.
        unemployed = row[f"N_INDIVIDUOS_SEM_ATIVIDADE_ECONOMICA_{gender}"] - (
            students + housestay + reformed + childrenUnder10
        )
        if unemployed < 0:
            print(index, f"not active {gender}")
        unemployed = max(unemployed, 0)

        pop += [
            (section, gender, "worker_1st_sector", employed1Sec),
            (section, gender, "worker_2nd_sector", employed2Sec),
            (section, gender, "worker_3rd_sector", employed3Sec),
            (section, gender, "autonomous_or_employer", autonomousOrEmployer),
            (section, gender, "students_age_10_14", studentsKids),
            (section, gender, "students_age_15_19", studentsTeens),
            (section, gender, "students_age_20_24", studentsYoung),
            (section, gender, "housestay", housestay),
            (section, gender, "reformed", reformed),
            (section, gender, "children_under_10", childrenUnder10),
            # NOTE(review): "undemployed" is misspelled, but the label is part of
            # the exported data, so it is kept unchanged here.
            (section, gender, "undemployed", unemployed),
        ]

pop = pd.DataFrame(pop, columns=["section", "gender", "type", "count"])
geometries = pd.DataFrame(geometries, columns=["section", "geometry"])
# Object-dtype placeholder: the column is later filled with neighbourhood names
# (strings); a float NaN column would need a dtype upcast on assignment.
geometries["neighbourhood"] = None

# Create the output folder if missing so a fresh checkout can run end to end.
os.makedirs("data_out", exist_ok=True)
pop.to_csv("data_out/population.csv", index=False)

In [4]:
# Load one polygon per neighbourhood from the files in data_in/shapes, keyed by
# the file name up to its first dot, then merge the parishes that were joined
# into administrative unions.
directory = "data_in/shapes"
neighbourhoods = {}

# sorted(): os.listdir order is arbitrary; a stable order keeps runs reproducible.
for filename in sorted(os.listdir(directory)):
    # Skip hidden files (e.g. .DS_Store) that would make json.load fail.
    if filename.startswith("."):
        continue
    f = os.path.join(directory, filename)
    if not os.path.isfile(f):
        continue
    with open(f, encoding="utf-8") as file:
        geojson = json.load(file)
    # NOTE(review): only coordinates[0][0] is used, i.e. the first ring of the
    # first polygon; further parts or holes in the file are ignored - confirm
    # this is intended for every shape.
    poly = Polygon(geojson["coordinates"][0][0])
    neighbourhoods[filename.split(".")[0]] = poly

# Merged unit name -> names of the loaded shapes it replaces.
merged_units = {
    "UniaodasfreguesiasdeAldoarFozdoDouroeNevogilde": ["Aldoar", "FozDoDouro", "Nevogilde"],
    "UniaodasfreguesiasdeLordelodoOuroeMassarelos": ["LordeloDeOuro", "Massarelos"],
}
for merged_name, part_names in merged_units.items():
    # pop() removes the individual parishes so only the union remains.
    parts = [neighbourhoods.pop(part_name) for part_name in part_names]
    combined = parts[0]
    for part in parts[1:]:
        combined = combined.union(part)
    neighbourhoods[merged_name] = combined

In [5]:
# Tag every section with the first neighbourhood polygon that contains the
# centroid of the section geometry. The column is rebuilt from scratch on each
# run, so re-executing the cell never leaves stale values behind, and no string
# is written element-by-element into a column of another dtype.
assigned = []
for section, geometry in zip(geometries["section"], geometries["geometry"]):
    centroid = geometry.centroid
    match = next(
        (name for name, shape in neighbourhoods.items() if shape.contains(centroid)),
        None,
    )
    if match is None:
        # Report which section could not be placed so it can be inspected.
        print("Something wrong", section)
    assigned.append(match)

geometries["neighbourhood"] = assigned

In [6]:
# Serialise every section geometry to its GeoJSON string representation.
geometries["geometry"] = [to_geojson(shape) for shape in geometries["geometry"]]

In [7]:
# Make sure the output folder exists, then write the section geometries table.
os.makedirs("data_out", exist_ok=True)
geometries.to_csv("data_out/geometries.csv", index=False)