In [240]:
import os
import json
import random
import helper
import numpy as np
import pandas as pd
from xml.dom import minidom 
from haversine import haversine, Unit
from shapely import from_geojson, Polygon

In [241]:
random.seed(42)

In [242]:
directory = "data_in/shapes"
neighbourhoodsShapes = {}

for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
      with open(f) as file:
        geojson = json.load(file)
        poly = Polygon(geojson["coordinates"][0][0])
        neighbourhoodsShapes[filename.split(".")[0]] = poly
neighbourhoodsShapes["UniaodasfreguesiasdeAldoarFozdoDouroeNevogilde"] = neighbourhoodsShapes["Aldoar"].union(neighbourhoodsShapes["FozDoDouro"]).union(neighbourhoodsShapes["Nevogilde"])
neighbourhoodsShapes["UniaodasfreguesiasdeLordelodoOuroeMassarelos"] = neighbourhoodsShapes["LordeloDeOuro"].union(neighbourhoodsShapes["Massarelos"])
del neighbourhoodsShapes["Aldoar"]
del neighbourhoodsShapes["FozDoDouro"]
del neighbourhoodsShapes["Nevogilde"]
del neighbourhoodsShapes["LordeloDeOuro"]
del neighbourhoodsShapes["Massarelos"]

In [243]:
populationProportionPerc = 0.002

populationDF = pd.read_csv("data_out/population.csv")
populationDF["count"] = populationDF["count"].apply(lambda x: round(x*populationProportionPerc))

tripProfilesDF = pd.read_csv("data_in/trip_profiles.csv")

commutesDF = pd.read_csv("data_out/commutes.csv")
commutesDF["neighborhood"] = commutesDF["neighborhood"].apply(lambda x: x.replace(",","").replace(" ","").replace("ã","a").replace("ó","o").replace("é","e"))
commutesDF["destination"] = commutesDF["destination"].apply(lambda x: x.replace(",","").replace(" ","").replace("ã","a").replace("ó","o").replace("é","e"))

geometriesDF = pd.read_csv("data_out/geometries.csv")
geometriesDF["geometry"] = geometriesDF["geometry"].apply(lambda x: from_geojson(x))
populationDF = populationDF.merge(geometriesDF, left_on="section", right_on="section").drop(columns=["geometry"])

placesDF = pd.read_csv("data_out/places.csv")

In [244]:
population = []

for _, individuals in populationDF.iterrows():
    section, sex, category, count, sectionNeighborhood = individuals
    
    if count == 0:
        continue
    
    profileReduction = 1/populationDF[populationDF["type"] == category]["count"].sum()
    commuteReduction = 1/populationDF[populationDF["neighbourhood"] == sectionNeighborhood]["count"].sum()

    categoryProfiles = tripProfilesDF[tripProfilesDF["category"] == category]
    profilesOptions = categoryProfiles["profile"]
    profilesProbabilities = categoryProfiles["probability"]

    # This shouldnt be happaning but it is
    if profilesProbabilities.sum() <= 0:
        profilesProbabilities = [1]*len(profilesProbabilities)

    choosenProfiles = random.choices(list(profilesOptions), weights=list(profilesProbabilities), k=count)
    
    for profile in choosenProfiles:
        probability = tripProfilesDF[(tripProfilesDF["category"] == category) & (tripProfilesDF["profile"] == profile)]["probability"].iloc[0]
        tripProfilesDF.loc[(tripProfilesDF["category"] == category) & (tripProfilesDF["profile"] == profile), "probability"] = max(probability-profileReduction,0)
        
        sectionGeometry = geometriesDF[geometriesDF["section"] == section]["geometry"].iloc[0]

        homeLocation = helper.random_point_inside_polygon(sectionGeometry)
        
        occupation = None
        if "worker" in category or "autonomous" in category:
            occupation = "Empregada"
        elif "student" in category:
            occupation = "Estudante"

        workPlace = None
        if occupation != None:
            destinationsDF = commutesDF[(commutesDF["sex"] == sex) & (commutesDF["neighborhood"] == sectionNeighborhood) & (commutesDF["occupation"] == occupation)]

            destinationsOptions = destinationsDF["destination"]
            destinationsProbabilities = destinationsDF["probability"]

            # This shouldnt be happaning but it is
            if destinationsProbabilities.sum() <= 0:
                destinationsProbabilities = [1]*len(destinationsProbabilities)

            chosenDestination = random.choices(list(destinationsOptions), list(destinationsProbabilities), k=1)[0]
            probability = commutesDF[(commutesDF["sex"] == sex) & (commutesDF["neighborhood"] == sectionNeighborhood) & (commutesDF["occupation"] == occupation) & (commutesDF["destination"] == chosenDestination)]["probability"].iloc[0]
            commutesDF.loc[(commutesDF["sex"] == sex) & (commutesDF["neighborhood"] == sectionNeighborhood) & (commutesDF["occupation"] == occupation) & (commutesDF["destination"] == chosenDestination), "probability"] = max(probability-commuteReduction, 0)


        match category:
            case "worker_1st_sector":
                workPlaceType = "workplace_1st_sec"
            case "worker_2nd_sector":
                workPlaceType = "workplace_2nd_sec"
            case "worker_3rd_sector":
                workPlaceType = "workplace_3rd_sec"
            case "autonomous_or_employer":
                workPlaceType = "workplace_all"
            case "students_age_10_14":
                workPlaceType = "primary_school"
            case "students_age_15_19":
                workPlaceType = "secondary_school"
            case "students_age_20_24":
                workPlaceType = "university"
            case _:
                workPlaceType = None

        if workPlaceType != None:
            possiblePlaces = placesDF[(placesDF["category"] == workPlaceType) & (placesDF["neighbourhood"] == chosenDestination)]
            if len(possiblePlaces) > 0:
                workPlaceRow = possiblePlaces.sample()
                workPlace = workPlaceRow["longitude"].iloc[0], workPlaceRow["latitude"].iloc[0]
            else:
                destinationGeometry = neighbourhoodsShapes[chosenDestination]
                workPlace = helper.random_point_inside_polygon(destinationGeometry)
        
        entry = [sex, category, profile, homeLocation, workPlace]

        population.append(entry)
    
newPopDF = pd.DataFrame(population, columns=["sex", "category", "trip_profile", "home_location", "work_location"])
newPopDF.to_csv("data_out/synthetic_population.csv", index=False)

In [245]:
plans = {}
for i, row in newPopDF.iterrows():
    _, category, trip, home, work = row 
    plan = []
    lastPlace = None
    lasTime = None

    workStart = random.choices([6,7,8,9,10,12,14], [0.1, 0.2, 0.3, 0.2, 0.1, 0.05, 0.05], k=1)[0]
    work_time = random.choices([4,6,8],[0.1,0.1,0.8], k=1)[0]

    for order, activityType in enumerate(trip.split(" - ")):
        match activityType:
            case "H": #House
                entry = (activityType.lower(), home, workStart)

            case "W" | "C": #Work or school
                entry = (activityType.lower(), work, workStart+work_time)

            case "S" | "L" | "M": 
                match activityType:
                    case "S":   
                        placeCategory = "groceries"
                    case "L":   
                        placeCategory = "leisure"
                    case "M":   
                        placeCategory = "shop"

                placesByType = placesDF[placesDF["category"] == placeCategory]
                placesByType["distance"] = placesByType.apply(lambda r: haversine(lastPlace, (r["longitude"], r["latitude"]), unit=Unit.KILOMETERS), axis=1)

                filterByDistance = placesByType[placesByType["distance"] <= 2]

                if len(filterByDistance) == 0:
                    idx = placesByType["distance"].idxmin()
                    placesByType[placesByType.index == idx]

                else:
                    sample = filterByDistance.sample()
                
                activity = (sample["latitude"].iloc[0], sample["longitude"].iloc[0])

                time = min(random.uniform(0.5, 4) + lastTime, 23.99)

                entry = (activityType.lower(), activity, time)
        
        lastPlace = entry[1]
        lastTime = entry[2]
        plan.append(entry)

    plans[f"{category}_{i}"] = plan
final = plans

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placesByType["distance"] = placesByType.apply(lambda r: haversine(lastPlace, (r["longitude"], r["latitude"]), unit=Unit.KILOMETERS), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placesByType["distance"] = placesByType.apply(lambda r: haversine(lastPlace, (r["longitude"], r["latitude"]), unit=Unit.KILOMETERS), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/

In [246]:
with open("data_out/plans.json", "w") as file:
    file.write(json.dumps(plans, indent=1))

In [247]:
imp = minidom.getDOMImplementation()
doctype = imp.createDocumentType("plans", None, "http://www.matsim.org/files/dtd/plans_v4.dtd")
root = imp.createDocument(None, "plans", doctype)

plansEl = root.documentElement

for person, plans in final.items():
    personEl = root.createElement("person")
    personEl.setAttribute("id", str(person))
    
    planEl = root.createElement("plan")
    for i, plan in enumerate(plans):
        _type, (x,y), time = plan

        time = f"{str(int(time)).zfill(2)}:{ str(int(time-int(time))*60).zfill(2) }:00"

        actEl = root.createElement("act")
        actEl.setAttribute("type", _type)
        actEl.setAttribute("x", str(round(x, 10)))
        actEl.setAttribute("y", str(round(y, 10)))
        actEl.setAttribute("end_time", time)

        planEl.appendChild(actEl)
        if i != len(plans) - 1:
            legEl = root.createElement("leg")
            legEl.setAttribute("mode", "car")
            planEl.appendChild(legEl)

    personEl.appendChild(planEl)

    plansEl.appendChild(personEl)

xml_str = root.toprettyxml(indent="\t")

with open("data_out/plans.xml", "w") as f:
    f.write(xml_str)

print(xml_str)

<?xml version="1.0" ?>
<!DOCTYPE plans
  SYSTEM 'http://www.matsim.org/files/dtd/plans_v4.dtd'>
<plans>
	<person id="worker_3rd_sector_0">
		<plan>
			<act type="h" x="-8.6239094095" y="41.1633512603" end_time="12:00:00"/>
			<leg mode="car"/>
			<act type="w" x="-8.6133344694" y="41.1695647208" end_time="20:00:00"/>
			<leg mode="car"/>
			<act type="h" x="-8.6239094095" y="41.1633512603" end_time="12:00:00"/>
		</plan>
	</person>
	<person id="worker_3rd_sector_1">
		<plan>
			<act type="h" x="-8.668310447" y="41.1667154629" end_time="07:00:00"/>
			<leg mode="car"/>
			<act type="w" x="-8.6743595946" y="41.1545960263" end_time="15:00:00"/>
			<leg mode="car"/>
			<act type="l" x="41.1520696" y="-8.6774732" end_time="16:00:00"/>
			<leg mode="car"/>
			<act type="h" x="-8.668310447" y="41.1667154629" end_time="07:00:00"/>
		</plan>
	</person>
	<person id="worker_3rd_sector_2">
		<plan>
			<act type="h" x="-8.6612442999" y="41.1535981158" end_time="08:00:00"/>
			<leg mode="car"/>
			<