In [1]:
import pandas as pd
import numpy as np
from scipy.stats import truncnorm
import re
import random
import geopandas as gpd
from numba import jit, njit
from shapely import MultiPoint, Point
from shapely.ops import nearest_points

In [2]:
# Input CSV locations, relative to the notebook's working directory.
# Both files are read with pd.read_csv in the loading cell.
dwellings_file = './mtl/dd_2006.csv'
# Jobs input is currently disabled; note that getNextJob() still expects a
# global `jobs` array, which nothing in the visible cells creates.
# jobs_file = './jj_2006_2138_snapped.csv'
persons_file = './mtl/TESTPERSONS.csv'

In [3]:
def _read_table(csv_path):
    """Read a CSV and return its column index together with the raw 2-D value array."""
    frame = pd.read_csv(csv_path)
    return frame.columns, frame.values


def d(x):
    """Positional index of the dwelling column named `x`."""
    return d_cols.get_loc(x)


def p(x):
    """Positional index of the person column named `x`."""
    return p_cols.get_loc(x)


def h(x):
    """Positional index of the household column named `x`."""
    return h_cols.get_loc(x)


# Dwellings are kept as a plain array; d('<name>') maps a column name to its position.
d_cols, dwellings = _read_table(dwellings_file)

# Persons are cast to float32 so the whole table is one homogeneous numeric array
# (every column must therefore be numeric; the nationality column is not set aside).
p_cols, persons = _read_table(persons_file)
persons = persons.astype(np.float32)

# Households start as an empty (0 x 4) array with a fixed column layout.
_households_template = pd.DataFrame(columns=['id', 'dwelling', 'hhSize', 'autos'], dtype=int)
h_cols = _households_template.columns
households = _households_template.values


In [4]:
@jit(nopython=True)
def _find_partner_from(persons_arr, n, age):
    """Scan `persons_arr` from row `n` for the first eligible partner.

    A row is eligible when column 4 equals 1, column 1 equals -1 and its
    column-2 value differs from `age` by less than 10.
    (Presumably columns 0/1/2/4 are id/hhId/age/relationship, with 1 meaning
    "married" and -1 meaning "no household yet" -- verify against the CSV.)

    Returns the value in column 0 of the matching row, or None if no row matches.
    """
    for i in range(n, persons_arr.shape[0]):
        row = persons_arr[i]
        if row[4] == 1 and row[1] == -1 and abs(age - row[2]) < 10:
            return row[0]  # person id
    return None


def findPartner(n, person):
    """Return the id of the first eligible partner for `person` at row >= n, or None.

    This is deliberately a plain-Python wrapper: Numba treats global arrays as
    compile-time constants, so a nopython function that reads the global
    `persons` directly keeps seeing the table as it was when the function was
    first compiled and never notices household assignments made afterwards.
    Passing the array as an argument makes every call see the live data.

    Parameters
    ----------
    n : int
        Row index at which the scan starts.
    person : array-like
        Row of the person looking for a partner; only `person[2]` is used.

    Note: because this wrapper is not jitted it cannot be called from inside
    another nopython function; call `_find_partner_from` there instead.
    """
    return _find_partner_from(persons, n, person[2])

In [5]:
@jit(nopython=True)
def _first_lonely_child_from(persons_arr, n):
    """Scan `persons_arr` from row `n` for the first row with column 1 == -1 and column 4 == 2.

    (Presumably column 1 is hhId with -1 meaning "unhoused" and column 4 is
    relationship with 2 meaning "child" -- verify against the CSV.)

    Returns column 0 of that row as an int, or None if there is no such row.
    """
    for i in range(n, persons_arr.shape[0]):
        if persons_arr[i, 1] == -1 and persons_arr[i, 4] == 2:
            return int(persons_arr[i, 0])
    return None


def firstLonelyChild(n):
    """Return the id of the first unhoused child at row >= n, or None if none is left.

    Plain-Python wrapper around the jitted scan: Numba freezes global arrays
    at compile time, so reading the global `persons` inside a nopython
    function would ignore every hhId written after the first call. Handing
    the array over as an argument keeps the scan in sync with the live table.

    The caller uses the returned id as a row index, which assumes person ids
    equal row positions -- TODO confirm.
    """
    return _first_lonely_child_from(persons, n)

In [6]:
@jit(nopython=True)
def _next_unhoused_from(persons_arr, n):
    """Scan `persons_arr` from row `n` for the first row whose column 1 equals -1.

    (Column 1 is presumably hhId, with -1 meaning "not yet in a household".)

    Returns column 0 of that row as an int, or None if every remaining row is housed.
    """
    for i in range(n, persons_arr.shape[0]):
        if persons_arr[i, 1] == -1:
            return int(persons_arr[i, 0])
    return None


def nextUnhousedPerson(n):
    """Return the id of the first unhoused person at row >= n, or None if there is none.

    Kept as a plain-Python wrapper so the current `persons` array is passed on
    every call; a nopython function reading the global directly would work on
    the snapshot Numba captured at compile time and keep returning people who
    have since been housed.
    """
    return _next_unhoused_from(persons, n)

In [7]:
@jit(nopython=True)
def _next_job_from(jobs_arr, n):
    """Scan `jobs_arr` from row `n` for the first row whose column 2 equals -1.

    (Presumably -1 in column 2 marks a job that is still unassigned -- verify.)

    Returns column 0 of that row, or None if no such row exists.
    """
    for i in range(n, jobs_arr.shape[0]):
        if jobs_arr[i, 2] == -1:
            return jobs_arr[i, 0]
    return None


def getNextJob(n):
    """Return column 0 of the first job row at index >= n whose column 2 is -1, else None.

    Plain-Python wrapper, matching the other lookup helpers: the global array
    is passed in at call time because Numba would otherwise freeze it when the
    function is compiled.

    NOTE(review): no `jobs` array is created in the visible cells (the
    jobs_file path is commented out), so calling this raises NameError until
    one is loaded.
    """
    return _next_job_from(jobs, n)

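HOUSEHOLD SYNTHESIS

For each dwelling:
- create one household, headed by the next unhoused person, that contains
  - either a single person with 0-1 kids,
  - or a married couple with 0-3 kids (the code draws `random.randint(0, 3)`, which is inclusive);
- write the household id onto the dwelling and onto every member's `hhId`.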

In [7]:
# Household synthesis: walk the dwellings in order, seed each one with the next
# unhoused person as head, then optionally attach a partner and children.
#
# Assumptions inherited from the lookup helpers (verify against the CSV):
#   * person ids equal row positions in `persons` (returned ids are used as indices);
#   * p('hhId') == 1 and p('relationship') == 4, the columns the helpers hard-code.
# NOTE: `random` is not seeded here, so household composition differs between runs.
personNum = 0        # row index of the next candidate household head
lastChildIndex = 0   # row index at which the search for unhoused children resumes
err = 1              # 1 = ran out of dwellings, 2 = ran out of persons

# Column positions never change inside the loop; resolve them once.
hh_col = p('hhId')
rel_col = p('relationship')
dw_id_col = d('id')
dw_hh_col = d('hhId')

n_persons = persons.shape[0]
hh_rows = []  # buffered household rows; a list append is O(1), unlike re-building an array per row

for i, dwelling in enumerate(dwellings):  # for each dwelling
    hhId = i

    # ---- choose the household head -------------------------------------
    if personNum >= n_persons:  # walked past the last person
        err = 2
        break
    if persons[personNum][hh_col] == -1:  # current person is unhoused
        head_idx = personNum
    else:
        # skip ahead to the next unhoused person to save time
        next_id = nextUnhousedPerson(personNum)
        if next_id is None:
            err = 2
            break
        head_idx = int(next_id)

    person = persons[head_idx]   # row view: writes go straight into `persons`
    person[hh_col] = hhId        # house the head itself (not the row after it)
    personNum = head_idx + 1     # everything up to and including the head is settled

    # ---- partner and desired number of children -------------------------
    hhSize = 1
    kids_wanted = 0
    relationship = person[rel_col]
    if relationship == 1:  # married
        partner = findPartner(personNum, person)
        if partner is None:  # can't find partner for person
            person[rel_col] = 0  # set person to single
            kids_wanted = random.randint(0, 1)
        else:
            persons[int(partner)][hh_col] = hhId
            hhSize += 1
            kids_wanted = random.randint(0, 3)  # randint is inclusive: 0..3
    elif relationship == 0:  # single
        kids_wanted = random.randint(0, 1)

    # ---- attach children; count only those actually found ---------------
    numKids = 0
    for _ in range(kids_wanted):
        child_ind = firstLonelyChild(lastChildIndex)
        if child_ind is None:  # no unhoused children left
            break
        persons[child_ind][hh_col] = hhId
        lastChildIndex = child_ind + 1  # resume after this child so it is never picked twice
        numKids += 1
    hhSize += numKids

    # ---- record the household -------------------------------------------
    autos = random.randint(0, hhSize - numKids)  # between 0 and the number of adults
    hh_rows.append([hhId, dwelling[dw_id_col], hhSize, autos])
    dwellings[i][dw_hh_col] = hhId

    if i % 10000 == 0:
        print(i)
    if i % 150000 == 0:
        # flush the buffer to a numbered chunk file and start a fresh one
        num = i // 150000
        print('writing to file:', num)
        pd.DataFrame(data=hh_rows, columns=h_cols).astype(int).to_csv('Montreal_hh_2006_'+str(num)+'.csv', index=False)
        hh_rows = []

# Leave the unflushed tail in `households` (shape (k, 4)) for the cell that writes the final file.
households = np.array(hh_rows).reshape(-1, 4)

if err == 1:
    print("Exhausted Dwellings")
    print("last person", personNum)
elif err == 2:
    print("All persons housed")


0
writing to file: 0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
writing to file: 1
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
writing to file: 2
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000
450000
writing to file: 3
460000
470000
480000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
writing to file: 4
610000
620000
630000
640000
650000
660000
670000
680000
690000
700000
710000
720000
730000
740000
750000
writing to file: 5
760000
770000
780000
790000
800000
810000
820000
830000
840000
850000
860000
870000
880000
890000
900000
writing to file: 6
910000
920000
930000
940000
950000
960000
970000
980000
990000
1000000
1010000
1020000
1030000
1040000
1050000
writing to file: 7
1060000
1070000
1080000
1090000
1100000
1110000
1120000
1130000
1140000
1150000
1160000
1170000
1180000
1190000
12

In [8]:
# Write whatever is still in `households` after the loop, i.e. the rows
# accumulated since the last numbered chunk file was written.
households_tail = pd.DataFrame(data=households, columns=h_cols)
households_tail = households_tail.astype(int)
households_tail.to_csv('./Montreal_hh_2006_final.csv', index=False)

In [9]:
# Persist both tables now that they carry household ids.
linked_dwellings = pd.DataFrame(data=dwellings, columns=d_cols)
linked_dwellings.to_csv('./dd_2006_linked_montreal.csv', index=False)

# `persons` was cast to float32 when loaded, so every column is written as a float.
# The nationality column is not re-attached (its capture at load time is commented out).
outputPersons = pd.DataFrame(data=persons, columns=p_cols)
outputPersons.to_csv('./pp_2006_linked_montreal.csv', index=False)

In [17]:
# Stitch the ten household chunk files back into one frame.
# NOTE(review): the synthesis loop in this notebook writes
# 'Montreal_hh_2006_<n>.csv' (plus './Montreal_hh_2006_final.csv') to the working
# directory, not './Output/hh_2006_<n>.csv' -- confirm these files exist and that
# ten chunks (0..9) is the right count before relying on this cell.
chunk_paths = [f'./Output/hh_2006_{i}.csv' for i in range(0, 10)]
allHouseholds = pd.concat(list(map(pd.read_csv, chunk_paths)))

In [18]:
# Save the combined household table without the (repeating) per-chunk row index.
linked_households_path = './Output/hh_2006_linked.csv'
allHouseholds.to_csv(linked_households_path, index=False)