In [7]:
import pandas as pd
import numpy as np
import torch

from data_util.dataset import CityData
from model.regiondcl import PatternEncoder, RegionEncoder
from model.trainer import PatternTrainer, RegionTrainer

#### Arguments used by subsequent code

In [2]:
class Args:
    """Plain attribute bag holding the configuration read by the later cells."""


args = Args()

# Attach every setting as an instance attribute in a single pass.
vars(args).update(
    # Dataset selection; no_random / fixed are negated where they are consumed
    # (with_random=not args.no_random, adaptive=not args.fixed).
    city="Paris",
    no_random=False,
    fixed=False,
    # Encoder sizes: dim is passed as d_hidden to both encoders.
    dim=64,
    d_feedforward=1024,
    # Settings forwarded to PatternEncoder's building_* arguments.
    building_head=8,
    building_layers=2,
    building_dropout=0.2,
    building_activation='relu',
    # Settings forwarded to PatternEncoder's bottleneck_* arguments.
    bottleneck_head=8,
    bottleneck_layers=2,
    bottleneck_dropout=0.2,
    bottleneck_activation='relu',
    # Adam (lr, weight_decay) and StepLR (gamma) hyper-parameters.
    lr=0.0001,
    weight_decay=0.0001,
    gamma=0.999,
    # File-name stem used when saving / reloading the pattern embeddings.
    save_name='pattern_embedding',
)

#### Pattern training

In [3]:
# Load the dataset for the configured city. with_random is simply the negation
# of args.no_random; what it toggles is defined inside CityData.
city_data = CityData(args.city, with_random=not args.no_random)

# Use the GPU when torch reports one as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Keyword arguments for the pattern encoder: feature sizes come from the loaded
# city data, everything else from the notebook configuration.
pattern_encoder_kwargs = dict(
    d_building=city_data.building_feature_dim,
    d_poi=city_data.poi_feature_dim,
    d_hidden=args.dim,
    d_feedforward=args.d_feedforward,
    building_head=args.building_head,
    building_layers=args.building_layers,
    building_dropout=args.building_dropout,
    building_distance_penalty=1,
    building_activation=args.building_activation,
    bottleneck_head=args.bottleneck_head,
    bottleneck_layers=args.bottleneck_layers,
    bottleneck_dropout=args.bottleneck_dropout,
    bottleneck_activation=args.bottleneck_activation,
)
pattern_encoder = PatternEncoder(**pattern_encoder_kwargs).to(device)

# Optimiser and learning-rate schedule for the pattern encoder. With
# step_size=1, each scheduler step multiplies the learning rate by args.gamma.
pattern_optimizer = torch.optim.Adam(
    pattern_encoder.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
pattern_scheduler = torch.optim.lr_scheduler.StepLR(
    pattern_optimizer,
    step_size=1,
    gamma=args.gamma,
)

# Contrastive training of the building patterns for 20 epochs; the trainer is
# given args.save_name as the name under which to save its results.
pattern_trainer = PatternTrainer(
    city_data,
    pattern_encoder,
    pattern_optimizer,
    pattern_scheduler,
)
pattern_trainer.train_pattern_contrastive(epochs=20, save_name=args.save_name)
print('Pattern (building groups) training finished. Embeddings have been saved in embeddings/ directory.')

Epoch 1: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.87it/s, loss=0.885]


Epoch 1: InfoNCE Loss 2.8697494361135694


Epoch 2: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:23<00:00,  1.90it/s, loss=0.685]


Epoch 2: InfoNCE Loss 1.6125569873385959


Epoch 3: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.87it/s, loss=0.501]


Epoch 3: InfoNCE Loss 1.1375351442231072


Epoch 4: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.87it/s, loss=0.336]


Epoch 4: InfoNCE Loss 0.915105895863639


Epoch 5: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:23<00:00,  1.89it/s, loss=0.317]


Epoch 5: InfoNCE Loss 0.7960723929935032


Epoch 6: 100%|███████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.84it/s, loss=0.17]


Epoch 6: InfoNCE Loss 0.6854121244615978


Epoch 7: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.85it/s, loss=0.107]


Epoch 7: InfoNCE Loss 0.6158585439125697


Epoch 8: 100%|██████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.85it/s, loss=0.382]


Epoch 8: InfoNCE Loss 0.5474309768941668


Epoch 9: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.87it/s, loss=0.0584]


Epoch 9: InfoNCE Loss 0.4918755794564883


Epoch 10: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:24<00:00,  1.84it/s, loss=0.119]


Epoch 10: InfoNCE Loss 0.4511387017038133


Epoch 11: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:20<00:00,  2.15it/s, loss=0.138]


Epoch 11: InfoNCE Loss 0.32333849171797435


Epoch 12: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.11it/s, loss=0.066]


Epoch 12: InfoNCE Loss 0.3384299533234702


Epoch 13: 100%|████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.07it/s, loss=0.0618]


Epoch 13: InfoNCE Loss 0.290262867593103


Epoch 14: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.136]


Epoch 14: InfoNCE Loss 0.28456874423556855


Epoch 15: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.08it/s, loss=0.342]


Epoch 15: InfoNCE Loss 0.2683285292651918


Epoch 16: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.08it/s, loss=0.177]


Epoch 16: InfoNCE Loss 0.27664744953314463


Epoch 17: 100%|████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.13it/s, loss=0.0314]


Epoch 17: InfoNCE Loss 0.23765787109732628


Epoch 18: 100%|█████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.10it/s, loss=0.338]


Epoch 18: InfoNCE Loss 0.24812058607737222


Epoch 19: 100%|████████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.10it/s, loss=0.0685]


Epoch 19: InfoNCE Loss 0.22100412083996668


Epoch 20: 100%|███████████████████████████████████████████████████████████| 45/45 [00:21<00:00,  2.10it/s, loss=0.00721]


Epoch 20: InfoNCE Loss 0.2244450529002481
Pattern (building groups) training finished. Embeddings have been saved in embeddings/ directory.


In [5]:
# Reload the pattern embeddings from disk. The "_20" suffix presumably matches
# the 20 pattern-training epochs used earlier -- keep the two in sync.
embeddings = np.load(f'embeddings/{args.city}/{args.save_name}_20.npy')

# Region-level aggregator with its own optimiser and learning-rate schedule,
# configured with the same lr / weight_decay / gamma as the pattern encoder.
region_aggregator = RegionEncoder(d_hidden=args.dim, d_head=8)
region_aggregator.to(device)
region_optimizer = torch.optim.Adam(
    region_aggregator.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
region_scheduler = torch.optim.lr_scheduler.StepLR(
    region_optimizer,
    step_size=1,
    gamma=args.gamma,
)

# The region trainer receives both the pattern-side and the region-side
# model / optimiser / scheduler triples, in that order.
region_trainer = RegionTrainer(
    city_data,
    pattern_encoder, pattern_optimizer, pattern_scheduler,
    region_aggregator, region_optimizer, region_scheduler,
)

# Triplet training on the pre-computed pattern embeddings; adaptive is the
# negation of args.fixed.
region_training_options = dict(
    epochs=20,
    embeddings=embeddings,
    adaptive=not args.fixed,
    save_name='RegionDCL_',
    window_sizes=[1000, 2000, 3000],
)
region_trainer.train_region_triplet_freeze(**region_training_options)

print('Training finished. Region embeddings have been saved in embeddings/ directory.')

Building pretraining dataset...


Epoch 1: 100%|███████████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 118.02it/s, loss=0]


Epoch 1, Tiplet Loss: 5.733050794211048


Epoch 2: 100%|████████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 111.11it/s, loss=7.33]


Epoch 2, Tiplet Loss: 4.783924699122548


Epoch 3: 100%|██████████████████████████████████████████████████████████| 2839/2839 [00:23<00:00, 120.85it/s, loss=18.3]


Epoch 3, Tiplet Loss: 6.123286394553942


Epoch 4: 100%|███████████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 119.09it/s, loss=0]


Epoch 4, Tiplet Loss: 4.719038029720909


Epoch 5: 100%|███████████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 113.56it/s, loss=0]


Epoch 5, Tiplet Loss: 4.11406084477755


Epoch 6: 100%|█████████████████████████████████████████████████████████████| 2839/2839 [00:23<00:00, 121.19it/s, loss=0]


Epoch 6, Tiplet Loss: 6.042075970089742


Epoch 7: 100%|███████████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 117.27it/s, loss=0]


Epoch 7, Tiplet Loss: 4.057916764487997


Epoch 8: 100%|███████████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 112.99it/s, loss=0]


Epoch 8, Tiplet Loss: 3.83761165534542


Epoch 9: 100%|██████████████████████████████████████████████████████████| 2839/2839 [00:23<00:00, 120.02it/s, loss=17.3]


Epoch 9, Tiplet Loss: 5.895370361291846


Epoch 10: 100%|███████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 116.94it/s, loss=16.1]


Epoch 10, Tiplet Loss: 4.455195023720725


Epoch 11: 100%|███████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 112.80it/s, loss=17.3]


Epoch 11, Tiplet Loss: 3.3467080013172046


Epoch 12: 100%|█████████████████████████████████████████████████████████| 2839/2839 [00:23<00:00, 119.68it/s, loss=4.79]


Epoch 12, Tiplet Loss: 5.868094685128895


Epoch 13: 100%|██████████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 116.53it/s, loss=0]


Epoch 13, Tiplet Loss: 3.7715611318398636


Epoch 14: 100%|██████████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 112.17it/s, loss=0]


Epoch 14, Tiplet Loss: 3.745418799304259


Epoch 15: 100%|████████████████████████████████████████████████████████████| 2839/2839 [00:23<00:00, 119.80it/s, loss=0]


Epoch 15, Tiplet Loss: 5.485282574459128


Epoch 16: 100%|██████████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 115.34it/s, loss=0]


Epoch 16, Tiplet Loss: 3.596848978633769


Epoch 17: 100%|██████████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 112.85it/s, loss=0]


Epoch 17, Tiplet Loss: 3.21716950625108


Epoch 18: 100%|████████████████████████████████████████████████████████████| 2839/2839 [00:23<00:00, 121.51it/s, loss=0]


Epoch 18, Tiplet Loss: 5.502993434951019


Epoch 19: 100%|██████████████████████████████████████████████████████████████| 855/855 [00:07<00:00, 118.58it/s, loss=0]


Epoch 19, Tiplet Loss: 3.892980766296387


Epoch 20: 100%|██████████████████████████████████████████████████████████████| 407/407 [00:03<00:00, 113.86it/s, loss=0]


Epoch 20, Tiplet Loss: 2.558074777776545
Training finished. Region embeddings have been saved in embeddings/ directory.


#### Merge the region embeddings produced by RegionDCL with the IRIS cells of Paris. The embedding indices should match the indices in region_downstream.pkl, which contains the Paris IRIS cells as its regions. That file, in turn, was created from the original dataframe containing the Paris IRIS cells, so there should be a 1-to-1 correspondence.

In [46]:
# Load the pickled mapping of region key -> embedding vector (presumably the
# file written by the region-training step with save_name 'RegionDCL_').
# CAUTION: unpickling can execute arbitrary code; only load files produced by
# this pipeline.
region_embs_dict = pd.read_pickle('embeddings/Paris/RegionDCL_20.pkl')
region_embeddings = pd.DataFrame(region_embs_dict.values(), index=region_embs_dict.keys())
display(region_embeddings)

IRIS_Paris_embeddings = pd.read_parquet('IRIS Paris.parquet')

# The column assignment below aligns rows on index labels. Without this check
# an IRIS cell lacking an embedding would silently become a NaN row, and an
# embedding lacking an IRIS cell would silently be dropped, so the assumed
# 1-to-1 correspondence is verified explicitly.
cells_without_embedding = IRIS_Paris_embeddings.index.difference(region_embeddings.index)
embeddings_without_cell = region_embeddings.index.difference(IRIS_Paris_embeddings.index)
if len(cells_without_embedding) or len(embeddings_without_cell):
    raise ValueError(
        'Region embeddings and IRIS cells are not in 1-to-1 correspondence: '
        f'{len(cells_without_embedding)} IRIS cells have no embedding, '
        f'{len(embeddings_without_cell)} embeddings have no IRIS cell.'
    )

# Attach one column per embedding dimension, then keep only the embedding
# columns, indexed by the IRIS code.
IRIS_Paris_embeddings[region_embeddings.columns] = region_embeddings
IRIS_Paris_embeddings = (
    IRIS_Paris_embeddings
    .drop(columns=['code_commune', 'geometry'])
    .set_index('code_iris')
)

display(IRIS_Paris_embeddings)
IRIS_Paris_embeddings.to_parquet('RegionDCL_IRIS_Paris_embeddings.parquet')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.155640,0.541241,-1.552484,0.356977,-1.736598,0.142796,-1.446629,1.474845,1.793473,-2.367339,...,-0.648237,-0.885627,-0.937770,0.419687,0.025002,-1.014766,-1.895285,-0.646993,-1.090850,-0.565864
1,1.347954,0.650689,0.018240,0.584189,-0.199877,-0.448162,0.299191,-0.175236,-0.107955,-1.175052,...,1.211076,-0.181422,-0.476251,0.415405,-1.052445,-0.799308,-1.164737,-0.697996,0.541870,-1.378236
2,-0.698959,0.896405,-0.875261,0.391883,-1.496848,-0.138475,-1.719965,2.227425,1.232168,-2.249786,...,-0.267723,-0.522346,-0.962249,-0.219073,0.724396,-0.248601,-1.354952,-0.742301,-1.021840,-0.666556
3,-0.209782,-0.157673,-0.647407,0.808782,-1.313741,1.138889,-1.547369,1.824086,-0.395828,-1.862130,...,0.210744,-0.098627,-1.785979,-0.718676,1.533811,1.153300,-1.099394,-1.633024,-1.415450,-0.753298
4,-0.005342,-0.128149,-0.995483,0.154544,2.689951,0.246803,1.487742,-0.738581,-1.134661,1.549973,...,-0.820444,-1.813761,0.059071,1.641847,-0.746659,-0.228584,-0.840271,0.899284,0.710280,0.633954
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2836,1.210889,0.605233,-1.254536,0.798792,0.989185,-0.442684,0.615255,-0.004198,0.460976,0.040388,...,-0.575556,-1.725776,0.078569,1.030678,-1.569415,-1.628996,-1.663372,0.147831,1.101275,-0.090091
2837,-1.607422,0.560672,0.211732,-0.428983,0.278820,-0.859638,-0.237076,-0.339976,1.102264,-0.078230,...,-0.385881,0.997247,0.881965,-0.366797,0.009761,0.139275,1.037128,1.401160,0.522745,0.776658
2838,-0.751333,-0.615479,-0.774459,0.817685,1.344045,0.490856,0.643572,-1.366485,1.052982,0.755563,...,-1.642248,-0.590999,0.291287,1.085998,0.402556,0.020732,-0.463104,1.657594,0.099996,1.220423
2839,0.663056,1.437725,-1.110954,0.309514,-1.054076,-1.031078,-0.728029,1.393851,1.159602,-1.543969,...,0.632038,-1.011022,-0.575822,0.730162,-0.843872,-1.535553,-1.454355,-0.509752,-0.044040,-1.091046


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
code_iris,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
920440109,0.155640,0.541241,-1.552484,0.356977,-1.736598,0.142796,-1.446629,1.474845,1.793473,-2.367339,...,-0.648237,-0.885627,-0.937770,0.419687,0.025002,-1.014766,-1.895285,-0.646993,-1.090850,-0.565864
930270105,1.347954,0.650689,0.018240,0.584189,-0.199877,-0.448162,0.299191,-0.175236,-0.107955,-1.175052,...,1.211076,-0.181422,-0.476251,0.415405,-1.052445,-0.799308,-1.164737,-0.697996,0.541870,-1.378236
751197316,-0.698959,0.896405,-0.875261,0.391883,-1.496848,-0.138475,-1.719965,2.227425,1.232168,-2.249786,...,-0.267723,-0.522346,-0.962249,-0.219073,0.724396,-0.248601,-1.354952,-0.742301,-1.021840,-0.666556
751176716,-0.209782,-0.157673,-0.647407,0.808782,-1.313741,1.138889,-1.547369,1.824086,-0.395828,-1.862130,...,0.210744,-0.098627,-1.785979,-0.718676,1.533811,1.153300,-1.099394,-1.633024,-1.415450,-0.753298
920020104,-0.005342,-0.128149,-0.995483,0.154544,2.689951,0.246803,1.487742,-0.738581,-1.134661,1.549973,...,-0.820444,-1.813761,0.059071,1.641847,-0.746659,-0.228584,-0.840271,0.899284,0.710280,0.633954
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
920070111,1.210889,0.605233,-1.254536,0.798792,0.989185,-0.442684,0.615255,-0.004198,0.460976,0.040388,...,-0.575556,-1.725776,0.078569,1.030678,-1.569415,-1.628996,-1.663372,0.147831,1.101275,-0.090091
930050101,-1.607422,0.560672,0.211732,-0.428983,0.278820,-0.859638,-0.237076,-0.339976,1.102264,-0.078230,...,-0.385881,0.997247,0.881965,-0.366797,0.009761,0.139275,1.037128,1.401160,0.522745,0.776658
910270114,-0.751333,-0.615479,-0.774459,0.817685,1.344045,0.490856,0.643572,-1.366485,1.052982,0.755563,...,-1.642248,-0.590999,0.291287,1.085998,0.402556,0.020732,-0.463104,1.657594,0.099996,1.220423
920190113,0.663056,1.437725,-1.110954,0.309514,-1.054076,-1.031078,-0.728029,1.393851,1.159602,-1.543969,...,0.632038,-1.011022,-0.575822,0.730162,-0.843872,-1.535553,-1.454355,-0.509752,-0.044040,-1.091046
