In [None]:
import pandas as pd
import numpy as np
import torch

from data_util.dataset import CityData
from model.regiondcl import PatternEncoder, RegionEncoder
from model.trainer import PatternTrainer, RegionTrainer

#### Arguments used by subsequent code

In [None]:
class Args:
    """Bare attribute container holding the settings read by the later cells."""


args = Args()

# Populate every setting in one place; the later cells read them as attributes.
vars(args).update(
    # Dataset selection; `no_random` and `fixed` are negated where they are used.
    city="Paris",
    no_random=False,
    fixed=False,
    # Sizes and per-stage settings forwarded to the pattern encoder.
    dim=64,
    d_feedforward=1024,
    building_head=8,
    building_layers=2,
    building_dropout=0.2,
    building_activation='relu',
    bottleneck_head=8,
    bottleneck_layers=2,
    bottleneck_dropout=0.2,
    bottleneck_activation='relu',
    # Adam learning rate / weight decay and the StepLR decay factor.
    lr=0.0001,
    weight_decay=0.0001,
    gamma=0.999,
    # Base name used when saving and re-loading the pattern embeddings.
    save_name='pattern_embedding',
)

#### Pattern training

In [None]:
# Load the data for the configured city; `with_random` is on unless
# `args.no_random` is set.
include_random = not args.no_random
city_data = CityData(args.city, with_random=include_random)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [None]:
# Collect the encoder's constructor arguments: feature dimensions come from the
# loaded city data, everything else from `args` (the distance penalty is fixed to 1).
pattern_encoder_kwargs = dict(
    d_building=city_data.building_feature_dim,
    d_poi=city_data.poi_feature_dim,
    d_hidden=args.dim,
    d_feedforward=args.d_feedforward,
    building_head=args.building_head,
    building_layers=args.building_layers,
    building_dropout=args.building_dropout,
    building_distance_penalty=1,
    building_activation=args.building_activation,
    bottleneck_head=args.bottleneck_head,
    bottleneck_layers=args.bottleneck_layers,
    bottleneck_dropout=args.bottleneck_dropout,
    bottleneck_activation=args.bottleneck_activation,
)
pattern_encoder = PatternEncoder(**pattern_encoder_kwargs).to(device)

# Encode building patterns.
# Adam optimizer plus a StepLR schedule that multiplies the learning rate by
# `args.gamma` on every scheduler step.
pattern_optimizer = torch.optim.Adam(
    pattern_encoder.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
pattern_scheduler = torch.optim.lr_scheduler.StepLR(
    pattern_optimizer,
    step_size=1,
    gamma=args.gamma,
)

pattern_trainer = PatternTrainer(city_data, pattern_encoder, pattern_optimizer, pattern_scheduler)
# The epoch count (20) must match the `_20` suffix used when the embeddings
# file is loaded in the next cell.
pattern_trainer.train_pattern_contrastive(epochs=20, save_name=args.save_name)
print('Pattern (building groups) training finished. Embeddings have been saved in embeddings/ directory.')

In [None]:
# Re-load the pattern embeddings saved by the pattern-training cell
# (the `_20` suffix corresponds to the 20 epochs trained there).
pattern_embeddings_path = f'embeddings/{args.city}/{args.save_name}_20.npy'
embeddings = np.load(pattern_embeddings_path)

# Region-level encoder with its own optimizer and learning-rate schedule.
region_aggregator = RegionEncoder(d_hidden=args.dim, d_head=8).to(device)
region_optimizer = torch.optim.Adam(
    region_aggregator.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
region_scheduler = torch.optim.lr_scheduler.StepLR(
    region_optimizer,
    step_size=1,
    gamma=args.gamma,
)

# The trainer receives both the pattern-stage and the region-stage components.
region_trainer = RegionTrainer(
    city_data,
    pattern_encoder, pattern_optimizer, pattern_scheduler,
    region_aggregator, region_optimizer, region_scheduler,
)

# `adaptive` is enabled unless `args.fixed` is set.
region_training_options = dict(
    epochs=20,
    embeddings=embeddings,
    adaptive=not args.fixed,
    save_name='RegionDCL_',
    window_sizes=[1000, 2000, 3000],
)
region_trainer.train_region_triplet_freeze(**region_training_options)

print('Training finished. Region embeddings have been saved in embeddings/ directory.')

#### Merge the region embeddings produced by RegionDCL with the IRIS cells of Paris. The embedding indices should match the indices in region_downstream.pkl, whose regions are the IRIS cells of Paris. That file was in turn created from the original dataframe containing the IRIS cells of Paris, so there should be a 1-1 correspondence.

In [None]:
# Load the region embeddings written by the region-training cell.
# The pickle is read as a mapping: one key per region, one vector per value.
region_embs_dict = pd.read_pickle('embeddings/Paris/RegionDCL_20.pkl')
# One row per key, one column per embedding component.
region_embeddings = pd.DataFrame.from_dict(region_embs_dict, orient='index')
display(region_embeddings)

iris_cells = pd.read_parquet('IRIS Paris.parquet')

# The merge below aligns on the index, so any IRIS cell without a matching
# embedding key would silently get NaN values (and unmatched embeddings would be
# dropped). Report such mismatches instead of letting them pass unnoticed.
cells_without_embedding = iris_cells.index.difference(region_embeddings.index)
embeddings_without_cell = region_embeddings.index.difference(iris_cells.index)
if len(cells_without_embedding) or len(embeddings_without_cell):
    print(f'WARNING: IRIS cells and region embeddings are not in 1-1 correspondence: '
          f'{len(cells_without_embedding)} cells have no embedding, '
          f'{len(embeddings_without_cell)} embeddings match no cell.')

# Attach the embedding columns, drop the columns that are not needed downstream
# and key the result by IRIS code. Built as a new frame so that re-running the
# cell never operates on an already-modified `iris_cells`.
# NOTE(review): the embedding columns keep their integer labels; some
# pandas/pyarrow versions require string column names in `to_parquet` - confirm.
IRIS_Paris_embeddings = (
    iris_cells
    .join(region_embeddings)
    .drop(columns=['code_commune', 'geometry'])
    .set_index('code_iris')
)

display(IRIS_Paris_embeddings)
IRIS_Paris_embeddings.to_parquet('RegionDCL_IRIS_Paris_embeddings.parquet')