In [25]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from torch_geometric.nn import SAGEConv, global_mean_pool
from utils import filling, kmeans_clustering, plot_clusters, get_feature_propagation

In [23]:
# Load the serialized graph object from disk.
data = torch.load('Data/santiago_zero_ismt.pt')

# Split the raw node matrix into named attributes before trimming it.
# Column layout as used by the slices below: 0 -> lat, 1 -> lon,
# 2..7 -> the six node features kept in data.x, 8 -> comuna, last column -> ismt.
node_table = data.x
data.comuna = node_table[:, 8]
data.ismt = node_table[:, -1]
data.lat = node_table[:, 0]
data.lon = node_table[:, 1]
data.x = node_table[:, 2:8]

# Quick inspection of the graph summary and of each attribute, in a fixed order.
for inspected in (data, data.x, data.lat, data.comuna, data.ismt,
                  data.edge_index, data.edge_attributes):
    print(inspected)

# Cast the target to float32 (done after the inspection prints above).
data.ismt = data.ismt.float()

Data(x=[355936, 6], edge_index=[2, 673565], edge_attributes=[673565, 2], comuna=[355936], ismt=[355936], lat=[355936], lon=[355936])
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 1.5074, -1.1100, -1.6402,  1.2143,  1.6439,  1.6780],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]])
tensor([-33.4416, -33.4420, -33.4429,  ..., -33.4390, -33.4389, -33.4386])
tensor([39., 39., 39.,  ..., 42., 42., 42.])
tensor([0.8805, 0.8805, 0.8594,  ..., 0.8625, 0.8625, 0.8625])
tensor([[     0,      1,      2,  ..., 355934, 355935, 355935],
        [  4463,   4467,    488,  ..., 191461, 251187,   7949]])
tensor([[80.8500,  5.8000],
        [18.8300,  1.3000],
        [16.0600,  1.1000],
        ...,
        [33.0400,  4.1000],
        [ 2.5300,  0.2000],
    

In [26]:
# Overwrite the node feature matrix with the result of the project helper
# `get_feature_propagation` (imported from utils), called on the whole graph object.
# NOTE(review): judging by the recorded output, rows of data.x that were all zeros
# come back with non-zero values, so this presumably fills missing features by
# propagating them over the graph -- confirm against the helper in utils.
# This cell reassigns data.x, so re-running it passes the already-overwritten
# `data` back to the helper; reload `data` first when re-executing.
data.x = get_feature_propagation(data)

Starting feature filling
tensor([[-1.8762,  0.0880,  1.8812, -0.7552, -2.0797, -1.2788],
        [ 0.2155, -0.1407, -0.2000,  0.7840,  0.9977,  0.7484],
        [ 0.0507, -1.1760, -0.3934,  1.3322,  0.4679,  0.7087],
        ...,
        [ 1.0445, -1.0450, -1.3092,  1.1516,  1.1633,  1.0873],
        [ 1.5074, -1.1100, -1.6402,  1.2143,  1.6439,  1.6780],
        [ 1.2992, -0.1245, -1.1497,  0.2336,  1.0391,  0.9675]])
Feature filling completed. It took: 1.98s


## Train and Test split

In [27]:
# Candidate node ids: every node that appears as an endpoint of at least one edge.
index_list = torch.unique(data.edge_index.flatten()).tolist()

# 80/20 split of the node ids into training and test sets (fixed seed).
train_index, test_index = train_test_split(
    index_list,
    test_size=0.2,
    random_state=42,
)

# Report the size of each partition.
print("Training set length:", len(train_index))
print("Test set length:", len(test_index))

n_nodes, n_features = data.x.shape

# Boolean node masks: start all-False, switch on the selected ids, then attach
# each mask to the graph object.
train_mask = torch.zeros(n_nodes, dtype=torch.bool)
train_mask[train_index] = True
data['train_mask'] = train_mask

test_mask = torch.zeros(n_nodes, dtype=torch.bool)
test_mask[test_index] = True
data['test_mask'] = test_mask

Training set length: 284748
Test set length: 71188


## Train, Validation, and Test split

In [3]:
# Candidate node ids: every node that appears as an endpoint of at least one edge.
index_list = torch.unique(data.edge_index.flatten()).tolist()

# Fraction of the node ids assigned to the training and validation sets;
# whatever is left after both goes to the test set.
train_percentage = 0.8
val_percentage = 0.1

# First cut: training ids versus everything else (fixed seed).
train_index, remaining_index = train_test_split(
    index_list,
    train_size=train_percentage,
    random_state=42,
)

# Second cut: divide the remainder into validation and test ids. The validation
# share is expressed relative to the size of the remainder.
val_index, test_index = train_test_split(
    remaining_index,
    train_size=val_percentage / (1 - train_percentage),
    random_state=42,
)

# Report the size of each partition.
print("Training set length:", len(train_index))
print("Validation set length:", len(val_index))
print("Test set length:", len(test_index))

n_nodes, n_features = data.x.shape

# Boolean node masks: start all-False, switch on the selected ids, then attach
# each mask to the graph object.
train_mask = torch.zeros(n_nodes, dtype=torch.bool)
train_mask[train_index] = True
data['train_mask'] = train_mask

val_mask = torch.zeros(n_nodes, dtype=torch.bool)
val_mask[val_index] = True
data['val_mask'] = val_mask

test_mask = torch.zeros(n_nodes, dtype=torch.bool)
test_mask[test_index] = True
data['test_mask'] = test_mask

Training set length: 284748
Validation set length: 35594
Test set length: 35594


In [30]:
# Stack the six node features, the ISMT target and the train/test masks side by
# side: one row per node, one column per quantity. The 1-D vectors are given a
# trailing axis so they can be concatenated with the 2-D feature matrix.
data_tensor = torch.cat(
    (
        data.x,
        data.ismt[:, None],
        data.train_mask[:, None],
        data.test_mask[:, None],
    ),
    dim=1,
)

# Wrap the stacked tensor in a labelled DataFrame.
df_ismt = pd.DataFrame(
    data_tensor.numpy(),
    columns=['beautiful', 'boring', 'depressing', 'lively', 'safe', 'wealthy', 'prom_ismt', 'train', 'test'],
)

Unnamed: 0,beautiful,boring,depressing,lively,safe,wealthy,prom_ismt,train,test
0,-1.876222,0.087977,1.881226,-0.755172,-2.079733,-1.278782,0.880549,0.0,1.0
1,0.215534,-0.140674,-0.200043,0.784020,0.997746,0.748403,0.880549,1.0,0.0
2,0.050726,-1.175979,-0.393403,1.332155,0.467863,0.708689,0.859413,0.0,1.0
3,-0.448247,-0.247521,0.102419,0.541847,0.165669,0.354410,0.859413,1.0,0.0
4,-0.352609,-1.208094,-0.127524,1.362460,0.405403,0.570324,0.859413,1.0,0.0
...,...,...,...,...,...,...,...,...,...
355931,1.053196,-0.721398,-1.162616,0.851769,1.051627,1.053159,0.862526,1.0,0.0
355932,0.720295,-0.491846,-0.794949,0.580761,0.718764,0.719625,0.862526,0.0,1.0
355933,1.044455,-1.045037,-1.309188,1.151591,1.163294,1.087302,0.862526,0.0,1.0
355934,1.507416,-1.109993,-1.640187,1.214309,1.643906,1.677996,0.862526,1.0,0.0
