In [6]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Dataset
from torch_geometric.nn import GCNConv, GATConv
from sklearn.metrics import r2_score

# Load the data
folder = r"data/Anaheim/Scaled/random_0/10"
attraction = pd.read_csv(f'{folder}/attraction.csv')
production = pd.read_csv(f'{folder}/production.csv')
real_od_matrix = pd.read_csv(f'{folder}/real_od_matrix.csv', index_col=0)
travel_time = pd.read_csv(f'{folder}/travel_time_matrix.csv', index_col=0)
train_od_matrix = pd.read_csv(f'{folder}/at_miss0.10_train_od_matrix.csv', index_col=0)
val_od_matrix = pd.read_csv(f'{folder}/at_miss0.10_val_od_matrix.csv', index_col=0)
test_od_matrix = pd.read_csv(f'{folder}/at_miss0.10_test_od_matrix.csv', index_col=0)

In [7]:
class Dataloader(Dataset):
    def __init__(self, root, transformer=None, pre_transformer=None):
        """ 
        root = where the dataset should be stored. This folder is split
        into raw_dir (downloaded dataset) and processesd_dir (processed data)
        """
        super(Dataloader, self).__init__(root, transformer, pre_transformer)

    @property
    def raw_file_names(self):
        """ if this file exists in the raw_dir, the download is not triggered.
        """
        return "attraction.csv", "production.csv", "real_od_matrix.csv", "travel_time_matrix.csv", "at_miss0.10_train_od_matrix.csv", "at_miss0.10_val_od_matrix.csv", "at_miss0.10_test_od_matrix.csv"

    @property
    def processed_file_names(self):
        """ If these files are found in raw_dir, processing will skiped."""
        return "not_implemented.pt"
    
    def download(self):
        pass

    def process(self):
        

SyntaxError: incomplete input (1024001416.py, line 24)

In [9]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, GATConv
from sklearn.metrics import r2_score

# Load the data
folder = r"data/Anaheim/Scaled/random_0/10"
attraction = pd.read_csv(f'{folder}/attraction.csv')
production = pd.read_csv(f'{folder}/production.csv')
real_od_matrix = pd.read_csv(f'{folder}/real_od_matrix.csv', index_col=0)
travel_time = pd.read_csv(f'{folder}/travel_time_matrix.csv', index_col=0)
train_od_matrix = pd.read_csv(f'{folder}/at_miss0.10_train_od_matrix.csv', index_col=0)
val_od_matrix = pd.read_csv(f'{folder}/at_miss0.10_val_od_matrix.csv', index_col=0)
test_od_matrix = pd.read_csv(f'{folder}/at_miss0.10_test_od_matrix.csv', index_col=0)

# Prepare node features
node_features = np.vstack((attraction["0"].values, production["0"].values)).T

# Prepare edge index and edge attributes
edge_index = []
edge_attr = []
labels = []
for i in range(travel_time.shape[0]):
    for j in range(travel_time.shape[1]):
        if i != j:  # Assuming no self-loops
            edge_index.append([i, j])
            if train_od_matrix.iloc[i, j] in ["False", "No_connection"]:
                edge_attr.append([travel_time.iloc[i, j]])
            else:
                edge_attr.append([travel_time.iloc[i, j], float(train_od_matrix.iloc[i, j])])
            labels.append(real_od_matrix.iloc[i, j])

edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(edge_attr, dtype=torch.float)
labels = torch.tensor(labels, dtype=torch.float)

# Create masks for train, validation, and test
train_mask = []
val_mask = []
test_mask = []
for i in range(train_od_matrix.shape[0]):
    for j in range(train_od_matrix.shape[1]):
        if i != j:  # Assuming no self-loops
            train_mask.append(train_od_matrix.iloc[i, j] != "False" and train_od_matrix.iloc[i, j] != "No_connection")
            val_mask.append(val_od_matrix.iloc[i, j] != "False" and val_od_matrix.iloc[i, j] != "No_connection")
            test_mask.append(test_od_matrix.iloc[i, j] != "False" and test_od_matrix.iloc[i, j] != "No_connection")

train_mask = torch.tensor(train_mask, dtype=torch.bool)
val_mask = torch.tensor(val_mask, dtype=torch.bool)
test_mask = torch.tensor(test_mask, dtype=torch.bool)


ValueError: expected sequence of length 2 at dim 1 (got 1)

In [2]:
class GCN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index, edge_attr)
        x = F.relu(x)
        x = self.conv2(x, edge_index, edge_attr)
        return x


class GAT(torch.nn.Module):
    def __init__(self, in_channels, out_channels, heads=1):
        super(GAT, self).__init__()
        self.conv1 = GATConv(in_channels, 16, heads=heads)
        self.conv2 = GATConv(16 * heads, out_channels, heads=1)

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x


In [3]:
def train(model, data, optimizer, criterion, mask):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_attr).view(-1)
    loss = criterion(out[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def test(model, data, mask):
    model.eval()
    out = model(data.x, data.edge_index, data.edge_attr).view(-1)
    pred = out[mask].detach().cpu().numpy()
    true = data.y[mask].detach().cpu().numpy()
    return r2_score(true, pred)


In [4]:
node_features.shape

(38, 2)

In [6]:
# Prepare the data object
data = Data(x=torch.tensor(node_features, dtype=torch.float),
            edge_index=edge_index,
            edge_attr=edge_attr,
            y=labels)

# Train GCN
gcn_model = GCN(in_channels=2, out_channels=37)
optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for epoch in range(1000):
    loss = train(gcn_model, data, optimizer, criterion, train_mask)
    if epoch % 100 == 0:
        val_r2 = test(gcn_model, data, val_mask)
        print(f'Epoch {epoch}, Loss: {loss}, Validation R²: {val_r2}')

# Evaluate GCN
test_r2 = test(gcn_model, data, test_mask)
print(f'GCN Test R²: {test_r2}')

# Train GAT
gat_model = GAT(in_channels=2, out_channels=37)
optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for epoch in range(10000):
    loss = train(gat_model, data, optimizer, criterion, train_mask)
    if epoch % 100 == 0:
        val_r2 = test(gat_model, data, val_mask)
        print(f'Epoch {epoch}, Loss: {loss}, Validation R²: {val_r2}')

# Evaluate GAT
test_r2 = test(gat_model, data, test_mask)
print(f'GAT Test R²: {test_r2}')

Epoch 0, Loss: 0.2286386638879776, Validation R²: -93.13689324207938
Epoch 100, Loss: 0.005003111436963081, Validation R²: -0.22499677682995234
Epoch 200, Loss: 0.004984183702617884, Validation R²: -0.20265408230275272
Epoch 300, Loss: 0.004963832441717386, Validation R²: -0.18858898373962285
Epoch 400, Loss: 0.004943173378705978, Validation R²: -0.17511712999346285
Epoch 500, Loss: 0.004923421889543533, Validation R²: -0.1632392757301766
Epoch 600, Loss: 0.004905179142951965, Validation R²: -0.15343754956477862
Epoch 700, Loss: 0.004888656083494425, Validation R²: -0.14583521941671873


KeyboardInterrupt: 

In [None]:
1406/38

37.0

In [None]:
# Prepare the data object
data = Data(x=torch.tensor(node_features, dtype=torch.float),
            edge_index=edge_index,
            edge_attr=edge_attr,
            y=labels)

# Train GCN
gcn_model = GCN(in_channels=2, out_channels=1)
optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for epoch in range(100000):
    loss = train(gcn_model, data, optimizer, criterion, train_mask)
    if epoch % 100 == 0:
        val_r2 = test(gcn_model, data, val_mask)
        print(f'Epoch {epoch}, Loss: {loss}, Validation R²: {val_r2}')

# Evaluate GCN
test_r2 = test(gcn_model, data, test_mask)
print(f'GCN Test R²: {test_r2}')

# Train GAT
gat_model = GAT(in_channels=2, out_channels=1)
optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for epoch in range(1000):
    loss = train(gat_model, data, optimizer, criterion, train_mask)
    if epoch % 100 == 0:
        val_r2 = test(gat_model, data, val_mask)
        print(f'Epoch {epoch}, Loss: {loss}, Validation R²: {val_r2}')

# Evaluate GAT
test_r2 = test(gat_model, data, test_mask)
print(f'GAT Test R²: {test_r2}')


Epoch 0, Loss: 0.46369633078575134, Validation R²: -200.97040817734933
Epoch 100, Loss: 0.004774666391313076, Validation R²: -0.2133450190961601
Epoch 200, Loss: 0.004762969445437193, Validation R²: -0.1925298427379265
Epoch 300, Loss: 0.004750616382807493, Validation R²: -0.18980318828436915
Epoch 400, Loss: 0.004737409297376871, Validation R²: -0.18707524853999957
Epoch 500, Loss: 0.004724108148366213, Validation R²: -0.18455535077891327
Epoch 600, Loss: 0.004711231216788292, Validation R²: -0.18236438797256227
Epoch 700, Loss: 0.004699103068560362, Validation R²: -0.18056557703969256
Epoch 800, Loss: 0.004687908571213484, Validation R²: -0.1791806222292751
Epoch 900, Loss: 0.004677723161876202, Validation R²: -0.17820593607038004
Epoch 1000, Loss: 0.0046685440465807915, Validation R²: -0.17761839073256858
Epoch 1100, Loss: 0.004660314414650202, Validation R²: -0.1773858100732164
Epoch 1200, Loss: 0.0046529462561011314, Validation R²: -0.17747365501151458
Epoch 1300, Loss: 0.00464633

KeyboardInterrupt: 