In [None]:
# Python
from tqdm import tqdm
import matplotlib.pyplot as plt

# NumPy and PyTorch
import torch
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader

# Custom
from path_reader import PathDataLoader
from networks import SiameseNetwork
from losses import ContrastiveLossSimple

In [None]:
def _flatten_path(path, max_points=5, coords_per_point=3):
    """Flatten one path object into a flat list of numbers.

    Layout, in order:
      1. every coordinate of every entry in ``path.points``;
      2. ``(max_points - len(path.points)) * coords_per_point`` zeros;
      3. every entry of ``path.interaction_types``;
      4. ``max_points - len(path.points)`` zeros;
      5. ``path.path_gain_db``.

    The result has a fixed length only when each point has
    ``coords_per_point`` coordinates and ``interaction_types`` has one entry
    per point (assumed, not checked). Paths with more than ``max_points``
    points are not truncated: no padding is added and the vector is longer.
    """
    n_points = len(path.points)
    features = [coord for point in path.points for coord in point]
    # A negative repeat count yields an empty list, i.e. no padding.
    features.extend([0] * ((max_points - n_points) * coords_per_point))
    features.extend(path.interaction_types)
    features.extend([0] * (max_points - n_points))
    features.append(path.path_gain_db)
    return features


def init_data(paths, batch_size, ratio, tol=1000000, max_pairs=1000000):
    """Build a shuffled DataLoader of labelled path pairs.

    Args:
        paths: Two-level indexable grid (``paths[i][j]``) whose cells are
            iterables of path objects exposing ``points``,
            ``interaction_types``, ``hash`` and ``path_gain_db``.
        batch_size: Batch size handed to the DataLoader.
        ratio: Currently unused. No train/validation split is performed;
            the parameter is kept so existing callers keep working.
        tol: Two paths are labelled 0 when the absolute difference of their
            hashes is below ``tol``, and 1 otherwise.
        max_pairs: Pair generation stops after the first anchor index whose
            pairs push the total above this number.

    Returns:
        A ``DataLoader`` (``shuffle=True``) over a list of
        ``(features_a, features_b, label)`` tuples, where the features are
        float tensors and the label is a long tensor.
    """
    all_data = []
    all_hash = []
    for i in range(len(paths)):
        row = paths[i]
        # Size each row by its own length rather than len(paths[0]), so a
        # longer row is not silently truncated and a shorter one does not
        # raise IndexError.
        for j in range(len(row)):
            for path in row[j]:
                all_data.append(_flatten_path(path))
                all_hash.append(path.hash)

    # Convert each feature vector once and share the tensor between all the
    # pairs it takes part in (likewise for the two possible labels), instead
    # of building fresh tensors for every pair. Batches are unaffected: the
    # DataLoader stacks the items into new tensors.
    tensors = [torch.tensor(vec, dtype=torch.float) for vec in all_data]
    label_close = torch.tensor(0, dtype=torch.long)
    label_far = torch.tensor(1, dtype=torch.long)

    data_pairs = []
    for i in range(len(tensors)):
        for j in range(len(tensors)):
            if i == j:  # never pair a sample with itself
                continue
            is_close = abs(all_hash[i] - all_hash[j]) < tol
            data_pairs.append(
                (tensors[i], tensors[j], label_close if is_close else label_far)
            )
        print(len(data_pairs))
        # The cap is only checked after a full row of pairs for anchor i, so
        # the total can exceed max_pairs, and when it triggers only the
        # first few anchors appear as the first element of a pair.
        if len(data_pairs) > max_pairs:
            break

    return DataLoader(data_pairs, batch_size, shuffle=True)

In [None]:
def train_siamese_network(train_loader, net, criterion, optimizer, epochs=10, log_every=1000):
    """Train ``net`` on batches of labelled input pairs.

    Args:
        train_loader: Iterable yielding ``(input1, input2, label)`` batches.
        net: Module applied separately to each input of a pair.
        criterion: Callable invoked as ``criterion(output1, output2, label)``
            that returns a scalar loss tensor.
        optimizer: Optimizer stepped once per batch (presumably built over
            ``net``'s parameters by the caller).
        epochs: Number of full passes over ``train_loader``.
        log_every: Print the current batch loss every ``log_every``
            iterations; ``0`` or ``None`` disables logging.
    """
    # Put the module in training mode explicitly so the result does not
    # depend on whatever mode an earlier cell left it in.
    net.train()
    for epoch in range(epochs):
        for i, (input1, input2, label) in enumerate(train_loader):
            optimizer.zero_grad()
            output1, output2 = net(input1), net(input2)
            loss = criterion(output1, output2, label)
            loss.backward()
            optimizer.step()
            if log_every and i % log_every == 0:
                print(f"Epoch {epoch}, Iteration {i}, Loss {loss.item()}")

In [None]:
# Load data
pathLoader = PathDataLoader()
paths = pathLoader.read_file('eu_city_2x2_macro_306.bin')

# Set random seeds
# Seeding NumPy alone does not make the run repeatable: DataLoader shuffling
# (and, presumably, the network's weight initialisation) draws from PyTorch's
# random number generator, so seed that one as well.
np.random.seed(0)
torch.manual_seed(0)

In [None]:
# Turn the loaded paths into a DataLoader of labelled pairs.
# train_val_ratio is forwarded as `ratio`, which init_data accepts but does
# not use, so no validation split is produced.
batch_size = 10
train_val_ratio = 0.95
dataloaders = init_data(paths, batch_size=batch_size, ratio=train_val_ratio)

In [None]:
# Create the Siamese network, its contrastive loss, and an Adam optimizer
# over the network's parameters.
net = SiameseNetwork()
criterion = ContrastiveLossSimple()
optimizer = optim.Adam(params=net.parameters(), lr=5e-4)

In [None]:
# Train for two passes over the pair DataLoader.
train_siamese_network(
    train_loader=dataloaders,
    net=net,
    criterion=criterion,
    optimizer=optimizer,
    epochs=2,
)

In [None]:
# Flatten every path into a feature vector at notebook scope so individual
# samples can be inspected in the cells below.
# Per-path layout: all point coordinates, zero padding up to 5 points of 3
# coordinates, the interaction types, zero padding for the missing points,
# then the path gain in dB.
all_data = []
all_hash = []
for i in range(len(paths)):
    row = paths[i]
    # Size each row by its own length rather than len(paths[0]), so a longer
    # row is not silently truncated and a shorter one does not raise
    # IndexError.
    for j in range(len(row)):
        for path in row[j]:
            n_points = len(path.points)
            features = [coord for point in path.points for coord in point]
            # A negative repeat count yields an empty list, i.e. no padding.
            features.extend([0] * ((5 - n_points) * 3))
            features.extend(path.interaction_types)
            features.extend([0] * (5 - n_points))
            features.append(path.path_gain_db)
            all_hash.append(path.hash)
            all_data.append(features)

In [None]:
# Inspect two flattened samples and the embeddings the network gives them.
index1 = 0
index2 = 1
print(all_hash[index1])
print(all_hash[index2])

# Build the inputs with the same dtype as the training pairs
# (init_data uses dtype=torch.float) instead of relying on dtype inference.
sample1 = torch.tensor(all_data[index1], dtype=torch.float)
sample2 = torch.tensor(all_data[index2], dtype=torch.float)
print(sample1)
print(sample2)

# Inspection only: skip gradient tracking and run each sample through the
# network once, reusing the second result for the plot in the next cell.
with torch.no_grad():
    embedding1 = net(sample1)
    embeddings = net(sample2)
print(embedding1)
print(embeddings)

In [None]:
import torch
import matplotlib.pyplot as plt

# Plot the components of the embedding stored in 'embeddings' by the
# previous cell.

# reshape(-1) always yields a 1-D tensor, so tolist() returns a list even for
# a single-component embedding; squeeze() would collapse that case to a 0-d
# tensor whose tolist() is a bare float that len() rejects.
embeddings_list = embeddings.detach().reshape(-1).tolist()

# Create x-axis indices
indices = list(range(len(embeddings_list)))

# Plot the 1D embeddings
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(indices, embeddings_list, marker='o', linestyle='-')
ax.set_title('Visualization of 1D Embeddings')
ax.set_xlabel('Component')
ax.set_ylabel('Embedding Value')
plt.show()