In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas
from sklearn.model_selection import train_test_split

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.empty_cache()
device

device(type='cuda')

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x2189e10d7d0>

In [3]:
df = pandas.read_pickle('Cosine_10000.pickle')

In [4]:
class SiameseNet(nn.Module):
    def __init__(self, embedding_dim, hidden_dim):
        super(SiameseNet, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.embedding = nn.Linear(embedding_dim, hidden_dim)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward_once(self, x):
        x = self.embedding(x)
        x = torch.relu(x)
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        return x

    def forward(self, x1, x2):
        out1 = self.forward_once(x1)
        out2 = self.forward_once(x2)
        distance = torch.abs(out1 - out2)
        return distance

In [5]:
class SiameseDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, index):
        x1 = self.data.iloc[index][0]
        x2 = self.data.iloc[index][1]
        y = self.data.iloc[index][2]
        return x1, x2, y

    def __len__(self):
        return len(self.data)

In [6]:
embedding_dim = 768
hidden_dim = 128
learning_rate = 0.001
num_epochs = 4
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [8]:
# Create dataset and dataloaders
# train_data = [(torch.randn(1, embedding_dim), torch.randn(1, embedding_dim)) for _ in range(1000)]
# train_labels = [torch.randn(1).item() for _ in range(1000)]
train_dataset = SiameseDataset(train_df)
test_dataset = SiameseDataset(test_df)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
# Create model, optimizer, and loss function
model = SiameseNet(embedding_dim, hidden_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [10]:
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for x1, x2, y in train_dataloader:
        x1, x2, y = x1.to(device), x2.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(x1, x2)
        y = y.float()
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/4, Loss: 4.268585485580843
Epoch 2/4, Loss: 0.17436544537486043
Epoch 3/4, Loss: 0.048052743604785064
Epoch 4/4, Loss: 0.017889064498376683


In [11]:
total_loss = 0.0
with torch.no_grad():
    for x1, x2, y in test_dataloader:
        x1, x2, y = x1.to(device), x2.to(device), y.to(device)
        outputs = model(x1, x2)
        loss = criterion(outputs, y)
        total_loss += loss.item() * y.shape[0]
print(f'Test Loss: {total_loss / len(test_dataset)}')

Test Loss: 8.804419241140415e-05


  return F.mse_loss(input, target, reduction=self.reduction)
