In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn import metrics
from torch.utils.data import DataLoader, Dataset
import math
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Column names applied to the edge-list CSV, in file order.
ATTRIBUTES = ['SOURCE', 'TARGET', 'RATING', 'TIME']
# The first two columns are the model inputs; the third is the regression target.
FEATURE_ATTRIBUTES, TARGET_ATTRIBUTE = ATTRIBUTES[:2], ATTRIBUTES[2]

In [6]:
class BitcoinOTCDataset(Dataset):
    """Map-style dataset that serves rated edges as (source, target, rating) triples.

    Args:
        data: Table with 'SOURCE', 'TARGET' and 'RATING' columns (each column
            must expose ``.values``, e.g. a pandas DataFrame).
    """

    def __init__(self, data):
        source_column = data['SOURCE'].values
        target_column = data['TARGET'].values
        rating_column = data['RATING'].values

        # Node ids are cast to int64 and ratings to float32, stored as flat arrays
        # so that lookups are positional regardless of the table's index.
        self.sources = source_column.astype(np.int64)
        self.targets = target_column.astype(np.int64)
        self.ratings = rating_column.astype(np.float32)

    def __len__(self):
        """Number of edges held by the dataset."""
        return self.sources.shape[0]

    def __getitem__(self, idx):
        """Return the ``(source, target, rating)`` triple stored at position ``idx``."""
        edge = (self.sources[idx], self.targets[idx], self.ratings[idx])
        return edge

In [4]:
def read_file(data_file):
    """Load the edge-list CSV into a DataFrame.

    The first line of the file is skipped and the columns are labelled with the
    module-level ``ATTRIBUTES`` names.

    Args:
        data_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with one row per remaining CSV line.
    """
    frame = pd.read_csv(data_file, skiprows=1, names=ATTRIBUTES)
    return frame

def normalize_ratings(ratings):
    """Linearly rescale ratings so the minimum maps to -1 and the maximum to +1.

    Args:
        ratings: Array-like supporting ``.min()``, ``.max()`` and element-wise
            arithmetic (e.g. a pandas Series or NumPy array).

    Returns:
        An object of the same kind as ``ratings`` with values in [-1, 1]. If all
        ratings are identical the range is zero, so every value is mapped to 0.0
        (the midpoint of the target interval) instead of dividing by zero and
        producing NaN.
    """
    min_rating = ratings.min()
    max_rating = ratings.max()
    rating_range = max_rating - min_rating
    if rating_range == 0:
        # Degenerate input: no spread to rescale. Multiplying the (all-zero)
        # offsets by 0.0 keeps the container type/shape and yields float zeros.
        return (ratings - min_rating) * 0.0
    return 2 * (ratings - min_rating) / rating_range - 1

class ModelR(nn.Module):
    """Feed-forward regressor over a pair of learned node embeddings.

    Source and target node ids are looked up in two separate embedding tables,
    the two vectors are concatenated, and the result is passed through a stack
    of ``Linear -> ReLU`` layers followed by a single-output ``Linear`` layer.

    Args:
        num_nodes: Number of rows in each embedding table.
        layer_size: Width of each embedding and of every hidden layer.
        num_hidden_layers: Number of ``Linear -> ReLU`` pairs before the output layer.
    """

    def __init__(self, num_nodes, layer_size, num_hidden_layers):
        super().__init__()
        self.source_embedding = nn.Embedding(num_nodes, layer_size)
        self.target_embedding = nn.Embedding(num_nodes, layer_size)

        # Input width seen by each Linear layer: the first one consumes the
        # concatenated embedding pair, every later one consumes a hidden vector.
        in_widths = [2 * layer_size] + [layer_size] * num_hidden_layers

        stack = []
        for width in in_widths[:-1]:
            stack += [nn.Linear(width, layer_size), nn.ReLU()]
        # Output layer: one scalar prediction per edge.
        stack.append(nn.Linear(in_widths[-1], 1))
        self.network = nn.Sequential(*stack)

    def forward(self, source, target):
        """Return predictions of shape ``(batch, 1)`` for batches of node-id pairs."""
        pair = (self.source_embedding(source), self.target_embedding(target))
        return self.network(torch.cat(pair, dim=1))

def train_model(model, dataloader, criterion, optimizer, device):
    """Run one training epoch and return the mean of the per-batch losses.

    Args:
        model: Module called as ``model(source, target)``.
        dataloader: Iterable of ``(source, target, rating)`` batches that also
            supports ``len()``.
        criterion: Loss callable taking ``(prediction, rating)``.
        optimizer: Optimizer over the model's parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of the batch losses divided by the number of batches (each batch
        counts equally, whatever its size).

    Raises:
        ZeroDivisionError: If ``dataloader`` contains no batches.
    """
    model.train()
    total_loss = 0
    for source, target, rating in dataloader:
        source, target, rating = source.to(device), target.to(device), rating.to(device)
        optimizer.zero_grad()
        # Flatten to 1-D rather than squeeze(): squeeze() would also drop the
        # batch dimension of a single-sample batch, leaving a 0-d prediction
        # that the loss then broadcasts against the 1-D rating tensor.
        output = model(source, target).reshape(-1)
        loss = criterion(output, rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

def evaluate_model(model, dataloader, device):
    """Predict every batch without gradient tracking and score with MSE.

    Args:
        model: Module called as ``model(source, target)``.
        dataloader: Iterable of ``(source, target, rating)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mse, predictions, targets)`` where ``predictions`` and
        ``targets`` are 1-D NumPy arrays covering all samples in iteration order
        and ``mse`` is their mean squared error.

    Raises:
        ValueError: If ``dataloader`` yields no batches (nothing to concatenate).
    """
    model.eval()
    predictions = []
    targets = []
    with torch.no_grad():
        for source, target, rating in dataloader:
            source, target, rating = source.to(device), target.to(device), rating.to(device)
            # Flatten to 1-D rather than squeeze(): for a single-sample batch
            # squeeze() yields a 0-d tensor, and np.concatenate below rejects
            # zero-dimensional arrays.
            output = model(source, target).reshape(-1)
            predictions.append(output.cpu().numpy())
            targets.append(rating.cpu().numpy())
    predictions = np.concatenate(predictions)
    targets = np.concatenate(targets)
    mse = metrics.mean_squared_error(targets, predictions)
    return mse, predictions, targets


In [7]:
def main(data_file='/content/drive/My Drive/datamining/soc-sign-bitcoinotc.csv',
         num_hidden_layers=3, num_epochs=20, batch_size=32, learning_rate=0.001,
         seed=42, checkpoint_path='best_model.pth'):
    """Train ModelR on the rating edge list and report the test-set MSE.

    The data is split 70/10/20 into train/validation/test. After every epoch the
    model is scored on the validation split, and the weights with the lowest
    validation MSE are checkpointed and restored before the final test run.

    Args:
        data_file: CSV edge list passed to ``read_file``.
        num_hidden_layers: Number of hidden layers in ``ModelR``.
        num_epochs: Number of training epochs.
        batch_size: Batch size for all three data loaders.
        learning_rate: Adam learning rate.
        seed: Seed for the data splits and for torch's global RNG (weight
            initialisation, training shuffle). Pass ``None`` for an unseeded run.
        checkpoint_path: File the best weights are written to.

    Returns:
        Mean squared error on the test split.
    """
    if seed is not None:
        # Seeding makes the run repeatable and lets runs with different
        # hyperparameters be compared on identical splits.
        torch.manual_seed(seed)

    data_set = read_file(data_file)
    # Ratings are rescaled using the min/max of the full data set (before splitting).
    data_set[TARGET_ATTRIBUTE] = normalize_ratings(data_set[TARGET_ATTRIBUTE])

    # Node ids index the embedding tables directly, so the tables need max id + 1 rows.
    num_nodes = int(max(data_set['SOURCE'].max(), data_set['TARGET'].max())) + 1

    train, test = train_test_split(data_set, test_size=0.2, random_state=seed)
    train, validate = train_test_split(train, test_size=0.125, random_state=seed)  # 0.125 * 80% = 10%

    train_dataset = BitcoinOTCDataset(train)
    validate_dataset = BitcoinOTCDataset(validate)
    test_dataset = BitcoinOTCDataset(test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    validate_loader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Embedding/hidden width is floor(log2(number of training samples)).
    layer_size = int(math.log2(len(train_dataset)))

    model = ModelR(num_nodes, layer_size, num_hidden_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_val_loss = float('inf')
    checkpoint_written = False
    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader, criterion, optimizer, device)
        val_loss, _, _ = evaluate_model(model, validate_loader, device)
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss}, Validation Loss: {val_loss}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True

    # Restore only a checkpoint written by this run; a file left over from an
    # earlier run may belong to a different architecture.
    if checkpoint_written:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    test_mse, _, _ = evaluate_model(model, test_loader, device)
    print(f'Mean Squared Error on Test Set: {test_mse}')
    return test_mse

if __name__ == '__main__':
    main()

Epoch 1/20, Train Loss: 0.1239828199772248, Validation Loss: 0.12484066188335419
Epoch 2/20, Train Loss: 0.11563097666686016, Validation Loss: 0.11691176891326904
Epoch 3/20, Train Loss: 0.10500162123285224, Validation Loss: 0.10966543853282928
Epoch 4/20, Train Loss: 0.0967426047097313, Validation Loss: 0.105322927236557
Epoch 5/20, Train Loss: 0.09035066033645374, Validation Loss: 0.10549129545688629
Epoch 6/20, Train Loss: 0.08452086784758525, Validation Loss: 0.10259958356618881
Epoch 7/20, Train Loss: 0.07921163422271935, Validation Loss: 0.10369354486465454
Epoch 8/20, Train Loss: 0.074245868229208, Validation Loss: 0.10234261304140091
Epoch 9/20, Train Loss: 0.0699896227271895, Validation Loss: 0.10481933504343033
Epoch 10/20, Train Loss: 0.06562960951496992, Validation Loss: 0.1047307699918747
Epoch 11/20, Train Loss: 0.06157644573881246, Validation Loss: 0.10935028642416
Epoch 12/20, Train Loss: 0.057707972121598325, Validation Loss: 0.10799530148506165
Epoch 13/20, Train Loss

In [8]:
def main(data_file='/content/drive/My Drive/datamining/soc-sign-bitcoinotc.csv',
         num_hidden_layers=4, num_epochs=20, batch_size=32, learning_rate=0.001,
         seed=42, checkpoint_path='best_model.pth'):
    """Train ModelR (four hidden layers by default) and report the test-set MSE.

    The data is split 70/10/20 into train/validation/test. After every epoch the
    model is scored on the validation split, and the weights with the lowest
    validation MSE are checkpointed and restored before the final test run.

    Args:
        data_file: CSV edge list passed to ``read_file``.
        num_hidden_layers: Number of hidden layers in ``ModelR``.
        num_epochs: Number of training epochs.
        batch_size: Batch size for all three data loaders.
        learning_rate: Adam learning rate.
        seed: Seed for the data splits and for torch's global RNG (weight
            initialisation, training shuffle). Pass ``None`` for an unseeded run.
        checkpoint_path: File the best weights are written to.

    Returns:
        Mean squared error on the test split.
    """
    if seed is not None:
        # Seeding makes the run repeatable and lets runs with different
        # hyperparameters be compared on identical splits.
        torch.manual_seed(seed)

    data_set = read_file(data_file)
    # Ratings are rescaled using the min/max of the full data set (before splitting).
    data_set[TARGET_ATTRIBUTE] = normalize_ratings(data_set[TARGET_ATTRIBUTE])

    # Node ids index the embedding tables directly, so the tables need max id + 1 rows.
    num_nodes = int(max(data_set['SOURCE'].max(), data_set['TARGET'].max())) + 1

    train, test = train_test_split(data_set, test_size=0.2, random_state=seed)
    train, validate = train_test_split(train, test_size=0.125, random_state=seed)  # 0.125 * 80% = 10%

    train_dataset = BitcoinOTCDataset(train)
    validate_dataset = BitcoinOTCDataset(validate)
    test_dataset = BitcoinOTCDataset(test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    validate_loader = DataLoader(validate_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Embedding/hidden width is floor(log2(number of training samples)).
    layer_size = int(math.log2(len(train_dataset)))

    model = ModelR(num_nodes, layer_size, num_hidden_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_val_loss = float('inf')
    checkpoint_written = False
    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader, criterion, optimizer, device)
        val_loss, _, _ = evaluate_model(model, validate_loader, device)
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss}, Validation Loss: {val_loss}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True

    # Restore only a checkpoint written by this run; a file left over from an
    # earlier run may belong to a different architecture.
    if checkpoint_written:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    test_mse, _, _ = evaluate_model(model, test_loader, device)
    print(f'Mean Squared Error on Test Set: {test_mse}')
    return test_mse

if __name__ == '__main__':
    main()

Epoch 1/20, Train Loss: 0.12694358379270437, Validation Loss: 0.12471532076597214
Epoch 2/20, Train Loss: 0.11956258418590825, Validation Loss: 0.11771056056022644
Epoch 3/20, Train Loss: 0.10916073635980889, Validation Loss: 0.11204668879508972
Epoch 4/20, Train Loss: 0.09892396211196143, Validation Loss: 0.10788028687238693
Epoch 5/20, Train Loss: 0.0915631172061923, Validation Loss: 0.1055864617228508
Epoch 6/20, Train Loss: 0.085112581764789, Validation Loss: 0.10705506056547165
Epoch 7/20, Train Loss: 0.07987717756121605, Validation Loss: 0.10824032872915268
Epoch 8/20, Train Loss: 0.075276545848487, Validation Loss: 0.1049102395772934
Epoch 9/20, Train Loss: 0.07059848117277466, Validation Loss: 0.10725151002407074
Epoch 10/20, Train Loss: 0.06693408270790906, Validation Loss: 0.10571295768022537
Epoch 11/20, Train Loss: 0.06299944507168541, Validation Loss: 0.11059458553791046
Epoch 12/20, Train Loss: 0.05940645471255896, Validation Loss: 0.10796263068914413
Epoch 13/20, Train L