In [83]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('../datasets/edges/ArtistW_sample.csv', delimiter=';', names=['artist_id_from', 'artist_id_to', 'weight'])

# Extract artist IDs and edge weights
edges = df[['artist_id_from', 'artist_id_to']].values
weights = df['weight'].values

In [84]:
import torch
from torch_geometric.data import Data

# Convert to PyTorch tensors
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
edge_weight = torch.tensor(weights, dtype=torch.float)

# Create a graph data object
data = Data(edge_index=edge_index, edge_attr=edge_weight)

## EMBEDDINGS

In [85]:
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

class GraphAutoencoder(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphAutoencoder, self).__init__()
        self.encoder = GCNConv(in_channels, hidden_channels)
        self.decoder = nn.Linear(hidden_channels, out_channels)

    def encode(self, x, edge_index):
        x = self.encoder(x, edge_index)
        return x

    def decode(self, z):
        adj_pred = torch.sigmoid(torch.matmul(z, z.t()))
        return adj_pred

    def forward(self, data):
        z = self.encode(data.x, data.edge_index)
        adj_pred = self.decode(z)
        return adj_pred

In [86]:
import torch.optim as optim
from torch_geometric.utils import to_dense_adj

# Crear datos de ejemplo
num_nodes = data.num_nodes
data.x = torch.eye(num_nodes)  # Usar una matriz identidad como características de los nodos

# Inicializar el modelo, el optimizador y la función de pérdida
model = GraphAutoencoder(in_channels=num_nodes, hidden_channels=16, out_channels=num_nodes)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Convertir el grafo a una matriz de adyacencia densa
adj = to_dense_adj(data.edge_index).squeeze()

# Bucle de entrenamiento
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    adj_pred = model(data)
    loss = criterion(adj_pred, adj)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')



Epoch 1, Loss: 0.25000089406967163
Epoch 2, Loss: 0.25081339478492737
Epoch 3, Loss: 0.25002557039260864
Epoch 4, Loss: 0.2502765357494354
Epoch 5, Loss: 0.2505008280277252
Epoch 6, Loss: 0.25027593970298767
Epoch 7, Loss: 0.2500344216823578
Epoch 8, Loss: 0.25004228949546814
Epoch 9, Loss: 0.25019755959510803
Epoch 10, Loss: 0.25025197863578796
Epoch 11, Loss: 0.25015485286712646
Epoch 12, Loss: 0.25003519654273987
Epoch 13, Loss: 0.25001025199890137
Epoch 14, Loss: 0.25007548928260803
Epoch 15, Loss: 0.2501334547996521
Epoch 16, Loss: 0.2501166760921478
Epoch 17, Loss: 0.250051349401474
Epoch 18, Loss: 0.25000688433647156
Epoch 19, Loss: 0.2500184178352356
Epoch 20, Loss: 0.25005820393562317
Epoch 21, Loss: 0.25007495284080505
Epoch 22, Loss: 0.2500515282154083
Epoch 23, Loss: 0.25001591444015503
Epoch 24, Loss: 0.2500036060810089
Epoch 25, Loss: 0.25001999735832214
Epoch 26, Loss: 0.25003960728645325
Epoch 27, Loss: 0.25003838539123535
Epoch 28, Loss: 0.25001904368400574
Epoch 29, L

In [87]:
# Obtener las representaciones de los nodos
model.eval()
with torch.no_grad():
    node_embeddings = model.encode(data.x, data.edge_index)

## CLASSIFIER

In [88]:
# read labels
labels = pd.read_json('../datasets/edges/ArtistW_labels.json', typ='series')
# Suponiendo que 'labels' es un tensor con las etiquetas de los nodos
labels = torch.tensor(labels.values, dtype=torch.long)

In [89]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Definir un clasificador con Dropout
class NodeClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(NodeClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.dropout = nn.Dropout(p=0.25)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        x = F.elu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Definir el número de clases
num_classes = len(torch.unique(labels))

# Obtener las representaciones de los nodos
model.eval()
with torch.no_grad():
    node_embeddings = model.encode(data.x, data.edge_index)

# Inicializar el clasificador
classifier = NodeClassifier(input_dim=node_embeddings.size(1), hidden_dim=64, output_dim=num_classes)
optimizer = optim.Adam(classifier.parameters(), lr=0.01, weight_decay=5e-4)  # Añadir L2 regularización
criterion = nn.CrossEntropyLoss()

# Bucle de entrenamiento para el clasificador
for epoch in range(1000):
    classifier.train()
    optimizer.zero_grad()
    out = classifier(node_embeddings)
    loss = criterion(out, labels)  # 'labels' debe ser un tensor con las etiquetas de los nodos
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 1, Loss: 3.107795238494873
Epoch 2, Loss: 3.0013351440429688
Epoch 3, Loss: 2.903984546661377
Epoch 4, Loss: 2.8109779357910156
Epoch 5, Loss: 2.722201347351074
Epoch 6, Loss: 2.640368938446045
Epoch 7, Loss: 2.56477427482605
Epoch 8, Loss: 2.499610662460327
Epoch 9, Loss: 2.4440035820007324
Epoch 10, Loss: 2.397148847579956
Epoch 11, Loss: 2.3637447357177734
Epoch 12, Loss: 2.3397796154022217
Epoch 13, Loss: 2.324061870574951
Epoch 14, Loss: 2.3161611557006836
Epoch 15, Loss: 2.3156545162200928
Epoch 16, Loss: 2.312835693359375
Epoch 17, Loss: 2.3098535537719727
Epoch 18, Loss: 2.3081417083740234
Epoch 19, Loss: 2.3071553707122803
Epoch 20, Loss: 2.2936253547668457
Epoch 21, Loss: 2.2905325889587402
Epoch 22, Loss: 2.2818758487701416
Epoch 23, Loss: 2.2727932929992676
Epoch 24, Loss: 2.265475034713745
Epoch 25, Loss: 2.2605416774749756
Epoch 26, Loss: 2.256594181060791
Epoch 27, Loss: 2.253465175628662
Epoch 28, Loss: 2.25321888923645
Epoch 29, Loss: 2.2492289543151855
Epoch 30,

In [90]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluar el clasificador
classifier.eval()
with torch.no_grad():
    pred = classifier(node_embeddings).argmax(dim=1)
    accuracy = accuracy_score(labels.cpu(), pred.cpu())
    precision = precision_score(labels.cpu(), pred.cpu(), average='weighted')
    recall = recall_score(labels.cpu(), pred.cpu(), average='weighted')
    f1 = f1_score(labels.cpu(), pred.cpu(), average='weighted')
    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1 Score: {f1}')

Accuracy: 0.24808790261768826
Precision: 0.13274945669972377
Recall: 0.24808790261768826
F1 Score: 0.13499726957332053


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## CROSS VALIDATION

In [91]:
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Suponiendo que 'node_embeddings' y 'labels' ya están definidos
kf = KFold(n_splits=10, shuffle=True, random_state=42)

accuracies = []
precisions = []
recalls = []
f1_scores = []

for train_index, test_index in kf.split(node_embeddings):
    X_train, X_test = node_embeddings[train_index], node_embeddings[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    
    classifier = NodeClassifier(input_dim=node_embeddings.size(1), hidden_dim=100, output_dim=num_classes)
    optimizer = optim.Adam(classifier.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()
    
    # Entrenamiento del clasificador
    for epoch in range(1000):
        classifier.train()
        optimizer.zero_grad()
        out = classifier(X_train)
        loss = criterion(out, y_train)
        loss.backward()
        optimizer.step()
    
    # Evaluación del clasificador
    classifier.eval()
    with torch.no_grad():
        pred = classifier(X_test).argmax(dim=1)
        accuracies.append(accuracy_score(y_test.cpu(), pred.cpu()))
        precisions.append(precision_score(y_test.cpu(), pred.cpu(), average='weighted'))
        recalls.append(recall_score(y_test.cpu(), pred.cpu(), average='weighted'))
        f1_scores.append(f1_score(y_test.cpu(), pred.cpu(), average='weighted'))

print(f'Average Accuracy: {sum(accuracies) / len(accuracies)}')
print(f'Average Precision: {sum(precisions) / len(precisions)}')
print(f'Average Recall: {sum(recalls) / len(recalls)}')
print(f'Average F1 Score: {sum(f1_scores) / len(f1_scores)}')

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average Accuracy: 0.3639976012397461
Average Precision: 0.311928957251956
Average Recall: 0.36399760123974606
Average F1 Score: 0.30986871794342086


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Model Improved

In [92]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Definir un clasificador mejorado con más capas, Batch Normalization y Dropout
class ImprovedNodeClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(ImprovedNodeClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.bn1 = nn.BatchNorm1d(hidden_dim1)
        self.dropout1 = nn.Dropout(p=0.25)
        
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.bn2 = nn.BatchNorm1d(hidden_dim2)
        self.dropout2 = nn.Dropout(p=0.25)
        
        self.fc3 = nn.Linear(hidden_dim2, output_dim)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Ejemplo de uso
input_dim = node_embeddings.size(1)
hidden_dim1 = 64
hidden_dim2 = 32
output_dim = num_classes

model = ImprovedNodeClassifier(input_dim, hidden_dim1, hidden_dim2, output_dim)

In [93]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Inicializar el clasificador mejorado
classifier = ImprovedNodeClassifier(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = optim.Adam(classifier.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

# Bucle de entrenamiento para el clasificador mejorado
for epoch in range(100):
    classifier.train()
    optimizer.zero_grad()
    out = classifier(node_embeddings)
    loss = criterion(out, labels)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Evaluar el clasificador mejorado
classifier.eval()
with torch.no_grad():
    pred = classifier(node_embeddings).argmax(dim=1)
    accuracy = accuracy_score(labels.cpu(), pred.cpu())
    precision = precision_score(labels.cpu(), pred.cpu(), average='weighted')
    recall = recall_score(labels.cpu(), pred.cpu(), average='weighted')
    f1 = f1_score(labels.cpu(), pred.cpu(), average='weighted')
    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1 Score: {f1}')

Epoch 1, Loss: 3.3107614517211914
Epoch 2, Loss: 3.09382963180542
Epoch 3, Loss: 2.959340810775757
Epoch 4, Loss: 2.8455047607421875
Epoch 5, Loss: 2.7401890754699707
Epoch 6, Loss: 2.6299726963043213
Epoch 7, Loss: 2.548335075378418
Epoch 8, Loss: 2.4782910346984863
Epoch 9, Loss: 2.4219117164611816
Epoch 10, Loss: 2.3759562969207764
Epoch 11, Loss: 2.327646017074585
Epoch 12, Loss: 2.2884724140167236
Epoch 13, Loss: 2.2627952098846436
Epoch 14, Loss: 2.2387607097625732
Epoch 15, Loss: 2.2145142555236816
Epoch 16, Loss: 2.1930041313171387
Epoch 17, Loss: 2.1732118129730225
Epoch 18, Loss: 2.1656274795532227
Epoch 19, Loss: 2.1509721279144287
Epoch 20, Loss: 2.133121967315674
Epoch 21, Loss: 2.1289424896240234
Epoch 22, Loss: 2.115555763244629
Epoch 23, Loss: 2.1133201122283936
Epoch 24, Loss: 2.094125270843506
Epoch 25, Loss: 2.0840322971343994
Epoch 26, Loss: 2.075995445251465
Epoch 27, Loss: 2.0717360973358154
Epoch 28, Loss: 2.068911552429199
Epoch 29, Loss: 2.0534636974334717
Epoc

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
