In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# One-time dependency installation when running on Databricks. The
# `installed_libs` global acts as a guard so re-running this cell in the same
# kernel does not reinstall everything.
if "DATABRICKS_RUNTIME_VERSION" in os.environ and not 'installed_libs' in globals():
  #CUDA = 'cu121' 
  # NOTE(review): the guard is set before the installs run, so a failed
  # install is not retried on re-execution — confirm this is intended.
  installed_libs = True
  
  
  # PyTorch stack pinned to 2.1.0 / CUDA 12.1 wheels.
  !pip install torch==2.1.0  torchvision==0.16.0 torchtext==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
  import torch
  #os.environ['TORCH'] = torch.__version__
  #print(torch.__version__)
  #torch_version = '2.0.0+cu118'
  
  # PyG and its compiled extensions; the wheel index must match the
  # torch/CUDA versions installed above.
  #!pip install pyg_lib torch_scatter torch_sparse torch_cluster -f https://data.pyg.org/whl/torch-2.1.0+${CUDA}.html # torch_spline_conv
  !pip install torch_geometric
  !pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cu121.html
  #!pip install torch_sparse -f https://data.pyg.org/whl/torch-2.1.0+${CUDA}.html
  #!pip install torch_scatter -f https://data.pyg.org/whl/torch-2.1.0+${CUDA}.html
  #!pip install pyg_lib -f https://data.pyg.org/whl/torch-2.1.0+${CUDA}.html
  # Remaining packages are installed unpinned.
  !pip install sentence-transformers
  !pip install torcheval
  !pip install matplotlib
  !pip install pandas
  !pip install tensorboard
  
# ROOT_FOLDER is prepended to every dataset / run / checkpoint path in this
# notebook: a DBFS location on Databricks, the working directory otherwise.
if "DATABRICKS_RUNTIME_VERSION" in os.environ:
  ROOT_FOLDER = '/dbfs/FileStore/GraphNeuralNetworks/'
else:
  ROOT_FOLDER = ''

In [3]:
# sampler

def nf_sampler(batch_size, neg_sample_ratio, edge_label_index, num_learnings):
    """Draw one mini-batch of positive edges plus random negatives (triplet mode).

    Up to ``batch_size`` columns are sampled without replacement from
    ``edge_label_index`` and removed from it. For every sampled positive,
    ``neg_sample_ratio`` negatives are created by pairing the positive's source
    node (row 0) with a destination drawn uniformly from ``[0, num_learnings)``.

    Args:
        batch_size: Maximum number of positive edges to draw.
        neg_sample_ratio: Number of negatives per positive; must be >= 1.
        edge_label_index: Tensor of shape (2, E) holding the candidate edges.
        num_learnings: Exclusive upper bound for negative destination indices.

    Returns:
        A tuple ``(remaining, batch, labels)`` where ``remaining`` is
        ``edge_label_index`` without the sampled columns, ``batch`` is a
        (2, P + P * neg_sample_ratio) tensor with the P positives first and the
        negatives after them, and ``labels`` is a float vector with 1 for each
        positive and 0 for each negative.

    Raises:
        AssertionError: If ``neg_sample_ratio`` is smaller than 1.
    """
    assert neg_sample_ratio >= 1

    device = edge_label_index.device
    num_edges = edge_label_index.shape[1]

    # Sample without replacement; the slice yields fewer than batch_size
    # indices when the pool is nearly exhausted.
    sampled_indices = torch.randperm(num_edges, device=device)[:batch_size]
    num_pos = sampled_indices.shape[0]
    sampled_edges = edge_label_index[:, sampled_indices]

    # Drop the sampled columns so the next call cannot draw them again.
    keep_mask = torch.ones(num_edges, dtype=torch.bool, device=device)
    keep_mask[sampled_indices] = False
    remaining_edge_label_index = edge_label_index[:, keep_mask]

    # Negatives reuse the positive source nodes, tiled once per negative.
    neg_samples = num_pos * neg_sample_ratio
    src_edges = sampled_edges[0, :].unsqueeze(0).repeat(1, neg_sample_ratio)

    # Keep the explicit leading dimension; squeezing would collapse a single
    # negative to a 0-d tensor and break the concatenation below.
    sampled_negatives = torch.randint(0, num_learnings, (neg_samples,), device=device).unsqueeze(0)

    negative_edge_label_index = torch.cat((src_edges, sampled_negatives), dim=0)
    batch_edge_label_index = torch.cat((sampled_edges, negative_edge_label_index), dim=1)

    # Size the labels from the number of positives actually drawn, not from the
    # requested batch_size, so the final partial batch stays aligned.
    batch_labels = torch.cat((torch.ones(num_pos, device=device), torch.zeros(neg_samples, device=device)))
    return remaining_edge_label_index, batch_edge_label_index, batch_labels

def nf_loader(edge_label_index, batch_size, num_learnings, neg_sample_ratio):
    """Yield mini-batches from ``nf_sampler`` until every edge has been drawn.

    Each iteration hands the still-unsampled edges back to ``nf_sampler`` and
    yields the ``(batch_edge_label_index, batch_labels)`` pair it produced.
    This is a one-shot generator: once exhausted it must be created again.
    """
    remaining = edge_label_index
    while remaining.shape[1] > 0:
        remaining, batch_edges, batch_targets = nf_sampler(
            batch_size, neg_sample_ratio, remaining, num_learnings
        )
        yield batch_edges, batch_targets
    

def get_total_minibatch_count_fm(batch_size, edge_label_index):
    """Return how many mini-batches are needed to cover every edge once.

    Args:
        batch_size: Number of positive edges per mini-batch.
        edge_label_index: Tensor of shape (2, E); only ``shape[1]`` is read.

    Returns:
        ``ceil(E / batch_size)`` as an int (0 when there are no edges).
    """
    num_edges = edge_label_index.shape[1]
    # Ceil division: adding batch_size - 1 (not batch_size) avoids counting an
    # extra, non-existent batch when num_edges is an exact multiple.
    return int((num_edges + batch_size - 1) // batch_size)
import torch
from torch_geometric.data import HeteroData
# Load the pre-built training and validation graphs from ROOT_FOLDER.
# NOTE: torch.load unpickles the file contents, so only load files from a
# trusted source.
# NOTE(review): these files are wrapped with HeteroData(...) while a later cell
# uses HeteroData.from_dict(...) for a similar file — confirm both produce the
# intended stores for the saved format.
#data = HeteroData(torch.load('factorization_machines_dataset.pt'))
train_data = HeteroData(torch.load(ROOT_FOLDER+'FactorizationMachines_Dataset_train_v1.pt'))
val_data = HeteroData(torch.load(ROOT_FOLDER+'FactorizationMachines_Dataset_val_v1.pt'))

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import torch
import torch.nn as nn

class NN(torch.nn.Module):
    """Plain feed-forward network: input projection, ReLU blocks, linear output.

    Args:
        input_dim: Width of the incoming feature vectors.
        hidden_dim: Width of the input projection and of every hidden block.
        output_dim: Width of the final linear layer's output.
        num_hidden_layers: Number of (Linear -> ReLU) blocks between the input
            projection and the output layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_hidden_layers):
        super().__init__()

        # Input projection followed by its activation.
        self.fc_input = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()

        # Stack of identical hidden blocks, each keeping the hidden width.
        blocks = []
        for _ in range(num_hidden_layers):
            blocks.append(nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()))
        self.hidden_layers = nn.ModuleList(blocks)

        # Final projection; no activation is applied after it.
        self.fc_output = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run ``x`` through the input layer, every hidden block, then the output layer."""
        hidden = self.relu(self.fc_input(x))
        for block in self.hidden_layers:
            hidden = block(hidden)
        return self.fc_output(hidden)





In [5]:
import torch
import torch.nn.functional as F
from models.TransE import TransE
from models.DistMult import DistMult
from models.FactorizationMachineModel import FactorizationMachineModel
import torch_geometric
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Model(torch.nn.Module):
    """Scores (person, learning) pairs with a feature network and a ranking loss.

    Args:
        nn: Module applied to the concatenated person/learning feature rows.
        head: Name of the KGE head to build, ``'TransE'`` or ``'DistMult'``.
            The head is constructed but not used by ``forward``.
        node_types: Sequence of node type names; only its length is used.
        edge_types: Sequence of ``(src, relation, dst)`` triples. Triples whose
            relation starts with ``'rev_'`` are ignored. The sequence itself is
            not modified.
        ggn_output_dim: Embedding width for the node-type embedding and head.
        pnorm: Forwarded to the head as ``p_norm``.
        num_supervisors: Number of classes for the supervisor one-hot encoding.
        num_organizations: Number of classes for the organization one-hot encoding.

    Raises:
        NotImplementedError: If ``head`` is not one of the supported names.
    """

    def __init__(self, nn : torch.nn.Module, head, node_types, edge_types, ggn_output_dim, pnorm=1, num_supervisors=0, num_organizations=0):
        super().__init__()
        # Embedding table with one row per node type.
        self.node_type_embedding = torch.nn.Embedding(len(node_types), ggn_output_dim) # hidden channels should be the output dim of gnn
        self.num_supervisors = num_supervisors
        self.num_organizations = num_organizations

        # Build a filtered copy instead of removing items from the list being
        # iterated: in-place removal skips the element after each removed one
        # and also mutates the caller's list.
        self.edge_types = [
            edge_type for edge_type in edge_types
            if not edge_type[1].startswith('rev_')
        ]

        # Map every kept edge type to its integer index.
        self.edgeindex_lookup = {edge_type: torch.tensor(i) for i, edge_type in enumerate(self.edge_types)}

        num_relations = len(self.edge_types)
        if head == 'TransE':
            self.head = TransE(len(node_types), num_relations, ggn_output_dim, p_norm=pnorm)  # KGE head with loss function
        elif head == 'DistMult':
            self.head = DistMult(len(node_types), num_relations, ggn_output_dim, p_norm=pnorm)  # KGE head with loss function
        else:
            raise NotImplementedError

        self.nn = nn

    def forward(self, hetero_data, edge_label_index, edge_label):
        """Return the margin ranking loss for one batch of labelled edges.

        Args:
            hetero_data: Object indexable by ``'people'`` and
                ``'courses_and_programs'``, each exposing a feature matrix ``x``.
            edge_label_index: Tensor of shape (2, B); row 0 indexes people rows,
                row 1 indexes learning rows.
            edge_label: Vector of length B with 1 for positives, 0 for negatives.

        Returns:
            Scalar loss from ``F.margin_ranking_loss`` with margin 0.2.
        """
        people = hetero_data['people'].x[edge_label_index[0, :]]

        # The last two people columns hold class indices; expand them to full
        # one-hot blocks for supervisor and organization.
        # NOTE(review): F.one_hot needs every index in [0, num_classes). On CUDA
        # an out-of-range value shows up as an asynchronous "index out of
        # bounds" device assert — confirm the stored ids fit num_supervisors /
        # num_organizations.
        supervisors = F.one_hot(people[:, -2].to(torch.int64), num_classes=self.num_supervisors).to(torch.float32)
        organizations = F.one_hot(people[:, -1].to(torch.int64), num_classes=self.num_organizations).to(torch.float32)
        people = torch.cat((people[:, :-2], supervisors, organizations), dim=1)

        learnings = hetero_data['courses_and_programs'].x[edge_label_index[1, :]]

        scores = self.nn(torch.cat((people, learnings), dim=1))
        pos_scores = scores[edge_label == 1]
        neg_scores = scores[edge_label == 0]

        # NOTE(review): positives and negatives are compared as-is, so their
        # shapes must be compatible — presumably this only holds for one
        # negative per positive; confirm before raising neg_sample_ratio.
        return F.margin_ranking_loss(
            pos_scores,
            neg_scores,
            target=torch.ones_like(pos_scores), # 1 for similarity, -1 for dissimilarity
            margin=0.2
        )
        
    





#gnn = HGT(hidden_channels=out_channels, out_channels=out_channels, num_heads=num_heads, num_layers=num_layers, node_types=train_data.node_types, data_metadata=metadata)
# ---- Hyperparameters -------------------------------------------------------
out_channels = 64
hidden_channels = 64
num_heads = 0
num_layers = 3
pnorm = 2
head = 'TransE'

# ---- Lookup graph: class counts and schema ---------------------------------
# The full graph is loaded only to read node counts and metadata, then freed.
filename = 'HeteroData_Learnings_normalized_triangles_withadditionaldata_v1.pt'
data_forlookup = HeteroData.from_dict(torch.load(ROOT_FOLDER+filename))

num_supervisors = data_forlookup['people'].num_nodes
num_organizations = data_forlookup['organizations'].num_nodes

metadata = data_forlookup.metadata()
# Register one self-loop relation per node type in the edge type list.
for node_type in data_forlookup.node_types:
    metadata[1].append((node_type, 'self_loop', node_type))

del data_forlookup

# ---- Feature network -------------------------------------------------------
# The last two people columns are class indices that the model replaces with
# one-hot blocks, hence "- 2 + num_supervisors + num_organizations".
people_feature_dim = train_data['people'].x.shape[1] - 2 + num_supervisors + num_organizations
learning_feature_dim = train_data['courses_and_programs'].x.shape[1]
input_dim = people_feature_dim + learning_feature_dim

MLP = NN(input_dim, hidden_channels, out_channels, num_layers)

# fm = FactorizationMachineModel(
#     field_dims=train_data['people'].x.shape[1]-2+num_supervisors+num_organizations+train_data['courses_and_programs'].x.shape[1],
#                                embed_dim=hidden_channels)

# ---- Full model ------------------------------------------------------------
model = Model(
    MLP,
    head=head,
    node_types=metadata[0],
    edge_types=metadata[1],
    ggn_output_dim=out_channels,
    pnorm=pnorm,
    num_supervisors=num_supervisors,
    num_organizations=num_organizations,
)
#torch_geometric.compile(model, dynamic=True)
model.to(device)



Model(
  (node_type_embedding): Embedding(6, 64)
  (head): TransE(6, num_relations=22, hidden_channels=64)
  (nn): NN(
    (fc_input): Linear(in_features=310222, out_features=64, bias=True)
    (relu): ReLU()
    (hidden_layers): ModuleList(
      (0-2): 3 x Sequential(
        (0): Linear(in_features=64, out_features=64, bias=True)
        (1): ReLU()
      )
    )
    (fc_output): Linear(in_features=64, out_features=64, bias=True)
  )
)

In [6]:
from datetime import datetime

from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm

# ---- Training configuration ------------------------------------------------
batch_size = 32
learning_rate = 2e-4
neg_sample_ratio = 1
start_epoch = 1
num_epochs = 1000
eval_every = 300          # run validation every N mini-batches
val_batches_per_eval = 3  # validation mini-batches averaged per evaluation
checkpoint_every = 1000   # save a checkpoint every N mini-batches

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #2e-15

# ---- Run naming / logging --------------------------------------------------
# No neighbour sampling sizes are used here, so this part of the name is empty.
neighbors = '_'.join([str(n) for n in []])
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Build the run name once so the TensorBoard directory, the printed path and
# the checkpoint directory always agree.
run_name = f'learningpeople_factorizationmachines_{timestamp}_pnorm{pnorm}_lr{learning_rate}_bs{batch_size}_neighbors_{neighbors}_head_{head}_hiddenchannels_{hidden_channels}_outchannels_{out_channels}_numheads_{num_heads}_numlayers_{num_layers}'
log_dir = ROOT_FOLDER + f'runs/{run_name}'
run_folder = ROOT_FOLDER + f'models/{run_name}'

writer = SummaryWriter(log_dir)
print('writer', log_dir)

# ---- Data ------------------------------------------------------------------
target_edge_type = ('people', 'completed', 'courses_and_programs')
num_learnings = train_data['courses_and_programs'].num_nodes

# Keep CPU copies of the supervision edges for the sampler, so it behaves the
# same whether or not the graphs were already moved to the GPU by a previous
# run of this cell.
train_edge_label_index = train_data[target_edge_type].edge_label_index.cpu()
val_edge_label_index = val_data[target_edge_type].edge_label_index.cpu()
total_minibatches = get_total_minibatch_count_fm(batch_size, train_edge_label_index)

# Move the graphs to the device once instead of on every mini-batch.
train_data = train_data.to(device)
val_data = val_data.to(device)

val_loader = nf_loader(val_edge_label_index, batch_size, num_learnings, neg_sample_ratio)

# Counts mini-batches across epochs so logging steps and checkpoint names keep
# increasing instead of restarting at 0 every epoch.
batches_seen = 0

model.train()
try:
    for epoch in range(start_epoch, start_epoch + num_epochs):
        # nf_loader is a one-shot generator: build a fresh one per epoch,
        # otherwise every epoch after the first iterates over nothing.
        train_loader = nf_loader(train_edge_label_index, batch_size, num_learnings, neg_sample_ratio)

        for i, (batch_edge_label_index, labels) in tqdm(enumerate(train_loader), total=total_minibatches):
            is_last_batch = i == total_minibatches - 1

            optimizer.zero_grad()
            loss = model(train_data, batch_edge_label_index.to(device), labels.to(device))
            loss.backward()
            optimizer.step()

            train_loss = loss.item()
            total_samples_seen = batches_seen * batch_size
            batches_seen += 1
            writer.add_scalar('Loss/train', train_loss, total_samples_seen)

            if is_last_batch:
                print(f'{i} loss: {train_loss:.4f}')
                writer.add_scalar('Epoch Loss/train', train_loss, total_samples_seen)

            # print loss and minibatch in the same line
            print(f'{i} loss: {train_loss:.4f}', end='\r')

            if i % eval_every == 0 or is_last_batch:
                model.eval()
                val_loss = 0.0
                with torch.no_grad():
                    for _ in range(val_batches_per_eval):
                        try:
                            val_edge_index, val_labels = next(val_loader)
                        except StopIteration:
                            # An exhausted generator cannot be restarted with
                            # iter(); create a new loader instead.
                            val_loader = nf_loader(val_edge_label_index, batch_size, num_learnings, neg_sample_ratio)
                            val_edge_index, val_labels = next(val_loader)
                        # Accumulate so the division below yields a real mean.
                        val_loss += model(val_data, val_edge_index.to(device), val_labels.to(device)).item()
                val_loss /= val_batches_per_eval

                if i == 0:
                    writer.add_scalar('Epoch Loss/val', val_loss, total_samples_seen)
                    writer.add_scalar('Loss/val', val_loss, total_samples_seen)
                elif is_last_batch:
                    writer.add_scalar('Epoch Loss/val', val_loss, total_samples_seen)
                else:
                    writer.add_scalar('Loss/val', val_loss, total_samples_seen)

                print(f'val_loss: {val_loss:.4f}', end='\r')
                model.train()

            writer.flush()

            if i % checkpoint_every == 0 or is_last_batch:
                # Creates ROOT_FOLDER/models/<run_name> including parents.
                os.makedirs(run_folder, exist_ok=True)

                print('saving model to', run_folder)
                # save model and optimizer
                is_epoch = f'Ep{epoch}_' if is_last_batch else ''
                torch.save({
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    }, run_folder+f'/{is_epoch}model_samplesseen{total_samples_seen}.pt')
finally:
    # Close the event file even when training is interrupted or fails.
    writer.close()

writer runs/learningpeople_factorizationmachines_20231103_163158_pnorm2_llr0.0002_bs32_neighbors__head_TransE_hiddenchannels_64_outchannels_64_numheads_0_numlayers_3


  0%|          | 0/4826 [00:00<?, ?it/s]../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [0,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [6,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [7,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [8,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [9,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/ScatterGatherKer

RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`