# Instructions
Run every cell of the notebook except the last one to install the packages and load the models and functions. Then modify the `user_sentence` string variable in the last cell, setting it to the sentence you would like the model to predict a relation for. The sentence must contain tags that mark the two entities of the relation to be predicted. For example, your sentence may be: `'The <e1>bottle</e1> is filled with <e2>water</e2>.'`. Here the `'<e1>'` and `'</e1>'` tags mark the first entity, `'bottle'`, and the `'<e2>'` and `'</e2>'` tags mark the second entity, `'water'`. Finally, run the last cell to print the predicted relation.

# Install Packages

In [None]:
!pip install numpy torch transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

# Import Packages and Load Models

In [None]:
import json
import pickle

import numpy as np
import torch
from transformers import BertTokenizer, BertModel

# Mount Google Drive so that the project folder referenced below is reachable.
from google.colab import drive
drive.mount('/content/drive')

# Root folder of the project files. The trailing slash matters: later cells
# build file names by plain string concatenation (path + 'Models/...').
# Outside Colab, skip the mount above and point `path` at a local copy
# instead, e.g. path = '/'.
path = '/content/drive/MyDrive/Text_Mining/BERT-SVM/'

Mounted at /content/drive


In [None]:
# Restore the SVM classifier stored under <path>/Models/.
# NOTE(review): pickle.load can execute arbitrary code while deserialising,
# so only load a pickle file that you created yourself or otherwise trust.
with open(path + 'Models/svm_best_model.pickle', 'rb') as svm_file:
    loaded_svm_model = pickle.load(svm_file)

# Tokenizer and encoder are created from the same pretrained checkpoint name.
bert_model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(bert_model_name)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# .to() and .eval() both return the module itself, so they can be chained:
# move the encoder to `device`, then switch it to evaluation mode.
bert_model = BertModel.from_pretrained(bert_model_name).to(device).eval()

# Display the selected device as the cell output.
device

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

device(type='cpu')

# List of Relations Mapped by Numeric Labels

In [None]:
# Lookup table from numeric label to relation name. The position of a name in
# the list below is its numeric label (shown in the trailing comments).
label_names = dict(enumerate([
    'Cause-Effect(e1,e2)',         # 0
    'Cause-Effect(e2,e1)',         # 1
    'Component-Whole(e1,e2)',      # 2
    'Component-Whole(e2,e1)',      # 3
    'Content-Container(e1,e2)',    # 4
    'Content-Container(e2,e1)',    # 5
    'Entity-Destination(e1,e2)',   # 6
    'Entity-Destination(e2,e1)',   # 7
    'Entity-Origin(e1,e2)',        # 8
    'Entity-Origin(e2,e1)',        # 9
    'Instrument-Agency(e1,e2)',    # 10
    'Instrument-Agency(e2,e1)',    # 11
    'Member-Collection(e1,e2)',    # 12
    'Member-Collection(e2,e1)',    # 13
    'Message-Topic(e1,e2)',        # 14
    'Message-Topic(e2,e1)',        # 15
    'Product-Producer(e1,e2)',     # 16
    'Product-Producer(e2,e1)',     # 17
    'Other',                       # 18
]))

# Try out the model

In [None]:
def predict_relation(sentence, tokenizer, bert_model, svm_model, label_names, device):
    """Return the relation name predicted for a single input sentence.

    The sentence is tokenized, encoded by ``bert_model`` with gradient
    tracking disabled, pooled by averaging ``last_hidden_state`` over
    dimension 1 (presumably the token axis), and the pooled vector is
    classified by ``svm_model``.

    Args:
        sentence: Text to classify. The notebook instructions expect the two
            entities to be wrapped in ``<e1>...</e1>`` and ``<e2>...</e2>``
            tags; this function does not check for them.
        tokenizer: Callable invoked as
            ``tokenizer(sentence, padding=True, truncation=True,
            return_tensors="pt")``; its result must provide ``"input_ids"``
            and ``"attention_mask"`` entries.
        bert_model: Callable encoder whose output exposes
            ``last_hidden_state``. It should already live on ``device``.
        svm_model: Classifier exposing ``predict``; only the first
            prediction is used.
        label_names: Mapping from the classifier's label to a relation name.
        device: Device the tokenized inputs are moved to before encoding.

    Returns:
        The ``label_names`` entry for the first predicted label.
    """
    tokens = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt")

    # Inputs must sit on the same device as the encoder.
    ids = tokens["input_ids"].to(device)
    mask = tokens["attention_mask"].to(device)

    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        encoder_output = bert_model(ids, attention_mask=mask)

    # Mean over dimension 1, then hand the result to the SVM as a NumPy
    # array on the CPU.
    features = encoder_output.last_hidden_state.mean(dim=1).cpu().numpy()

    # predict() returns one label per row; a single sentence gives one row.
    first_label = svm_model.predict(features)[0]
    return label_names[first_label]


In [None]:
# Edit this sentence to try the model; keep the <e1>...</e1> and <e2>...</e2>
# tags around the two entities whose relation should be predicted.
user_sentence = 'The <e1>bottle</e1> is filled with <e2>water</e2>.'

# Run the full pipeline (tokenizer -> BERT -> SVM) and print the relation name.
predicted_relation = predict_relation(
    sentence=user_sentence,
    tokenizer=tokenizer,
    bert_model=bert_model,
    svm_model=loaded_svm_model,
    label_names=label_names,
    device=device,
)
print(predicted_relation)

Content-Container(e2,e1)
