In [13]:
import torch
from dependencies import BertModel, tokenizer, label_all_tokens

## Load Model
Link to the model weights (loaded below from `my_model.pth`): https://drive.google.com/file/d/1V3lG0iYt0R8cMe95x3WncNOkCAEdwiEY/view?usp=drive_link

In [6]:
# Build the model wrapper, then restore the fine-tuned weights from disk.
model = BertModel()
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on a
# CPU-only machine; evaluate_one_text moves the model to CUDA when available.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load("my_model.pth", map_location="cpu"))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<All keys matched successfully>

## Predict one sentence

In [22]:
# Lookup from predicted class index to its tag string.
ids_to_labels = dict(enumerate(["O", "B-geo", "I-geo"]))

def align_word_ids(texts):
    """Build a keep/ignore mask with one entry per tokenized position.

    ``texts`` is tokenized with the same settings ``evaluate_one_text`` uses
    (``padding='max_length'``, ``max_length=512``, ``truncation=True``) and the
    tokenizer's ``word_ids()`` are walked in order.

    Args:
        texts: Input handed unchanged to ``tokenizer``.

    Returns:
        list[int]: One value per element of ``word_ids()``:
        ``-100`` when the word id is ``None`` (presumably special/padding
        tokens -- ``tokenizer`` comes from ``dependencies``, confirm there),
        ``1`` for the first token of each word, and for the remaining tokens
        of a word ``1`` if ``label_all_tokens`` is truthy, otherwise ``-100``.
    """
    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=512, truncation=True)

    label_ids = []
    previous_word_idx = None

    for word_idx in tokenized_inputs.word_ids():
        if word_idx is None:
            # Position is not tied to any word: always ignored.
            label_ids.append(-100)
        elif word_idx != previous_word_idx:
            # First token of a new word: always kept.
            label_ids.append(1)
        else:
            # Continuation token of the current word: kept only on request.
            label_ids.append(1 if label_all_tokens else -100)
        previous_word_idx = word_idx

    return label_ids


def evaluate_one_text(model, sentence):
    """Run the model on one sentence and print the predicted tag sequence.

    The sentence is tokenized (padded/truncated to 512 tokens), passed through
    ``model``, and only the positions that ``align_word_ids`` does not mark
    with ``-100`` are kept. Each kept position's arg-max class index is
    translated through ``ids_to_labels``.

    Args:
        model: The network to evaluate. It is moved to CUDA when available and
            switched to evaluation mode; it is left in that state afterwards.
        sentence: The text to tag.

    Returns:
        None. The sentence and the list of predicted tags are printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Inference only: make sure dropout-style layers are disabled.
    model.eval()

    text = tokenizer(sentence, padding="max_length", max_length = 512, truncation=True, return_tensors="pt")

    mask = text["attention_mask"].to(device)
    input_id = text["input_ids"].to(device)
    # Integer mask with a leading batch dimension so it lines up with logits[0].
    label_ids = torch.tensor(align_word_ids(sentence), device=device).unsqueeze(0)

    # No gradients are needed for prediction; skipping them saves memory.
    with torch.no_grad():
        logits = model(input_id, mask, None)

    # Keep only the positions that are not flagged with the -100 ignore value.
    logits_clean = logits[0][label_ids != -100]

    predictions = logits_clean.argmax(dim=1).tolist()
    prediction_label = [ids_to_labels[i] for i in predictions]
    print(sentence)
    print(prediction_label)

## Predictions
The example sentences below were generated with ChatGPT.

In [33]:
# Example sentences to tag, evaluated one after another in this order.
sample_sentences = [
    "The Swiss Matterhorn, with its iconic pyramid shape, stands as a testament to the indomitable spirit of alpinism and the allure of challenging summits.",
    "The Blue Ridge Mountains, draped in a misty morning haze, create an ethereal atmosphere that captivates all who venture into their embrace.",
    "Standing tall in the Cascade Range, Mount Hood commands attention with its snow-capped summit, a beacon visible for miles around.",
    "The Rocky Mountains, with their towering summits and sprawling valleys, harbor a diverse ecosystem and provide a haven for outdoor enthusiasts.",
    "The Sierra Nevada Range, adorned with pristine lakes and towering pine trees, offers a serene retreat for those seeking solace in nature.",
    "The Atlas Mountains in Morocco unveil a rugged beauty, where ancient traditions and modern life coexist in the shadow of towering peaks.",
]

for sample_sentence in sample_sentences:
    evaluate_one_text(model, sample_sentence)

The Swiss Matterhorn, with its iconic pyramid shape, stands as a testament to the indomitable spirit of alpinism and the allure of challenging summits.
['O', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
The Blue Ridge Mountains, draped in a misty morning haze, create an ethereal atmosphere that captivates all who venture into their embrace.
['O', 'O', 'B-geo', 'I-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
Standing tall in the Cascade Range, Mount Hood commands attention with its snow-capped summit, a beacon visible for miles around.
['O', 'O', 'O', 'O', 'B-geo', 'I-geo', 'O', 'B-geo', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
The Rocky Mountains, with their towering summits and sprawling valleys, harbor a diverse ecosystem and provide a haven for outdoor enthusiasts.
['O', 'B-geo', 'I-geo'