In [1]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForTokenClassification, pipeline







In [2]:
# Load the fine-tuned model and tokenizer
LOCAL_MODEL_DIR = r'.\\model\mountain_ner_bert'
# Load the fine-tuned model and tokenizer
model = BertForTokenClassification.from_pretrained(LOCAL_MODEL_DIR)
tokenizer = BertTokenizer.from_pretrained(LOCAL_MODEL_DIR)

# Create NER pipeline
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

In [3]:
# Function to predict mountains
def predict_mountains(text):
    results = ner_pipeline(text)
    return [(result['word'], result['entity']) for result in results if result['entity'] != 'O']

In [4]:
def process_results(results):
    mountains = []
    current_mountain = None

    for i in range(len(results)):
        if results[i][1] != 'LABEL_0':
            if current_mountain == None:
                current_mountain = results[i][0]
                
            else:
                if results[i][0].startswith('##'):
                    current_mountain += results[i][0].strip('##')
                else:
                    current_mountain = current_mountain + ' ' + results[i][0]  
        else:
            if current_mountain:
                mountains.append(current_mountain)
                current_mountain = None
    return mountains


In [5]:
df = pd.read_csv(r'./data/mountain_sentences.csv')
df.iloc[19:30]

Unnamed: 0,sentence,mountain
19,Annapurna II is renowned for its breathtaking ...,Annapurna II
20,Gasherbrum IV rises majestically among the stu...,Gasherbrum IV
21,The team celebrated their hard work and determ...,K3
22,Himalchuli stands as a majestic symbol of natu...,Himalchuli
23,Distaghil Sar is renowned for its breathtaking...,Distaghil Sar
24,"Nestled within a breathtaking landscape, Ngadi...",Ngadi Chuli
25,The breathtaking views from the summit offered...,Nuptse
26,Khunyang Chhish is renowned for its breathtaki...,Khunyang Chhish
27,The breathtaking view from the summit offered ...,Masherbrum
28,"K1 stands as a testament to nature's grandeur,...",K1


In [None]:
# Interactive demo
print("\nInteractive demo:")
while True:
    user_input = input("\nEnter a sentence (or 'q' to quit): ")
    if user_input.lower() == 'q':
        break
    predictions = predict_mountains(user_input)
    print("Detected mountains:")
    for word, label in predictions:
        print(f"{word}: {label}")

In [6]:
# Model examination
print("\nModel examination:")
example_sentences = [
    "Mount Rinjani is the highest peak in the world.",
    "I dream of climbing K1 one day.",
    "The view from Donguzorun is breathtaking.",
    "We hiked through the Cerro Chaltén last summer.",
    "Denali, formerly known as Manirang is the highest peak in North America."
]

for sentence in example_sentences:
    predictions = predict_mountains(sentence)
    print(f"\nSentence: {sentence}")
    print("Detected mountains:")
    for word, label in predictions:
        print(f"{word}: {label}")
    print(process_results(predictions))



Model examination:



Sentence: Mount Rinjani is the highest peak in the world.
Detected mountains:
mount: LABEL_1
ri: LABEL_2
##n: LABEL_2
##jan: LABEL_2
##i: LABEL_2
is: LABEL_0
the: LABEL_0
highest: LABEL_0
peak: LABEL_0
in: LABEL_0
the: LABEL_0
world: LABEL_0
.: LABEL_0
['mount rinjani']

Sentence: I dream of climbing K1 one day.
Detected mountains:
i: LABEL_0
dream: LABEL_0
of: LABEL_0
climbing: LABEL_0
k: LABEL_0
##1: LABEL_2
one: LABEL_0
day: LABEL_0
.: LABEL_0
['##1']

Sentence: The view from Donguzorun is breathtaking.
Detected mountains:
the: LABEL_0
view: LABEL_0
from: LABEL_0
dong: LABEL_2
##uz: LABEL_2
##or: LABEL_2
##un: LABEL_2
is: LABEL_0
breath: LABEL_0
##taking: LABEL_0
.: LABEL_0
['donguzorun']

Sentence: We hiked through the Cerro Chaltén last summer.
Detected mountains:
we: LABEL_0
hike: LABEL_0
##d: LABEL_0
through: LABEL_0
the: LABEL_0
cerro: LABEL_1
cha: LABEL_2
##lten: LABEL_2
last: LABEL_0
summer: LABEL_0
.: LABEL_0
['cerro chalten']

Sentence: Denali, formerly known as Manirang i