## Importing libraries and tools

In [None]:
# Setting up PyTorch and Transformers for the NER model
pip install transformers torch sklearn spacy

In [37]:
# Importing necessary libraries
import json
import transformers
import torch
import numpy as np
import re
import spacy
from tqdm import tqdm
from spacy.tokens import DocBin

## Load and preprocess data 

In [25]:
# Load the annotated dataset from the JSON file.
# The context manager closes the file handle deterministically, and the explicit
# UTF-8 encoding keeps non-ASCII text intact regardless of the platform default.
with open('mountains_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [26]:
# Create a blank English pipeline (no pretrained components are loaded);
# process_data() below uses it only to tokenize the annotated texts.
nlp = spacy.blank("en")

# Function to process data and return a DocBin
def process_data(file_path):
    """Convert character-offset entity annotations into a spaCy ``DocBin``.

    The JSON file is expected to hold a top-level ``"annotations"`` list whose
    items are ``(text, annot)`` pairs, where ``annot["entities"]`` is a list of
    ``(start, end, label)`` character offsets into ``text``.

    Args:
        file_path: Path to the JSON annotation file.

    Returns:
        A ``DocBin`` with one ``Doc`` per annotated text, entities attached.

    Notes:
        Annotations that cannot be aligned to token boundaries are reported and
        skipped. Overlapping or duplicate spans are reduced to a non-overlapping
        set (longest span wins) because ``doc.ents`` rejects overlaps.
    """
    db = DocBin()
    # Entity offsets are character positions, so the text must be decoded
    # correctly; JSON is UTF-8, which is not the default encoding everywhere.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for text, annot in tqdm(data['annotations']):
        doc = nlp.make_doc(text)
        ents = []
        for start, end, label in annot["entities"]:
            # "contract" snaps misaligned offsets inward to token boundaries and
            # yields None when no whole token lies inside the range.
            span = doc.char_span(start, end, label=label, alignment_mode="contract")
            if span is None:
                print("Skipping entity:", text[start:end], "at:", start, end)
            else:
                ents.append(span)
        # Assigning overlapping spans to doc.ents raises a ValueError, which would
        # abort the whole conversion; keep the longest non-overlapping spans.
        kept = spacy.util.filter_spans(ents)
        if len(kept) < len(ents):
            print("Dropped", len(ents) - len(kept), "overlapping/duplicate entities")
        doc.ents = kept
        db.add(doc)
    return db

# Convert the annotated JSON into a DocBin and serialize it to disk in spaCy's
# binary format, which is the input the `spacy train` command reads.
train_db = process_data('mountains_data.json')
train_db.to_disk("./training_data.spacy")


100%|██████████| 23/23 [00:00<00:00, 850.79it/s]

Skipping entity: ed Olym at: 1140 1147





In [27]:
# Setup process for training a custom Named Entity Recognition (NER) model using the spaCy library
! python -m spacy init config config.cfg --lang en --pipeline ner --optimize efficiency

[38;5;3m[!] To generate a more effective transformer-based config (GPU-only),
install the spacy-transformers package and re-run this command. The config
generated now does not use transformers.[0m
[38;5;4m[i] Generated config template specific for your use case[0m
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[38;5;2m[+] Auto-filled config with all values[0m
[38;5;2m[+] Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [28]:
# Train the pipeline described by config.cfg on the serialized training data and
# write the resulting models to the current directory (--output ./).
# NOTE(review): --paths.dev points at the same file as --paths.train, so the
# ENTS_F / ENTS_P / ENTS_R scores are measured on data the model was trained on;
# they do not show how well the model generalizes. Use a held-out dev set for that.
! python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy

[38;5;4m[i] Saving to output directory: .[0m
[38;5;4m[i] Using CPU[0m
[1m
[38;5;2m[+] Initialized pipeline[0m
[1m
[38;5;4m[i] Pipeline: ['tok2vec', 'ner'][0m
[38;5;4m[i] Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     39.67    0.00    0.00    0.00    0.00
 15     200         29.08   1412.88  100.00  100.00  100.00    1.00
 33     400         18.60     19.30  100.00  100.00  100.00    1.00
 53     600          4.46      3.04  100.00  100.00  100.00    1.00
 75     800          0.00      0.00  100.00  100.00  100.00    1.00
 99    1000         78.83     45.77  100.00  100.00  100.00    1.00
123    1200         49.01     11.94  100.00  100.00  100.00    1.00
153    1400          0.00      0.00  100.00  100.00  100.00    1.00
191    1600          0.00      0.00  100.00  100.00  100.00    1.00
238    1800          0.00      0.00  100.

[2023-12-11 17:54:23,433] [INFO] Set up nlp object from config
[2023-12-11 17:54:23,448] [INFO] Pipeline: ['tok2vec', 'ner']
[2023-12-11 17:54:23,452] [INFO] Created vocabulary
[2023-12-11 17:54:23,453] [INFO] Finished initializing nlp object
[2023-12-11 17:54:23,653] [INFO] Initialized pipeline components: ['tok2vec', 'ner']


### Checking that the model works

In [29]:
# Load the best checkpoint produced by `spacy train`.
# Training is run with `--output ./`, so the checkpoint is written to ./model-best;
# the previous path (./output/model-best) only resolved when a directory left over
# from an earlier run happened to exist.
trained_model = spacy.load("./model-best")

In [35]:
# Sample passage naming mountains in Europe and the United States, used to
# spot-check the trained NER model
text = '''In the heart of Europe, the majestic Alps stretch across borders, embracing both France and Switzerland with their snow-capped peaks. Among these giants, Mont Blanc stands tall, a beacon to mountaineers and nature enthusiasts alike. Its towering presence, reaching an elevation of 4,808 meters, not only marks the highest point in the Alps but also in Western Europe.
Journeying eastward into Switzerland, the Eiger looms with its notorious north face, a challenge that has tested the mettle of climbers for decades. Not far from the Eiger, the Matterhorn rises with its iconic pyramidal shape, straddling the Swiss-Italian border. This emblematic peak, with its steep faces and razor-sharp ridges.
Further south, the grandeur of the Swiss Alps is exemplified by the Dufourspitze, part of the Monte Rosa massif. It stands as Switzerland's highest peak, offering breathtaking vistas that extend across the alpine horizon. Back in France, the rugged beauty of the Pelvoux and Barre des Écrins in the Dauphiné Alps offers yet another playground for alpine adventurers.
These mountains, Mont Blanc, Eiger, Matterhorn, Dufourspitze and Barre des Écrins, each tell their own story of geological marvels and human endeavors. They stand as silent witnesses to the passage of time, continuing to draw those who seek the serenity and challenge of the high mountains.
Across the Atlantic, in the vast landscapes of the United States, stand some of the most renowned mountains in the world, each with its unique allure and historical significance. In the state of Washington, Mount Rainier towers over the landscape, a massive stratovolcano that is both feared for its potential eruption and revered for its stunning beauty. This iconic landmark of the pacific northwest is a prominent feature of the Cascade Range and a challenging summit for climbers.
Moving to the heart of Wyoming, the Grand Teton in Grand Teton National Park rises dramatically from the valley of Jackson Hole. Its jagged peaks, part of the Rocky Mountains, have become synonymous with rugged wilderness and natural beauty. The Grand Teton, with its impressive and formidable profile, continues to be a favorite destination for mountaineers and nature lovers.
In the southwestern state of California, the Sierra Nevada range presents Mount Whitney, the highest summit in the contiguous United States. Its elevation of 14,505 feet attracts hikers and climbers from all over, eager to conquer its peak and enjoy the panoramic views from the top.
'''
# Run the trained pipeline over the sample passage to obtain a Doc
doc = trained_model(text)

# Render the Doc with displaCy's entity visualizer (style="ent") inline in the notebook
spacy.displacy.render(doc, style="ent", jupyter=True) # display in Jupyter