In [3]:
import os
import re
import json
import requests
import gradio as gr
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv().
# The return value of load_dotenv is assigned to `_` and otherwise unused.
_ = load_dotenv(find_dotenv())
# API token sent as the Bearer credential by get_completion().
# Direct indexing of os.environ raises KeyError if the variable is not set.
hf_api_key = os.environ['HF_API_KEY']
# Endpoint URL used as the default ENDPOINT_URL of get_completion().
API_URL = os.environ['HF_API_NER_BASE']

# Call Hugging Face endpoint
def get_completion(inputs, parameters=None, ENDPOINT_URL=API_URL, timeout=60):
    """POST ``inputs`` to an inference endpoint and return the decoded JSON body.

    Args:
        inputs: Value sent under the ``"inputs"`` key of the JSON payload.
        parameters: Optional value sent under the ``"parameters"`` key; the
            key is omitted entirely when this is None.
        ENDPOINT_URL: URL to post to. The default is the value ``API_URL``
            had when this function was defined.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the endpoint answers with a non-2xx status.
            The response body is included in the message so the server's
            explanation is not lost.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json"
    }
    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters

    response = requests.post(
        ENDPOINT_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of handing an error payload to
    # callers that expect a successful result.
    if not response.ok:
        raise requests.HTTPError(
            f"Request to {ENDPOINT_URL} failed with status "
            f"{response.status_code}: {response.text}",
            response=response,
        )
    return json.loads(response.content.decode("utf-8"))

# Merge tokens for full entities
def merge_tokens(tokens):
    """Merge continuation (``I-``) tokens into the entity that precedes them.

    A token is folded into the previously emitted entity when its ``entity``
    label starts with ``I-`` and the previous entity's label ends with the
    same suffix (the text after the two-character prefix). When folding:

    * the token's ``word`` (with any ``##`` removed) is appended to the
      previous ``word``;
    * ``end`` is moved to the token's ``end``;
    * ``score`` becomes the average of the previous score and the token's
      score (a running pairwise average, not the mean over all pieces).

    Any other token starts a new entry.

    Args:
        tokens: Iterable of dicts with at least the keys ``entity``,
            ``word``, ``end`` and ``score``.

    Returns:
        A new list of merged entity dicts. The input dicts are never
        modified: each entry is a shallow copy, so callers can keep using
        the original token list.
    """
    merged_tokens = []
    for token in tokens:
        label = token['entity']
        previous = merged_tokens[-1] if merged_tokens else None
        if (
            previous is not None
            and label.startswith('I-')
            and previous['entity'].endswith(label[2:])
        ):
            previous['word'] += token['word'].replace('##', '')
            previous['end'] = token['end']
            previous['score'] = (previous['score'] + token['score']) / 2
        else:
            # Copy so that later in-place merging does not alter the
            # caller's token dict.
            merged_tokens.append(dict(token))
    return merged_tokens

# Find four-digit years from 1800 through 2100
def find_years(text):
    """Return one ``YEAR`` entity dict per standalone year found in ``text``.

    A year is a four-digit number from 1800 through 2100 delimited by word
    boundaries. Each result carries the matched text in ``word``, its
    character offsets in ``start``/``end`` and a fixed ``score`` of 1.0.
    Results are in order of appearance; an empty list means no match.
    """
    return [
        {
            "entity": "YEAR",
            "word": hit.group(),
            "start": hit.start(),
            "end": hit.end(),
            "score": 1.0,
        }
        for hit in re.finditer(r'\b(1[89]\d{2}|20\d{2}|2100)\b', text)
    ]

# Final NER function
def ner(input):
    """Run model NER plus regex year detection on ``input``.

    The text is sent to the inference endpoint via ``get_completion``, the
    returned tokens are combined with ``merge_tokens``, and years found by
    ``find_years`` are appended after the model entities.

    Args:
        input: Text to analyse. The name shadows the ``input`` builtin; it
            is kept unchanged so existing callers are not broken.

    Returns:
        A dict ``{"text": input, "entities": [...]}``.

    Raises:
        gr.Error: If the endpoint response is not a list (for example an
            ``{"error": ...}`` payload), so the failure is reported with the
            server's message rather than a ``TypeError`` from token merging.
    """
    output = get_completion(input)
    if not isinstance(output, list):
        raise gr.Error(f"NER endpoint returned an unexpected response: {output}")

    merged_tokens = merge_tokens(output)

    # Character spans already claimed by model entities; entries without
    # usable offsets are skipped.
    model_spans = [
        (entity['start'], entity['end'])
        for entity in merged_tokens
        if entity.get('start') is not None and entity.get('end') is not None
    ]
    # Drop regex years that overlap a model entity so the same characters
    # are not reported as two separate entities.
    year_tokens = [
        year
        for year in find_years(input)
        if not any(
            year['start'] < span_end and span_start < year['end']
            for span_start, span_end in model_spans
        )
    ]

    all_entities = merged_tokens + year_tokens
    return {"text": input, "entities": all_entities}

# Gradio Interface
# Shut down Gradio servers left over from earlier runs of this cell before
# starting a new one (the captured output reports "Closing server running on port").
gr.close_all()
# Single-textbox UI: the submitted text is passed to ner(), whose
# {"text", "entities"} result is rendered by the HighlightedText component.
demo = gr.Interface(
    fn=ner,
    inputs=[gr.Textbox(label="Text to find entities", lines=2)],
    outputs=[gr.HighlightedText(label="Text with entities")],
    title="NER with dslim/bert-base-NER + Year Detection",
    description="Find named entities and years (e.g., 1999, 2023) using the `dslim/bert-base-NER` model and regex.",
    allow_flagging="never",
    # Sample inputs offered in the UI.
    examples=[
        "My name is Andrew, I'm building DeeplearningAI and I live in California",
        "Marie Curie won the Nobel Prize in Physics in 1903.",
        "The company was founded in 2020 and expanded in 2023.",
        "My name is Poli, I live in Vienna and work at HuggingFace"
    ]
)

# Start the server on the port given by the PORT4 environment variable
# (KeyError if unset, ValueError if not an integer). share=True requests a
# public share link.
demo.launch(share=True, server_port=int(os.environ['PORT4']))


Closing server running on port: 7863
Closing server running on port: 7863
Closing server running on port: 7862
Running on local URL:  https://0.0.0.0:7863
IMPORTANT: You are using gradio version 3.37.0, however version 4.44.1 is available, please upgrade.
--------

Could not create share link. Missing file: /usr/local/lib/python3.9/site-packages/gradio/frpc_linux_amd64_v0.2. 

Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: 

1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64
2. Rename the downloaded file to: frpc_linux_amd64_v0.2
3. Move the file to this location: /usr/local/lib/python3.9/site-packages/gradio


