In [None]:
! pip install transformers

In [11]:
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch
# Load the fine-tuned model and its matching tokenizer from the Hugging Face Hub
model = AutoModelForTokenClassification.from_pretrained("ayoubkirouane/BERT-base_NER-ar")
tokenizer = AutoTokenizer.from_pretrained("ayoubkirouane/BERT-base_NER-ar")

# Tokenize the input text in a single pass. Calling the tokenizer directly adds
# the special tokens, builds the attention mask and returns PyTorch tensors.
# Truncation caps the sequence at the model's positional-embedding limit so that
# long inputs do not crash inside the embedding layer.
text = "أبو ظبي هي عاصمة دولة الإمارات العربية المتحدة."
encoding = tokenizer(
    text,
    return_tensors="pt",
    truncation=True,
    max_length=model.config.max_position_embeddings,
).to(model.device)

# Recover the token strings from the exact IDs that are fed to the model, so
# tokens and predictions line up one-to-one (no decode/re-tokenize round-trip).
input_ids = encoding["input_ids"][0].tolist()
tokens = tokenizer.convert_ids_to_tokens(input_ids)

# Perform NER inference (no gradients needed)
with torch.no_grad():
    outputs = model(**encoding)

# Pick the highest-scoring class per token; the batch holds one sequence
predicted_label_ids = outputs.logits.argmax(dim=-1)[0].tolist()

# Map label IDs to the label names stored in the model config
predicted_labels = [model.config.id2label[label_id] for label_id in predicted_label_ids]

# Print the tokenized text and its associated labels
for token, label in zip(tokens, predicted_labels):
    print(f"Token: {token}, Label: {label}")

Token: [CLS], Label: LABEL_0
Token: أبو, Label: LABEL_5
Token: ظ, Label: LABEL_6
Token: ##بي, Label: LABEL_6
Token: هي, Label: LABEL_0
Token: عاصمة, Label: LABEL_0
Token: دولة, Label: LABEL_0
Token: الإمارات, Label: LABEL_5
Token: العربية, Label: LABEL_6
Token: المتحدة, Label: LABEL_6
Token: ., Label: LABEL_0
Token: [SEP], Label: LABEL_0


In [13]:
from transformers import pipeline

# Build a token-classification pipeline backed by the BERT-base_NER-ar checkpoint
pipe = pipeline(task="token-classification", model="ayoubkirouane/BERT-base_NER-ar")

# Sample sentence to tag
text = "أبو ظبي هي عاصمة دولة الإمارات العربية المتحدة."

# The pipeline returns one dict per tagged token
results = pipe(text)

# Show each token together with the label assigned to it
for entity in results:
    word, tag = entity['word'], entity['entity']
    print(f"Entity: {word}, Label: {tag}")


Entity: أبو, Label: LABEL_5
Entity: ظ, Label: LABEL_6
Entity: ##بي, Label: LABEL_6
Entity: هي, Label: LABEL_0
Entity: عاصمة, Label: LABEL_0
Entity: دولة, Label: LABEL_0
Entity: الإمارات, Label: LABEL_5
Entity: العربية, Label: LABEL_6
Entity: المتحدة, Label: LABEL_6
Entity: ., Label: LABEL_0


In [14]:
! pip install -q  gradio

In [None]:
import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

# Fetch the tokenizer and the fine-tuned checkpoint (both live in the same Hub repo)
tokenizer = AutoTokenizer.from_pretrained("ayoubkirouane/BERT-base_NER-ar")
model = AutoModelForTokenClassification.from_pretrained("ayoubkirouane/BERT-base_NER-ar")

# Create a function to perform NER
def perform_ner(text):
    """Label every token of ``text`` with the model's predicted class.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: Raw input string. ``None`` is treated as an empty string.

    Returns:
        A list of ``{"entity": <token>, "label": <label name>}`` dicts, one per
        token fed to the model and in input order. Special tokens added by the
        tokenizer are included, and word pieces stay as separate entries.
    """
    # Tokenize once. Calling the tokenizer directly adds the special tokens,
    # builds the attention mask and returns tensors; truncation caps the input
    # at the model's positional-embedding limit so long text does not crash.
    encoding = tokenizer(
        text or "",
        return_tensors="pt",
        truncation=True,
        max_length=model.config.max_position_embeddings,
    ).to(model.device)

    # Derive token strings from the exact IDs the model sees, so that tokens
    # and predictions are aligned without a decode/re-tokenize round-trip.
    token_ids = encoding["input_ids"][0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(token_ids)

    # Perform NER inference (no gradients needed)
    with torch.no_grad():
        logits = model(**encoding).logits

    # Highest-scoring class per token; the batch holds a single sequence
    label_ids = logits.argmax(dim=-1)[0].tolist()

    # Pair each token with the label name stored in the model config
    return [
        {"entity": token, "label": model.config.id2label[label_id]}
        for token, label_id in zip(tokens, label_ids)
    ]

# Wire perform_ner into a simple text-in / JSON-out Gradio UI.
# live=True re-runs the function as the input changes instead of waiting for a submit.
iface = gr.Interface(
    title="Arabic Named Entity Recognition",
    description="Enter Arabic text to extract named entities (e.g., names of people, locations, organizations).",
    fn=perform_ner,
    inputs="text",
    outputs="json",
    live=True,
)

# Start the app with a shareable link and debug mode enabled
iface.launch(share=True, debug=True)
