# Entity Recognition with Hugging Face

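Uses a Hugging Face model that is already fine-tuned for NER (`dbmdz/bert-large-cased-finetuned-conll03-english`) to identify named entities in text. No training is performed in this notebook; it only runs inference.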

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from datasets import load_dataset
import pandas as pd

In [None]:
# Load the pretrained checkpoint and its matching tokenizer
MODEL_NAME = "dbmdz/bert-large-cased-finetuned-conll03-english"

model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
# Build the token-classification (NER) pipeline from the loaded model and tokenizer
ner_pipeline = pipeline(
    task="token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

In [None]:
#Helper methods
def analyze_text(text):
  """
  Run named-entity recognition on a single text.

  Args:
    text: The string to analyse; passed unchanged to ``ner_pipeline``.

  Returns:
    A pandas DataFrame with one row per entity returned by the pipeline.
    The pipeline keys ``word``, ``entity_group`` and ``score`` are renamed
    to ``Parola``, ``Tipo`` and ``Confidenza``; any other keys keep their
    original names. When no entity is found, an empty DataFrame that still
    has the three renamed columns is returned, so lookups such as
    ``df["Tipo"]`` do not raise ``KeyError``.
  """
  column_names = {"word": "Parola", "entity_group": "Tipo", "score": "Confidenza"}
  ner_results = ner_pipeline(text)
  if not ner_results:
    # pd.DataFrame([]) has no columns at all, so the rename below would be a
    # no-op; build the expected schema explicitly instead.
    return pd.DataFrame(columns=list(column_names.values()))
  return pd.DataFrame(ner_results).rename(columns=column_names)

def analyze_batch(sentences):
  """
  Run named-entity recognition on a batch of sentences.

  Args:
    sentences: A list of strings. A single string is also accepted and is
      treated as a batch of one sentence.

  Returns:
    A pandas DataFrame with one row per recognized entity and the columns
    ``Frase`` (the source sentence), ``Parola``, ``Tipo`` and
    ``Confidenza``. The columns are present even when no entity is found.
  """
  columns = ["Frase", "Parola", "Tipo", "Confidenza"]

  # A bare string would otherwise be indexed character by character below.
  if isinstance(sentences, str):
    sentences = [sentences]

  batch_results = ner_pipeline(sentences)

  # Defensive: a flat list of entity dicts (rather than a list of lists) is
  # treated as the result for a single sentence.
  if batch_results and isinstance(batch_results[0], dict):
    batch_results = [batch_results]

  results = []
  for sentence, sentence_results in zip(sentences, batch_results):
    for entity in sentence_results:
      results.append({
          "Frase": sentence,
          "Parola": entity["word"],
          "Tipo": entity["entity_group"],
          "Confidenza": entity["score"],
      })

  # Passing columns keeps the schema stable when `results` is empty.
  return pd.DataFrame(results, columns=columns)

def save_results_to_csv(df, filename):
  """
  Write a results DataFrame to ``filename`` in CSV format.

  The DataFrame index is not written to the file.
  """
  df.to_csv(path_or_buf=filename, index=False)

In [None]:
# Single-sentence test
text = "Juventus is an italian football team based in Turin"

# Run the analysis and print the resulting table
df_results = analyze_text(text)
print(df_results)

# Print the distinct entity types that were recognized
unique_entities = df_results["Tipo"].unique()
print(unique_entities)