In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Checkpoint directory holding the fine-tuned DistilBERT weights and tokenizer files
model_name = './fine_tuned_distilbert'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# BIO tag set: a label's position in the tuple is its class id.
# Adjust the order/content so it matches the labels used during training.
id2label = dict(enumerate((
    "O",
    "B-QUANTITY",
    "I-QUANTITY",
    "B-UNIT",
    "I-UNIT",
    "B-FOOD",
    "I-FOOD",
)))
# Reverse lookup: label string -> class id
label2id = {label: idx for idx, label in id2label.items()}

def _group_entities(tokens, labels, skip_tokens=()):
    """Merge per-token BIO labels into a list of entity spans.

    Args:
        tokens: Token strings, in order. Pieces starting with ``##`` are
            treated as continuations of the previous token (WordPiece
            convention, as used by DistilBERT).
        labels: One label per token: ``"O"``, or ``"B-<TYPE>"`` / ``"I-<TYPE>"``.
            A label without a ``-`` is used as the type itself.
        skip_tokens: Tokens that never belong to an entity (e.g. ``[CLS]``,
            ``[SEP]``); they end any entity in progress.

    Returns:
        A list of ``{"type": <TYPE>, "text": <joined tokens>}`` dicts in
        order of appearance.
    """
    results = []
    current = None  # entity being accumulated, or None when outside one

    for token, label in zip(tokens, labels):
        is_boundary = token in skip_tokens

        if not is_boundary and token.startswith("##"):
            # A continuation piece belongs to the word its first piece
            # started: glue it on without a space and ignore its own label.
            if current is not None:
                current["text"] += token[2:]
            continue

        if is_boundary or label == "O":
            if current is not None:
                results.append(current)
                current = None
            continue

        prefix, dash, entity_type = label.partition("-")
        if not dash:
            # Untagged scheme ("FOOD" rather than "B-FOOD").
            prefix, entity_type = "", label

        # Only a non-"B" tag of the same type extends the open entity;
        # "B-" always opens a new one so adjacent same-type entities
        # are not fused together.
        extends = (
            current is not None
            and prefix != "B"
            and current["type"] == entity_type
        )
        if extends:
            current["text"] += f" {token}"
        else:
            if current is not None:
                results.append(current)
            current = {"type": entity_type, "text": token}

    if current is not None:
        results.append(current)

    return results


# Function to extract structured ingredients from recipe text
def extract_ingredients(text):
    """Tag ``text`` with the fine-tuned model and return the entities found.

    Uses the module-level ``tokenizer``, ``model`` and ``id2label``.

    Args:
        text: Recipe text to analyse. Input longer than the tokenizer's
            configured maximum length is truncated, so entities past that
            point are not reported.

    Returns:
        A list of ``{"type": ..., "text": ...}`` dicts, one per entity, in
        order of appearance. ``text`` is rebuilt from tokenizer pieces, so it
        reflects the tokenizer's normalisation (e.g. lower-casing for an
        uncased vocabulary) rather than the exact input characters.
    """
    # Tokenize the input text; truncation avoids feeding the model a sequence
    # longer than it supports.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: no_grad skips building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)

    # Decode tokens and their corresponding labels (single sequence: index 0)
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = [id2label[class_id] for class_id in predictions[0].tolist()]

    # Sequence-framing tokens are not part of the recipe text. The unknown
    # token is deliberately NOT skipped: it stands in for a real word.
    framing = {tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token}
    framing.discard(None)

    return _group_entities(tokens, labels, framing)

# Smoke test: run the extractor on one short recipe instruction
recipe_text = "Mix 2 cups of flour with 1 tsp salt"
extracted_ingredients = extract_ingredients(recipe_text)

# Report every entity on its own line as "<type>: <text>"
print("Extracted Ingredients:")
for entity in extracted_ingredients:
    print(entity['type'], entity['text'], sep=": ")


Extracted Ingredients:
