# In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline
from bs4 import BeautifulSoup

# FastAPI application object; the /extract route defined below is registered
# on it through the @app.post decorator.
app = FastAPI()

# Request body schema for POST /extract: one required string field.
class HTMLInput(BaseModel):
    # Raw HTML markup; the handler hands it to BeautifulSoup for parsing.
    html_content: str

# Load the BERT token-classification (NER) pipeline once, at import time, so
# every call to advanced_extract_attributes reuses the same model object.
# NOTE(review): the checkpoint name indicates CoNLL-2003 fine-tuning, whose
# label set is PER/ORG/LOC/MISC -- confirm whether a 'B-PRODUCT' tag can ever
# be emitted by this model.
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

def advanced_extract_attributes(html_content: str) -> dict:
    """Extract product attributes from an HTML page or fragment.

    Product names are taken from elements carrying the ``product-name`` CSS
    class, followed by any ``B-PRODUCT`` entities reported by the module-level
    ``nlp`` pipeline that were not already found in the markup. Prices,
    descriptions and image URLs are read from ``span.price``,
    ``div.description`` and ``img.product-image`` respectively.

    Args:
        html_content: HTML markup to parse.

    Returns:
        A dict with the keys ``product_names``, ``prices``, ``descriptions``
        and ``images``; each value is a list of strings and is empty when
        nothing matched.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    text_content = soup.get_text()

    # Skip model inference entirely when the markup has no visible text.
    ner_results = nlp(text_content) if text_content.strip() else []

    # The configured CoNLL-03 checkpoint only tags PER/ORG/LOC/MISC, so the
    # 'B-PRODUCT' filter on its own never matches and product_names used to be
    # empty for every input. Read names from the markup first; the NER filter
    # is kept so a model that does emit B-PRODUCT still contributes.
    markup_names = [
        name
        for name in (
            tag.get_text(strip=True)
            for tag in soup.find_all(class_='product-name')
        )
        if name
    ]
    ner_names = [
        entity['word']
        for entity in ner_results
        if entity['entity'] == 'B-PRODUCT'
    ]
    already_found = set(markup_names)
    product_names = markup_names + [
        name for name in ner_names if name not in already_found
    ]

    prices = [tag.get_text() for tag in soup.find_all('span', class_='price')]
    descriptions = [
        tag.get_text() for tag in soup.find_all('div', class_='description')
    ]
    # An <img class="product-image"> without a src attribute used to raise
    # KeyError and fail the whole request; such tags are skipped instead.
    images = [
        tag['src']
        for tag in soup.find_all('img', class_='product-image')
        if tag.has_attr('src')
    ]

    return {
        'product_names': product_names,
        'prices': prices,
        'descriptions': descriptions,
        'images': images,
    }

@app.post("/extract")
async def extract_info(request: HTMLInput):
    """Extract product names, prices, descriptions and image URLs from the submitted HTML."""
    # Function-scope import keeps this handler self-contained; fastapi is
    # already a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # HTML parsing plus BERT inference is synchronous and CPU-bound. Calling it
    # directly inside a coroutine would stall the event loop for every other
    # request, so the work is handed to the server's worker thread pool.
    result = await run_in_threadpool(
        advanced_extract_attributes, request.html_content
    )
    return result

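# Example usage: POST a JSON body of the form {"html_content": "<html>...</html>"} to /extract.
# Input HTML block (the value of "html_content"):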
# <html>
# <head><title>Product Page</title></head>
# <body>
# <h1 class="product-name">Sample Product</h1>
# <span class="price">$19.99</span>
# <div class="description">This is a sample product description.</div>
# <img class="product-image" src="sample-product.jpg"/>
# </body>
# </html>
#
# Corresponding JSON Output:
# {
#   "product_names": ["Sample Product"],
#   "prices": ["$19.99"],
#   "descriptions": ["This is a sample product description."],
#   "images": ["sample-product.jpg"]
# }
