In [10]:
import getpass
import json
import os
from typing import Optional

import nest_asyncio
import requests
import uvicorn
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

In [None]:
def extract_text_from_url(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled host.

    Returns:
        The non-empty paragraph texts, each stripped of surrounding
        whitespace and joined with single spaces. Empty string when the page
        contains no paragraph text.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping the error page as if it were content
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the paragraph text, skipping paragraphs that are empty after stripping
    paragraph_texts = (para.get_text().strip() for para in soup.find_all('p'))
    return ' '.join(text for text in paragraph_texts if text)

In [None]:
# Helper: turn the raw model reply into a Python object
def _parse_llm_json(raw_reply):
    """Parse the JSON object contained in ``raw_reply``.

    Models frequently wrap their JSON in Markdown code fences or add a short
    sentence around it, so only the span from the first ``{`` to the last
    ``}`` is parsed when such a span exists.

    Returns:
        The decoded JSON value, or ``{"error": ...}`` when the reply is not a
        string or cannot be decoded.
    """
    failure = {"error": "No se pudo generar el JSON correctamente."}
    if not isinstance(raw_reply, str):
        return failure

    start, end = raw_reply.find("{"), raw_reply.rfind("}")
    candidate = raw_reply[start:end + 1] if 0 <= start < end else raw_reply
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        return failure


# Builds the extraction prompt, queries the LLM and parses its JSON answer
def generate_vehicle_specs(url=None, description=None, max_chars=2000):
    """Ask the LLM for the technical specifications of a vehicle as JSON.

    Args:
        url: Page to scrape when no ``description`` is given. For backward
            compatibility with single-argument callers, a first argument
            that is not an http(s) URL is treated as the description text.
        description: Vehicle description. When provided it is used directly
            and ``url`` is not fetched.
        max_chars: Maximum number of characters of the description that are
            inserted into the prompt.

    Returns:
        The value decoded from the model's JSON reply, or
        ``{"error": "No se pudo generar el JSON correctamente."}`` when the
        reply cannot be decoded.

    Raises:
        ValueError: If neither ``url`` nor ``description`` is provided.
    """
    if description:
        context = description
    elif isinstance(url, str) and url.strip().lower().startswith(("http://", "https://")):
        context = extract_text_from_url(url)
    elif url:
        # Single-argument callers pass already-extracted text in the first position
        context = str(url)
    else:
        raise ValueError("Debe proporcionar una URL o una descripción del vehículo.")

    prompt_template = """
        You are an assistant that extracts technical specifications for vehicles. Given a car, extract and return the data in JSON format with the following fields:

        1. "brand" (string): The brand of the vehicle (e.g., "Toyota").
        2. "model" (string): The model of the vehicle (e.g., "Corolla").
        3. "version" (string): The version or complete version of the vehicle (if applicable).
        4. "year" (integer): The year of manufacturing of the vehicle.
        5. "body_type" (string): The type of the vehicle's body (e.g., "sedan", "SUV").
        6. "doors" (integer): The number of doors the vehicle has.
        7. "status" (string): Whether the vehicle is currently "for sale", "discontinued", or other relevant status.
        8. "length" (integer): The length of the vehicle in millimeters (mm).
        9. "seats" (integer): The number of seats in the vehicle.
        10. "price" (number): The price of the vehicle in euros (€).
        11. "technology" (string): The technology of the vehicle (e.g., "hybrid", "electric", "combustion").
        12. "transmission" (string): The type of transmission (e.g., "manual", "automatic").
        13. "fuel_consumption" (number): The combined fuel consumption in liters per 100 kilometers (l/100km).
        14. "fuel_type" (string): The type of fuel the vehicle uses (e.g., "gasoline", "diesel", "electric").
        15. "electric_range" (integer): The electric range of the vehicle in kilometers (km), if applicable.
        16. "battery_capacity" (number): The gross or net capacity of the battery in kilowatt-hours (kWh), if applicable.
        17. "electric_consumption" (number): The combined electric consumption in kilowatt-hours per 100 kilometers (kWh/100km), if applicable.
        18. "charging_time" (number): The charging time in alternating current (AC) in hours (h), if applicable.
        19. "duplicate_date" (string): A date that helps to distinguish between duplicates in the format "MMM-YY" (e.g., "Sep-24").
        20. "max_power" (object): The maximum power of the vehicle in horsepower (CV) and kilowatts (kW).
            - "cv" (integer): Horsepower of the vehicle.
            - "kw" (integer): Kilowatts of the vehicle.
        21. "acceleration" (number): The time it takes to accelerate from 0 to 100 km/h in seconds.
        22. "displacement" (integer): The engine displacement in cubic centimeters (cc).
        23. "environmental_label" (string): The vehicle's environmental label (e.g., "Euro 6").
        24. "co2_emissions" (number): The CO2 emissions in grams per kilometer (gCO2/km).
        25. "tank_capacity" (number): The fuel tank capacity in liters (l) or kilograms (kg), depending on fuel type.
        26. "max_speed" (number): The maximum speed of the vehicle in kilometers per hour (km/h).
        27. "maintenance_costs" (number): The monthly maintenance costs in euros per month (€/month).
        28. "euro_ncap_rating" (integer): The EuroNCAP rating in stars.
        
        Return the result in JSON format. If a value is not available, set it to `null`.

        Here is the description of the vehicle: {description}
    """

    # The template's only placeholder is {description}, so plain str.format
    # builds the same prompt without depending on PromptTemplate, which this
    # notebook never imports. The text is truncated to keep the prompt short.
    final_prompt = prompt_template.format(description=context[:max_chars])

    # Query the module-level LLM; prefer .invoke() when the client offers it
    invoke = getattr(llm, "invoke", None)
    response = invoke(final_prompt) if callable(invoke) else llm(final_prompt)

    # Chat models return a message object whose text lives in .content;
    # completion-style models return the string itself
    reply_text = getattr(response, "content", response)

    return _parse_llm_json(reply_text)

In [8]:
# Configure the OpenAI API key without hardcoding it in the notebook.
# A key already present in the environment is kept; the user is prompted
# (input hidden) only when the variable is missing or empty.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [9]:
# Create the OpenAI chat model used by generate_vehicle_specs.
# temperature=0 keeps the structured extraction as repeatable as possible.
# NOTE(review): ChatOpenAI is not imported anywhere in the visible notebook;
# presumably it comes from langchain_openai — confirm and add it to the import cell.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [None]:
# Enable nested asyncio so the server can run inside a notebook
nest_asyncio.apply()

# Create the FastAPI application instance
app = FastAPI()

# Data model for the request body
class VehicleRequest(BaseModel):
    """Request payload identifying the vehicle by URL and/or free-text description."""
    # Both fields are optional and default to None
    url: Optional[str] = None
    description: Optional[str] = None

In [None]:
# POST route that receives the vehicle description or a URL to scrape
@app.post("/extract_specs/")
async def extract_vehicle_specs(request: VehicleRequest):
    """Return the vehicle specifications extracted from a URL or a description.

    The URL takes precedence when both fields are sent.

    Raises:
        HTTPException: 400 when the URL cannot be scraped, when the page
            yields no text, or when neither ``url`` nor ``description`` is
            provided.
    """
    if request.url:
        try:
            # Extract the description from the URL when one is provided
            description = extract_text_from_url(request.url)
        except Exception as exc:
            # Chain the original error so the root cause stays visible in the server log
            raise HTTPException(status_code=400, detail="No se pudo extraer la descripción desde la URL proporcionada.") from exc
        if not description or not description.strip():
            # A page without paragraph text gives the model nothing to work with
            raise HTTPException(status_code=400, detail="No se pudo extraer la descripción desde la URL proporcionada.")
    elif request.description:
        # Use the description sent in the request body
        description = request.description
    else:
        raise HTTPException(status_code=400, detail="Debe proporcionar una URL o una descripción del vehículo.")

    # generate_vehicle_specs takes (url, description); calling it with a
    # single argument raised TypeError on every request
    vehicle_specs = generate_vehicle_specs(request.url, description)

    return vehicle_specs

In [None]:
# Entry point intended for running this code as a standalone .py script.
# NOTE(review): the author was unsure whether this works from a notebook cell — confirm
# before relying on it; presumably uvicorn.run blocks the cell while the server is up.
# NOTE(review): host "0.0.0.0" listens on every network interface; consider "127.0.0.1" for local use.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)