In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
import torch

# 1. Create the FastAPI application object used by the endpoint and server cells
app = FastAPI(
    title="Multilingual Intelligent Assistant",
)



In [None]:
# 2. Load every model once at module scope so each request reuses them
# Default to CPU (-1) and switch to the first CUDA device (0) when available
device = -1
if torch.cuda.is_available():
    device = 0
print(f"Loading models on {'GPU' if device == 0 else 'CPU'}...")

# Compact checkpoints are used to keep inference fast
gen_pipe = pipeline(
    "text-generation",
    model="gpt2",
    device=device,
)
sum_pipe = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-6-6",
    device=device,
)
trans_pipe = pipeline(
    "translation",
    model="NAMAA-Space/masrawy-english-to-egyptian-arabic-translator-v2.9",
    device=device,
)
qa_pipe = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    device=device,
)


In [None]:
# 3. Define Request Schema
class AIRequest(BaseModel):
    """Request body accepted by the /ai-assistant endpoint."""

    # Subject inserted into the text-generation prompt
    topic: str
    # Question answered against the generated text
    question: str

# 4. Define Response Schema
class QAResponse(BaseModel):
    """Question/answer pair nested inside AIResponse."""

    # The question as submitted in the request
    question: str
    # Answer text returned by the question-answering pipeline
    answer: str

class AIResponse(BaseModel):
    """Response body returned by the /ai-assistant endpoint."""

    # Topic echoed back from the request
    topic: str
    # Text produced by the text-generation pipeline for the topic prompt
    generated_text: str
    # Summary of generated_text from the summarization pipeline
    summary: str
    # The summary after the English-to-Egyptian-Arabic translation pipeline
    translated_summary_ar: str
    # The request's question together with the answer found in generated_text
    question_answer: QAResponse


In [None]:
# 5. API Endpoint
@app.post("/ai-assistant", response_model=AIResponse)
def run_assistant(request: AIRequest):
    """Run the generate -> summarize -> translate -> answer chain for one request.

    Args:
        request: The topic to write about and a question to answer from the
            generated text.

    Returns:
        AIResponse holding the generated text, its summary, the Egyptian
        Arabic translation of that summary, and the question/answer pair.

    Raises:
        HTTPException: 422 when ``topic`` or ``question`` is blank; 500 when
            any model pipeline fails.
    """
    # Reject blank input up front so it is reported as a client error rather
    # than surfacing as an opaque 500 from inside one of the pipelines.
    if not request.topic.strip() or not request.question.strip():
        raise HTTPException(
            status_code=422,
            detail="'topic' and 'question' must be non-empty.",
        )

    try:
        # Step A: Generate content
        # max_new_tokens bounds only the continuation, not prompt + continuation
        prompt = f"Explain the impact of {request.topic}:"
        generated = gen_pipe(prompt, max_new_tokens=100, do_sample=True)[0]['generated_text']

        # Step B: Summarize the generated text
        summary = sum_pipe(generated, max_length=40, min_length=10, do_sample=False)[0]['summary_text']

        # Step C: Translate the summary to Egyptian Arabic
        translated = trans_pipe(summary)[0]['translation_text']

        # Step D: Answer the question using the generated text as context
        qa_result = qa_pipe(question=request.question, context=generated)

        # Build the declared response model so the payload shape is checked here
        return AIResponse(
            topic=request.topic,
            generated_text=generated,
            summary=summary,
            translated_summary_ar=translated,
            question_answer=QAResponse(
                question=request.question,
                answer=qa_result['answer'],
            ),
        )
    except Exception as e:
        # Chain the original exception so the server-side traceback is preserved.
        # NOTE(review): detail=str(e) exposes internal error text to clients;
        # acceptable for a local demo, replace with a generic message otherwise.
        raise HTTPException(status_code=500, detail=str(e)) from e


In [None]:
! pip install nest_asyncio

In [None]:
import threading
import uvicorn
import nest_asyncio
from fastapi import FastAPI

# 1. Allow nested loops: patch asyncio so an event loop can be run while
# another one (presumably the notebook kernel's) is already running
nest_asyncio.apply()

# 2. Define a function to run the server
def run_server(host="127.0.0.1", port=8000, log_level="info"):
    """Serve the notebook's FastAPI ``app`` with uvicorn.

    The call does not return until the server stops, so it is meant to be
    used as a thread target rather than called directly from a cell.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
        log_level: uvicorn log level name.
    """
    # 'app' must be the FastAPI instance defined earlier in the notebook
    config = uvicorn.Config(app, host=host, port=port, log_level=log_level)
    uvicorn.Server(config).run()

import socket
import time

# 3. Start the server in a separate thread
# Running it off the main thread keeps the notebook usable; daemon=True lets the
# kernel shut down without waiting for the server thread to finish.
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

# Wait until the port accepts connections so the next cell cannot race the
# server start-up and fail with "connection refused".
server_addr = ("127.0.0.1", 8000)
startup_deadline = time.monotonic() + 30
while True:
    try:
        with socket.create_connection(server_addr, timeout=1):
            break
    except OSError:
        if not server_thread.is_alive() or time.monotonic() > startup_deadline:
            raise RuntimeError(
                "Server failed to start on http://127.0.0.1:8000"
            ) from None
        time.sleep(0.2)

print("🚀 Server is running in the background at http://127.0.0.1:8000")

In [None]:
import requests

payload = {
    "topic": "Vitamins Importance",
    "question": "How do Vitamins help health of people?"
}

# Timeout is (connect, read); the read limit is generous because CPU inference
# across four models can be slow.
response = requests.post(
    "http://127.0.0.1:8000/ai-assistant",
    json=payload,
    timeout=(5, 600),
)
# Fail with the HTTP error itself instead of a KeyError on an error payload
response.raise_for_status()

# Decode the body once and reuse it
result = response.json()
print(result['topic'])
print(result['generated_text'])
print(result['summary'])
print(result['translated_summary_ar'])