In [None]:
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, VectorParams, Distance
from langchain import LLMChain, PromptTemplate
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import uuid
import uvicorn
import nest_asyncio

# Apply nest_asyncio for running in Jupyter (permits nested event loops)
nest_asyncio.apply()

# Initialize FastAPI app
app = FastAPI()

# Initialize Qdrant client
client = QdrantClient("localhost", port=6333)

# Embedding model used for both the stored texts and incoming questions.
# Alternative kept for reference:
#   sentence-transformers/all-mpnet-base-v2
MODEL = SentenceTransformer("sentence-transformers/multi-qa-mpnet-base-dot-v1")

# Load data and create collection
df = pd.read_csv('output_embeddings.csv')
text_column_name = 'text'

# Recreate collection in Qdrant.
# The vector size is taken from the model instead of a hard-coded 768, so
# switching MODEL to an encoder with a different output width keeps the
# collection schema in sync.
# NOTE(review): recreate_collection discards any existing "similar_text"
# collection before creating the new one — confirm that re-ingesting on every
# run is intended.
# NOTE(review): the model name suggests it was tuned for dot-product scoring,
# while the collection is configured for cosine distance — confirm.
client.recreate_collection(
    collection_name="similar_text",
    vectors_config=VectorParams(
        size=MODEL.get_sentence_embedding_dimension(),
        distance=Distance.COSINE,
    ),
)

# Insert data into Qdrant.
# Texts are encoded and uploaded in batches: one encode() call and one
# blocking upsert per batch instead of one of each per row. Every point still
# gets a fresh UUID and the same payload keys as before.
# NOTE(review): the payload stores a second copy of the embedding under
# "text_embeddings"; kept for compatibility — confirm whether any reader needs it.
BATCH_SIZE = 256
texts = df[text_column_name].tolist()
for start in range(0, len(texts), BATCH_SIZE):
    batch_texts = texts[start:start + BATCH_SIZE]
    # encode() on a list returns one embedding row per input text.
    batch_vectors = MODEL.encode(batch_texts).tolist()
    points = [
        PointStruct(
            id=str(uuid.uuid4()),
            vector=vector,
            payload={"text": text, "text_embeddings": vector},
        )
        for text, vector in zip(batch_texts, batch_vectors)
    ]
    client.upsert(
        collection_name="similar_text",
        wait=True,
        points=points,
    )

# Define the input schema for similarity search
class Query(BaseModel):
    """Request body accepted by the ``/search`` endpoint."""

    # Free-text question that gets embedded and matched against stored texts.
    question: str

@app.post("/search")
def search_similar_text(query: Query):
    """Return up to three stored texts most similar to the question.

    The question is embedded with ``MODEL`` and used as the query vector for a
    search over the ``similar_text`` collection. Each hit is reported as a
    dict holding the payload's ``text`` and the hit's ``similarity_score``.
    """
    query_vector = MODEL.encode(query.question).tolist()
    hits = client.search(
        collection_name="similar_text",
        query_vector=query_vector,
        limit=3,
    )

    matches = []
    for hit in hits:
        matches.append(
            {"text": hit.payload["text"], "similarity_score": hit.score}
        )
    return matches

In [None]:
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, pipeline
from transformers import Qwen2ForCausalLM
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient

import uvicorn

# NOTE: this binds `app` to a new FastAPI instance; any `app` defined earlier
# in the same session (and the routes registered on it) is replaced.
app = FastAPI()

# Initialize SentenceTransformer for embeddings
MODEL = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-dot-v1')

# Initialize Qdrant client
client = QdrantClient(url="http://localhost:6333")

# Initialize Qwen2 model for text generation.
# The Qwen2 chat-tuned checkpoints are published with an "-Instruct" suffix;
# the previous "-Chat" name does not resolve to a Qwen2 repository.
model_name = "Qwen/Qwen2-0.5B-Instruct"
# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repo. Qwen2ForCausalLM comes from transformers itself, so the flag is
# presumably unnecessary here — confirm before removing.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = Qwen2ForCausalLM.from_pretrained(model_name, trust_remote_code=True)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,  # Allows for longer answers
    do_sample=True,  # temperature only has an effect when sampling is enabled
    temperature=0.5,
    repetition_penalty=1.3
)

# Input schema
class LLMQuery(BaseModel):
    """Request body accepted by the ``/llm_query`` endpoint."""

    # Free-text question used both for retrieval and in the generation prompt.
    question: str

def _build_prompt(context: str, question: str) -> str:
    """Assemble the context-plus-question prompt passed to the generator."""
    return (
        "Given the following context, provide a short answer to the question.\n\n"
        f"Context:\n{context}\n\n"
        f"Question:\n{question}\n\n"
        "Answer:"
    )


@app.post("/llm_query")
def llm_query(query: LLMQuery):
    """Answer a question using retrieved context and the text generator.

    The question is embedded with ``MODEL``, the three closest entries of the
    ``similar_text`` collection are joined into a context string, and the
    generator is prompted with that context and the question.

    Returns:
        A JSON response with the keys ``question``, ``context`` and
        ``answer``. ``answer`` falls back to ``"No answer generated."`` when
        the generated text is empty after stripping whitespace.
    """
    question = query.question

    # Step 1: Embed the query question
    question_embeddings = MODEL.encode(question).tolist()

    # Step 2: Search for similar context in Qdrant
    search_result = client.search(
        collection_name="similar_text",
        query_vector=question_embeddings,
        limit=3,  # Retrieve the top 3 results
    )

    # Extract context from Qdrant results; the fallback sentence is fed to the
    # model as the context when nothing was retrieved.
    if search_result:
        context = " ".join(item.payload["text"] for item in search_result)
    else:
        context = "No relevant context found."

    # Debugging: Log the context and question
    print(f"Context: {context}")
    print(f"Question: {question}")

    # Step 3: Prepare the prompt for Qwen2
    prompt = _build_prompt(context, question)

    # Step 4: Generate an answer; return_full_text=False keeps only the newly
    # generated continuation, without the prompt.
    raw_answer = generator(prompt, return_full_text=False)[0]["generated_text"]

    # Debugging: Log the raw answer
    print(f"Raw Answer: {raw_answer}")

    clean_answer = raw_answer.strip()

    # Step 5: Return the response. JSONResponse already sets the JSON
    # Content-Type header, so it is not passed explicitly.
    response = {
        "question": question,
        "context": context,
        "answer": clean_answer or "No answer generated."
    }
    return JSONResponse(content=response)

# Start the API server when this code runs as the main program.
# NOTE: host "0.0.0.0" makes the server listen on all network interfaces,
# not only localhost.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)