In [None]:
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, VectorParams, Distance
from langchain import LLMChain, PromptTemplate
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import uuid
import uvicorn
import nest_asyncio

# Apply nest_asyncio so uvicorn.run() can be called from inside Jupyter,
# where an event loop is already running.
nest_asyncio.apply()

# Initialize FastAPI app
app = FastAPI()

# Initialize Qdrant client
client = QdrantClient("localhost", port=6333)

# Embedding model used both for indexing and for querying.
# Alternative that was considered: sentence-transformers/multi-qa-mpnet-base-dot-v1
MODEL = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Load the texts that will be indexed
df = pd.read_csv('output_embeddings.csv')
text_column_name = 'text'

# Recreate collection in Qdrant.
# The vector size is taken from the model instead of being hard-coded, so
# swapping MODEL for one with a different embedding width cannot leave the
# collection configured with a mismatching dimension.
client.recreate_collection(
    collection_name="similar_text",
    vectors_config=VectorParams(
        size=MODEL.get_sentence_embedding_dimension(),
        distance=Distance.COSINE,
    ),
)

# Insert data into Qdrant.
# Texts are encoded and upserted in chunks rather than one row at a time:
# this avoids one model call and one blocking HTTP round-trip per row.
# Rows whose text cell is missing (NaN) are skipped, since they cannot be
# encoded.
texts = df[text_column_name].dropna().tolist()
UPSERT_BATCH_SIZE = 256  # points per upsert request; keeps request bodies bounded

for start in range(0, len(texts), UPSERT_BATCH_SIZE):
    batch_texts = texts[start:start + UPSERT_BATCH_SIZE]
    batch_vectors = MODEL.encode(batch_texts).tolist()
    points = [
        PointStruct(
            id=str(uuid.uuid4()),  # random point id, one per text
            vector=vector,
            # Payload shape is kept as before ("text" + "text_embeddings").
            payload={"text": text, "text_embeddings": vector},
        )
        for text, vector in zip(batch_texts, batch_vectors)
    ]
    client.upsert(
        collection_name="similar_text",
        wait=True,
        points=points,
    )

# Define the input schema for similarity search.
# Request body model for POST /search.
class Query(BaseModel):
    # Free-text question; it is embedded and used as the search vector.
    question: str

# POST /search: embed the question, ask Qdrant for the 3 closest stored
# points in the "similar_text" collection, and return each hit's text
# together with the score Qdrant reported for it.
@app.post("/search")
def search_similar_text(query: Query):
    query_vector = MODEL.encode(query.question).tolist()
    hits = client.search(
        collection_name="similar_text",
        query_vector=query_vector,
        limit=3,
    )

    results = []
    for hit in hits:
        results.append(
            {"text": hit.payload["text"], "similarity_score": hit.score}
        )
    return results

# Start the search API only when this code is executed as the main module.
# host="0.0.0.0" listens on all network interfaces; the server uses port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

In [None]:
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient

import uvicorn

app = FastAPI()

# Initialize SentenceTransformer for embeddings
MODEL = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Initialize Qdrant client
client = QdrantClient(url="http://localhost:6333")

# Initialize GPT-Neo model.
# The EOS token id is supplied as pad_token_id when loading the model.
model_name = "EleutherAI/gpt-neo-2.7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)

# Text generation pipeline.
# do_sample=True is required for `temperature` to have any effect: without
# it generation is greedy and the temperature setting is ignored.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.5
)

# Wrap pipeline with HuggingFacePipeline for LangChain
huggingface_llm = HuggingFacePipeline(pipeline=generator)

# Prompt text for the LLM: an instruction followed by the retrieved context
# and the user's question. {context} and {question} are filled in per call.
_prompt_text = (
    "Based on the following context, answer the question concisely without repeating the context: "
    "Context: {context} "
    "Question: {question} "
    "Answer:"
)

# LangChain prompt built from the text above
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=_prompt_text,
)

# Chain that formats the prompt and sends it to the wrapped HF pipeline
llm_chain = LLMChain(llm=huggingface_llm, prompt=prompt)

# Input schema.
# Request body model for POST /llm_query.
class LLMQuery(BaseModel):
    # Free-text question; used both for retrieval and in the LLM prompt.
    question: str

def _isolate_answer(raw_answer: str) -> str:
    """Return the text between the first and second "Question" in *raw_answer*.

    When fewer than two occurrences of "Question" exist, the whole string is
    returned instead. The result is stripped of surrounding whitespace in
    both cases.
    """
    marker = "Question"
    first = raw_answer.find(marker)
    second = raw_answer.find(marker, first + 1)

    # Fallback if structure is not as expected
    if first == -1 or second == -1:
        return raw_answer.strip()

    # Skip past the first marker (and the colon assumed to follow it).
    # NOTE(review): this also matches "Question" inside the retrieved context
    # or the user's question - confirm that is acceptable.
    return raw_answer[first + len("Question:"):second].strip()


# POST /llm_query: retrieve the single closest text from Qdrant, use it as
# context for the LLM chain, and return question, context and trimmed answer.
@app.post("/llm_query")
def llm_query(query: LLMQuery):
    # Embed the question and fetch the best-matching stored text
    query_vector = MODEL.encode(query.question).tolist()
    hits = client.search(
        collection_name="similar_text",
        query_vector=query_vector,
        limit=1,
    )
    if hits:
        context = " ".join([hit.payload["text"] for hit in hits])
    else:
        context = ""

    # Generate the answer and trim it down to the relevant part
    generated = llm_chain.run(context=context, question=query.question)
    answer = _isolate_answer(generated)

    # Return the response
    body = {
        "question": query.question,
        "context": context,
        "answer": answer,
    }
    return JSONResponse(content=body, headers={"Content-Type": "application/json"})

# Start the LLM query API only when this code is executed as the main module.
# host="0.0.0.0" listens on all network interfaces; the server uses port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)