In [None]:
%%capture
# Install Unsloth. The cell output is hidden by the %%capture magic above.
import os
# Colab is detected by looking for the substring "COLAB_" in the concatenated
# names of all environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: a plain install that lets pip resolve dependencies.
    !pip install unsloth
else:
    # On Colab: install the listed packages with --no-deps (pip does not pull in
    # their dependencies), then the remaining packages normally, then unsloth
    # itself, again with --no-deps.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
# Web-serving, tunnelling and retrieval packages used by the cells below (-q keeps pip quiet).
!pip install -q fastapi uvicorn pyngrok nest_asyncio langchain transformers faiss-cpu

In [None]:
# Install or upgrade (-U) langchain-community to the newest available release.
!pip install -U langchain-community

In [None]:
# Install or upgrade (-U) bitsandbytes; the first cell may already have installed it with --no-deps.
!pip install -U bitsandbytes

In [None]:
# Install or upgrade (-U) langchain-openai, which provides the OpenAIEmbeddings class imported below.
!pip install -U langchain-openai

In [None]:
import json
import torch
import nest_asyncio
import threading
import logging
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List, Dict
from pyngrok import ngrok
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

import zipfile

In [None]:
# Patch asyncio via nest_asyncio before any server is started from this notebook.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop — confirm.
nest_asyncio.apply()

# INFO-level logging; `logger` is the module logger used by the request handlers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application that all routes in this notebook are registered on.
app = FastAPI()
# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive for an API that is exposed publicly — confirm this is intended.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])

# Pydantic model describing a chat payload: the user's text plus optional history.
# Comments are used instead of a docstring so the generated schema is unchanged.
class ChatRequest(BaseModel):
    # The user's question / message text (required).
    input: str
    # Optional prior conversation as a list of string-to-string dicts; defaults to None.
    messages: Optional[List[Dict[str, str]]] = None

In [None]:
zip_path = "/content/saved_data.zip"
extract_path = "/content/saved_data"

# Unpack the saved-data archive into `extract_path`.
# Any failure is reported with a printed message instead of being raised, so
# the cell itself never errors out.
try:
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extractall(path=extract_path)
    print("Extraction successful!")
except zipfile.BadZipFile:
    # The file exists but is not a readable zip archive.
    print("Bad zip file or corrupted.")
except Exception as err:
    # Everything else (missing file, permissions, ...).
    print(f"Error: {err}")

In [None]:
import os
from getpass import getpass

vectorstore_path = "/content/saved_data/faiss_index"
store_path = "/content/saved_data/document_store.json"

# The OpenAI key is no longer hard-coded in the notebook: it is read from the
# OPENAI_API_KEY environment variable, and if that is unset or empty the user
# is prompted for it (the typed value is not echoed or stored in the notebook).
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    openai_api_key=os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: "),
)

# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# serialized data from disk; only use this with an index you created yourself.
vectorstore = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)

# Rebuild the parent-document store from its JSON dump. Each entry is expected
# to carry "page_content" and "metadata" keys.
store = InMemoryStore()
with open(store_path, "r", encoding="utf-8") as f:
    store_data = json.load(f)
# One batched mset call instead of one call per document.
store.mset([
    (key, Document(page_content=doc_dict["page_content"], metadata=doc_dict["metadata"]))
    for key, doc_dict in store_data.items()
])

# Retriever over the FAISS index and the in-memory store, configured with a
# 400-character child splitter, no parent splitter, and k=3 search results.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=None,
    search_kwargs={"k": 3}
)


In [None]:
from transformers import AutoTokenizer
from unsloth import FastLanguageModel
from huggingface_hub import snapshot_download

# Loader settings forwarded to FastLanguageModel.from_pretrained below.
max_seq_length = 16384
# NOTE(review): None presumably lets the loader choose the dtype — confirm.
dtype = None
load_in_4bit = True

# Character name -> Hugging Face Hub repo id of that character's adapter.
adapter_map = {
    "sherlock": "ahmedkhaled74/sherlock_lora_v4",
    "moriarity": "ahmedkhaled74/moriarity_lora",
}

# Download every adapter repo up front and remember the path returned for each,
# so that adapters can later be switched using these local paths.
local_adapter_paths = {
    name: snapshot_download(repo_id=repo_id)
    for name, repo_id in adapter_map.items()
}

# Load the model and tokenizer starting from the Sherlock adapter snapshot.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = local_adapter_paths["sherlock"],
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
# Switch the loaded model into Unsloth's inference mode.
model = FastLanguageModel.for_inference(model)

def change_adapter(character_name: str):
    """Load the adapter that belongs to ``character_name`` into the global model.

    The lookup is case-insensitive: the name is lower-cased before it is
    matched against the keys of ``local_adapter_paths``.

    Args:
        character_name: Name of the character whose adapter should be loaded.

    Raises:
        ValueError: If the lower-cased name is not a known character.
    """
    key = character_name.lower()

    try:
        adapter_dir = local_adapter_paths[key]
    except KeyError:
        # Report the unknown name together with the valid choices.
        raise ValueError(
            f"Unknown character_name: {key}. Available: {list(local_adapter_paths)}"
        ) from None

    # Load under the fixed adapter name "default".
    model.load_adapter(adapter_dir, adapter_name="default", replace=True)
    print(f"Switched to {key.title()} adapter.")

# Explicitly load the Sherlock adapter (the model above was also loaded from the Sherlock snapshot).
change_adapter("sherlock")

In [None]:
# Same call as at the end of the previous cell; re-run this cell to switch back to Sherlock.
change_adapter("sherlock")

In [None]:
def create_template(question, possible_answers):
    """Build the user prompt that wraps retrieved scenes around a question.

    Args:
        question: The user's question; inserted after ``Question:``.
        possible_answers: Text block of retrieved scenes; inserted under ``Scenes:``.

    Returns:
        The instruction line followed by the scenes and the question, each on
        a line indented by four spaces, ending with a newline plus four spaces.
    """
    prompt_lines = (
        "Use the context you have and answer based on it. Use the context to answer any question about events while impersonating your character.",
        "    Scenes:",
        f"    {possible_answers}",
        f"    Question: {question}",
        "    ",  # trailing indented line, kept so the prompt text is unchanged
    )
    return "\n".join(prompt_lines)


In [None]:
from transformers import TextStreamer
def character_answer(question: str, messages: Optional[List[Dict[str, str]]] = None) -> str:
    """Answer ``question`` in character, grounded in retrieved documents.

    Args:
        question: The user's question; also used as the retrieval query.
        messages: Optional earlier chat turns, placed before the new user turn.

    Returns:
        The generated reply with the prompt tokens removed and surrounding
        whitespace stripped.
    """
    # Fetch supporting passages and number (at most) the first three.
    retrieved = retriever.get_relevant_documents(question)
    scenes = "".join(
        f"Answer {rank}:\n{passage.page_content}\n"
        for rank, passage in enumerate(retrieved[:3], start=1)
    )

    # The new user turn carries both the retrieved scenes and the question.
    user_turn = {
        "role": "user",
        "content": create_template(possible_answers=scenes, question=question),
    }
    conversation = [*messages, user_turn] if messages else [user_turn]

    prompt_ids = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    generated = model.generate(input_ids=prompt_ids, max_new_tokens=2056, use_cache=True)

    # Decode only what follows the prompt: skip the first prompt_ids.shape[1] tokens.
    completion_ids = generated[0][prompt_ids.shape[1]:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()



In [None]:
import uvicorn
from fastapi import Request

# Request body for the /change_adapter route.
# Comments are used instead of a docstring so the generated schema is unchanged.
class AdapterRequest(BaseModel):
    # Name of the character whose adapter should be loaded; the route lower-cases it.
    character_name: str

@app.post("/change_adapter")
def change_adapter_endpoint(request: AdapterRequest):
    """Switch the served model to another character's adapter.

    The handler is deliberately NOT named ``change_adapter``: an earlier cell
    defines a plain helper ``change_adapter(character_name)``, and reusing the
    name here would rebind the global and break later
    ``change_adapter("sherlock")`` calls. The route path is unchanged.

    Args:
        request: Body carrying ``character_name`` (matched case-insensitively).

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        HTTPException: 400 if the character is unknown, 500 if loading fails.
    """
    character_name = request.character_name.lower()

    if character_name not in local_adapter_paths:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown character_name '{character_name}'. Available: {list(local_adapter_paths.keys())}"
        )

    new_adapter_path = local_adapter_paths[character_name]

    try:
        model.load_adapter(new_adapter_path, adapter_name="default", replace=True)
    except Exception as e:
        # Keep the original failure chained for server-side debugging.
        raise HTTPException(status_code=500, detail=f"Failed to load adapter: {str(e)}") from e

    return {"message": f"Switched to {character_name.title()} adapter."}

@app.post("/chat/character")
async def chat_character(request: Request):
    """Answer a chat message in character.

    Expects a JSON object with a non-empty string ``input`` and, optionally,
    ``messages`` (earlier chat turns).

    Returns:
        ``{"response": <answer>}`` on success. Unexpected failures are logged
        and reported as ``{"response": "Error: ...", "error": True}``.

    Raises:
        HTTPException: 400 when ``input`` is missing, empty or not a string.
    """
    try:
        body = await request.json()

        # Tolerate a non-object body or a non-string "input" instead of
        # crashing on .get()/.strip(); both count as a missing input.
        raw_question = body.get("input") if isinstance(body, dict) else None
        question = raw_question.strip() if isinstance(raw_question, str) else ""
        if not question:
            raise HTTPException(status_code=400, detail="Missing 'input' field")

        # A missing or null "messages" means there is no prior history.
        messages = body.get("messages") or []

        logger.info(f"Received question: {question}")
        answer = character_answer(question, messages)

        return {"response": answer}
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) reach the client with
        # their status code instead of being turned into a 200 response.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error in /chat/character: {str(e)}")
        return {"response": f"Error: {str(e)}", "error": True}

def run_server():
    """Serve ``app`` with uvicorn on all interfaces, port 8001 (blocking call)."""
    bind_host, bind_port = "0.0.0.0", 8001
    uvicorn.run(app, host=bind_host, port=bind_port)

import os
from getpass import getpass

# Run the blocking uvicorn server in a background thread so this cell returns.
server_thread = threading.Thread(target=run_server)
server_thread.start()

# The ngrok token is no longer hard-coded in the notebook: it is read from the
# NGROK_AUTHTOKEN environment variable, and if that is unset or empty the user
# is prompted for it (the typed value is not echoed or stored in the notebook).
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: "))
public_url = ngrok.connect(8001)

# ngrok.connect may return a tunnel object rather than a plain string; formatting
# that object directly does not yield a usable URL. Use its `public_url`
# attribute when present, and fall back to the value itself otherwise.
base_url = getattr(public_url, "public_url", public_url)
print(f"🚀 API Base: {base_url}")
print(f"🧠 Sherlock endpoint: {base_url}/chat/character")

In [None]:
# Smoke test: call the answer pipeline directly (no HTTP involved) and print the reply.
question = "introduce yourself"
print(character_answer(question))