In [None]:
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth
!pip install --upgrade gradio groq
!pip install --upgrade --quiet  langchain langchain-community langchainhub
!pip install qdrant-client>=1.1.1 sentence-transformers
!pip install langchain langchain-community 

In [None]:
import json
import time
import torch
import numpy as np
import gradio as gr
import pandas as pd
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from langchain import HuggingFacePipeline
from langchain.llms import CTransformers
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
import warnings
warnings.filterwarnings('ignore')

# Qdrant client in local in-memory mode (":memory:"); nothing is persisted to disk.
client = QdrantClient(":memory:")

# Read the dataset with an explicit UTF-8 encoding so the load does not depend
# on the platform's default text encoding.
with open('ku.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


# Sentence encoder used for both indexing and querying.
# Alternative tried: SentenceTransformer('thenlper/gte-large') — performed well,
# but was not able to catch queries from outside the context.
encoder = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')  # works well


def create_upload(collection_name, encoding_column):
    """(Re)create a Qdrant collection and index every record of ``data`` in it.

    Args:
        collection_name: Name of the collection to recreate and fill.
        encoding_column: Key of each record whose text is embedded and stored
            as that record's vector.

    Each point's id is the record's position in ``data``; its payload carries
    the record's ``input`` and ``output`` fields.
    """
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            # Vector size is defined by the embedding model in use.
            size=encoder.get_sentence_embedding_dimension(),
            distance=models.Distance.COSINE,
        ),
    )

    # Encode all texts in a single call so the encoder can batch them, rather
    # than running one separate encode call per record.
    texts = [doc[encoding_column] for doc in data]
    vectors = encoder.encode(texts) if texts else []

    client.upload_points(
        collection_name=collection_name,
        points=[
            models.PointStruct(
                id=idx,
                vector=vector.tolist(),
                payload={'input': doc["input"], 'output': doc['output']},
            )
            for idx, (doc, vector) in enumerate(zip(data, vectors))
        ],
    )

def search(collection_name, query, limit=6):
    """Run a vector search for a text query against one collection.

    Args:
        collection_name: Name of the collection to search.
        query: Query text; it is embedded with ``encoder`` to form the query vector.
        limit: Maximum number of hits to request. Defaults to 6, the value that
            was previously hard-coded.

    Returns:
        The hits returned by ``client.search``.
    """
    return client.search(
        collection_name=collection_name,
        query_vector=encoder.encode(query).tolist(),
        limit=limit,
    )

In [None]:
# Build one collection per embedded field: (collection name, record key to embed).
for _collection, _column in (('ins_db', 'instruction'), ('out_db', 'output')):
    create_upload(_collection, _column)

In [None]:
from unsloth import FastLanguageModel
# Options for loading the "KU_Prospectus_LLM" checkpoint: 2048-token maximum
# sequence length, no explicit dtype, 4-bit loading enabled.
_load_options = dict(
    model_name="KU_Prospectus_LLM",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_load_options)
# Prepare the loaded model for generation via unsloth's for_inference hook.
FastLanguageModel.for_inference(model)

In [None]:
# Prompt template for the prospectus model. Placeholders filled via str.format:
#   {answers}    - retrieved context passages
#   {ins}        - instruction text
#   {user_input} - the user's query
#   {out}        - response slot (left empty at inference time)
# Fixes relative to the earlier text: the scripted greeting now uses the bot's
# actual name ("Zaroon", not "zaroor") and the second example is labelled
# "Input:" (not "Inout:") so both few-shot examples share the same format.
my_prompt = """You are a helpful chatbot for "Karachi University" named "Zaroon". You will respond only to questions regarding Karachi University's prospectus.

Introduce yourself "Assalamualikum , this is Zaroon from Karachi University. What can i help you?".

ATTENTION!!! After answering correctly, stop writing.
ATTENTION!!! DO NOT MAKE UP ANSWERS.
ATTENTION!!! Return data as formatted as possible.
ATTENTION!!! If user asks anything outside of university questions, say "I don't know".

Example (from prospectus):
Input: What undergraduate programs are offered at Karachi University?
Output: Karachi University offers undergraduate programs in disciplines such as Arts, Science, Business Administration, and Computer Science, among others. For detailed information, please refer to the prospectus.

Example (not from prospectus):
Input: What is the population of Karachi?
Output: I don't know. I only respond to questions regarding Karachi University's prospectus.

Here is the context from database: \n {answers} \n

FIRST CHECK IF CONTEXT FROM DATABASE IS AT ALL RELATED TO USER QUERY IF YES THEN FORM AN ANSWER FROM CONTEXT AND YOUR INFORMATION THEN ANSWER IF NOT RELATED THEN RESPOND "I Can't Answer This".


### Instruction: {ins}
### Input: {user_input}
### Response: {out}
"""

In [None]:
from groq import Groq
import re

def routing(routing_query):
    """Classify a user query as in-domain or out-of-domain using a Groq-hosted LLM.

    Args:
        routing_query: The raw user query text.

    Returns:
        'Outside Domain' or 'University Prospectus Chatbot' when the model's
        reply contains that label (checked in that order), otherwise "Unknown".

    Raises:
        RuntimeError: If the GROQ_API_KEY environment variable is not set.
    """
    import os  # function-scope import keeps this cell self-contained

    # Read the key from the environment instead of embedding it in the notebook.
    # NOTE(review): the key that was previously committed here must be treated
    # as compromised and rotated.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("Set the GROQ_API_KEY environment variable before calling routing().")

    # Named groq_client so it does not shadow the module-level Qdrant `client`.
    groq_client = Groq(api_key=api_key)
    completion = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "system",
                "content": """You are a genius and a helpful chatbot that helps decide whether the given query should be sent to "University Prospectus Chatbot" or "Outside Domain".                 
                Example #1:
                    Input: What undergraduate programs are offered at Karachi University?
                    Output: University Prospectus Chatbot
                Example #2:
                    Input: What is the population of Karachi?
                    Output: Outside Domain
                Given the user query just tell the user if it belongs to "University Prospectus Chatbot" or "Outside Domain"."""
            },
            {
                "role": "user",
                "content": routing_query
            },
        ],
        # Routing is a classification step: sample greedily so the same query
        # is always routed the same way.
        temperature=0,
        max_tokens=1024,
        top_p=1,
        stream=False,
        stop=None,
    )
    # Guard against a missing message body so the regex checks cannot fail on None.
    resp = completion.choices[0].message.content or ""
    print("RESP:", resp, type(resp))

    # Match labels case-insensitively; the model may not reproduce the exact casing.
    if re.search(r'Outside Domain', resp, re.IGNORECASE):
        return 'Outside Domain'
    elif re.search(r'University Prospectus Chatbot', resp, re.IGNORECASE):
        return 'University Prospectus Chatbot'
    else:
        return "Unknown"

In [None]:
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer
import torch

def generate_response(query, input_context="", max_new_tokens=1024):
    """Answer a query with the prospectus model, or refuse if it is off-topic.

    Args:
        query: The user's question.
        input_context: Currently unused; kept so existing callers keep working.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated text (the streamer skips the prompt and special tokens)
        when the query is routed to the prospectus chatbot; otherwise a fixed
        refusal message. A string is always returned.
    """
    # Route exactly once: every call is a remote LLM request, and two separate
    # calls for the same query are not guaranteed to agree.
    route = routing(query)
    if route != "University Prospectus Chatbot":
        # "Outside Domain" and any unrecognised route share the same refusal,
        # so callers never receive None.
        return "I am sorry, I'm not sure I was trained on this."

    # Keep only retrieved passages whose similarity score clears the threshold.
    ans = "".join(
        hit.payload['output'] + '\n\n'
        for hit in search('out_db', query)
        if hit.score > 0.3
    )

    prompt = my_prompt.format(answers=ans, user_input=query, ins='You are a helpful chatbot at "Karachi University". You introduce yourself as "Zaroon" and only answer questions regarding university prospectus', out='')
    print(prompt)
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
    )

    # Generation runs in a worker thread while this thread drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = "".join(streamer)
    # The streamer is exhausted once generation ends; join so the worker
    # thread is not left dangling.
    thread.join()
    return generated_text

def chat(query, history):
    """Gradio callback: answer ``query`` and append the exchange to the history.

    Args:
        query: Text from the input box.
        history: List of (user message, bot message) tuples; None is treated
            as an empty history.

    Returns:
        A ``(history, "")`` tuple; the empty string is sent back to the textbox
        output to clear it.
    """
    history = history if history is not None else []

    # Nothing to answer: skip routing/generation for empty or blank input.
    if not query or not query.strip():
        return history, ""

    response = generate_response(query)
    if response is None:
        # Defensive: never call string methods on a missing response.
        response = "I am sorry, I'm not sure I was trained on this."

    # Keep only the text after the last response marker, then strip the
    # formatting artefacts the model tends to emit.
    answer = response.split('### Response:')[-1]
    for artefact in ('"', 'output:', 'answer:', '*', 'Answer:'):
        answer = answer.replace(artefact, '')
    # Collapse line breaks and runs of whitespace into single spaces; simply
    # deleting newlines would glue the words on adjacent lines together.
    answer = " ".join(answer.split())

    history.append((query, answer))
    return history, ""

# Chat UI: a chatbot pane, a query textbox, and Submit / Clear buttons.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Enter your query", placeholder="Type your message here...")
    submit_button = gr.Button("Submit")
    clear_button = gr.Button("Clear Conversation")

    # chat() returns (history, ""): the history updates the chatbot and the
    # empty string is written back to the textbox.
    submit_button.click(chat, inputs=[msg, chatbot], outputs=[chatbot, msg])
    # Reset both the conversation and the textbox.
    clear_button.click(lambda: ([], ""), outputs=[chatbot, msg], queue=False)

# Enable the request queue and launch the app (share link, browser tab, debug mode).
if __name__ == "__main__":
    demo.queue().launch(share=True, inbrowser=True, debug=True)