INSTALLING BACKEND DEPENDENCIES

In [None]:
# Backend dependencies. %pip (rather than !pip) installs into the environment
# of the running kernel.
# torch is pinned to 2.1.0, the version this notebook actually ran with; pinning
# 2.0.1 here only caused a large download that was replaced straight away.
# NOTE(review): pdfminer.six, bitsandbytes, requests and bs4 are unpinned —
# pin them to the versions of a known-good run for reproducibility.
%pip install -q -U langchain==0.0.279
%pip install -q -U torch==2.1.0
%pip install -q -U accelerate==0.22.0
%pip install -q -U sentence_transformers==2.2.2
%pip install -q -U chromadb==0.4.2
%pip install -q -U pdfminer.six
%pip install -q -U bitsandbytes
%pip install -q -U requests
%pip install -q -U bs4

In [None]:
# Forces torch 2.1.0 into the environment, replacing whatever torch version an
# earlier install cell left behind.
!pip install -q -U torch==2.1.0

In [None]:
# Set up Git LFS, then clone the LaMini-T5-738M model repository from the
# Hugging Face Hub into the working directory.
# NOTE(review): no later cell in this notebook appears to read the cloned
# LaMini-T5-738M directory (load_model uses a different model id) — confirm
# whether this download is still needed.
!git lfs install
!git clone https://huggingface.co/MBZUAI/LaMini-T5-738M

Git LFS initialized.
Cloning into 'LaMini-T5-738M'...
remote: Enumerating objects: 38, done.[K
remote: Total 38 (delta 0), reused 0 (delta 0), pack-reused 38[K
Unpacking objects: 100% (38/38), 610.56 KiB | 6.43 MiB/s, done.


INSTALLING FASTAPI AND REQUIRED DEPENDENCIES

In [None]:
# Web-serving stack: FastAPI, the uvicorn server, and nest-asyncio (applied
# right before uvicorn.run in the server cell).
# NOTE(review): these packages are installed without version pins.
!pip install fastapi uvicorn nest-asyncio

UPLOAD FILES TO THE DOCS FOLDER (COLAB)

In [None]:
import os
from google.colab import files

# Destination directory for the documents that the backend indexes
folder_name = 'docs'
os.makedirs(folder_name, exist_ok=True)

# Open the Colab upload widget; the result maps each file name to its bytes
uploaded = files.upload()

# Write every uploaded payload into the destination directory
for filename in uploaded:
    with open(os.path.join(folder_name, filename), 'wb') as f:
        f.write(uploaded[filename])

print(f'Uploaded files are saved in the folder: {folder_name}')

Saving hesc101.pdf to hesc101.pdf
Saving hesc102.pdf to hesc102.pdf
Uploaded files are saved in the folder: docs


BACKEND CODE

In [None]:
import os
from os.path import join
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline as hf_pipeline
from langchain import HuggingFacePipeline
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Module-level state shared by the functions below. Each value starts as None
# and is filled in on first use so the expensive setup is not repeated.
global_model = None      # assigned by load_model()
global_tokenizer = None  # assigned by load_model()
db = None                # Chroma vector store, assigned by generate_embeddings()



# Generating Embeddings from the docs folder
def generate_embeddings(docs_dir="docs", chunk_size=500, chunk_overlap=500):
  """Index every PDF found under ``docs_dir`` into the persisted Chroma store.

  The resulting store is kept in the module-level ``db`` variable; when ``db``
  is already set the function returns immediately without touching the disk.

  Args:
    docs_dir: Directory that is walked recursively for ``*.pdf`` files.
    chunk_size: Chunk size passed to ``RecursiveCharacterTextSplitter``.
    chunk_overlap: Overlap passed to ``RecursiveCharacterTextSplitter``.
      NOTE(review): the default equals ``chunk_size`` (kept for backward
      compatibility); an overlap this large looks unintended — confirm and
      consider a much smaller value.

  Raises:
    FileNotFoundError: If no PDF content could be loaded from ``docs_dir``.
  """
  import os

  from langchain.document_loaders import PDFMinerLoader
  from langchain.embeddings import SentenceTransformerEmbeddings
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.vectorstores import Chroma

  global db

  if db is not None:
    return

  # Load the documents of *every* PDF. Keeping only one loader variable and
  # calling load() after the loop would index just the last file visited.
  documents = []
  for root, _dirs, filenames in os.walk(docs_dir):
    for filename in sorted(filenames):
      if filename.endswith(".pdf"):
        documents.extend(PDFMinerLoader(os.path.join(root, filename)).load())

  if not documents:
    raise FileNotFoundError(
        f"No PDF content found under '{docs_dir}'. Upload documents before initializing."
    )

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  texts = text_splitter.split_documents(documents)

  # The embedding model name and the "db" directory must stay in sync with
  # qa_llm(), which reopens the same persisted store.
  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  db = Chroma.from_documents(texts, embeddings, persist_directory="db")



def load_model():
  """Load the quantized causal LM and its tokenizer into the module globals.

  ``global_model`` and ``global_tokenizer`` are each loaded only when they are
  still ``None``, so a repeated call (or a call after only one of the two
  loads succeeded) does not load the model weights again.
  """
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

  global global_model, global_tokenizer

  model_id = "OdiaGenAI/mistral_hindi_7b_base_v1"

  if global_model is None:
    # 4-bit NF4 weights with double quantization; computation runs in float16.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    global_model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", quantization_config=quantization_config
    )

  if global_tokenizer is None:
    global_tokenizer = AutoTokenizer.from_pretrained(model_id)




def initialize_model():
  """Run the one-time setup: build the vector store first, then load the LLM."""
  for setup_step in (generate_embeddings, load_model):
    setup_step()

def llm_pipeline():
  """Wrap the loaded model and tokenizer in a LangChain ``HuggingFacePipeline``.

  Returns:
    A ``HuggingFacePipeline`` built around a transformers text-generation
    pipeline that uses ``global_model`` and ``global_tokenizer``.

  Raises:
    ValueError: If the model or tokenizer has not been loaded yet.
  """
  # Fail with an actionable message instead of an AttributeError on
  # ``None.eos_token_id`` further down.
  if global_model is None or global_tokenizer is None:
    raise ValueError("Model not initialized. Call initialize_model() first.")

  pipe = hf_pipeline(
      model=global_model,
      tokenizer=global_tokenizer,
      task="text-generation",
      use_cache=True,
      device_map="auto",
      max_length=2000,
      do_sample=True,
      top_k=5,
      temperature=0.01,
      num_return_sequences=1,
      eos_token_id=global_tokenizer.eos_token_id,
      # The EOS token id is reused as the padding token id.
      pad_token_id=global_tokenizer.eos_token_id,
  )
  local_llm = HuggingFacePipeline(pipeline=pipe)
  return local_llm

def qa_llm():
    """Assemble a RetrievalQA chain over the Chroma store persisted in "db".

    The chain uses the "stuff" chain type and is configured to return the
    source documents alongside the answer.
    """
    language_model = llm_pipeline()
    vector_store = Chroma(
        persist_directory="db",
        embedding_function=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2"),
    )
    return RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )



def generate_response(instruction):
  """Answer ``instruction`` with the retrieval QA chain.

  Returns:
    A tuple ``(answer, generation)``: ``generation`` is the full output of the
    chain call and ``answer`` is its ``'result'`` entry.
  """
  generation = qa_llm()(instruction)
  return generation['result'], generation


INSTALLING NGROK FOR COLAB

In [None]:
# Install pyngrok, which the server cell uses to open a public tunnel to the
# local port.
# NOTE(review): unpinned; the recorded run resolved to pyngrok 7.0.5.
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.0.5-py3-none-any.whl (21 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.0.5


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# The ngrok authtoken is a credential and must never be stored in the notebook.
# Read it from the NGROK_AUTHTOKEN environment variable, or prompt for it
# (getpass does not echo the value into the cell output).
# NOTE: the token that used to be hardcoded in this cell has been exposed and
# should be revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


FASTAPI CODE — main.py


In [None]:
# main.py
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from pyngrok import ngrok

# Import your custom functions
# from llm import initialize_model, generate_response  # Make sure to import these functions

# FastAPI application instance that the middleware and routes below are registered on
app = FastAPI()

class ConfigData(BaseModel):
    """Request body accepted by the POST /settings route."""
    # Presumably named with a trailing underscore because `class` is a Python
    # keyword; no alias is declared here, so verify the key clients send.
    class_: str
    subject: str

class UserQuery(BaseModel):
    """Request body accepted by the POST /home route."""
    # The user's query text; home_route passes it to generate_response()
    messages: str

# Enable CORS (Cross-Origin Resource Sharing)
# Only http://localhost:3000 is listed as an allowed origin; every method and
# header is allowed and credentials are permitted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the model once
# Runs when this cell executes, before the server starts: builds the vector
# store and loads the model via initialize_model().
initialize_model()


@app.post("/settings")
def config(settings: ConfigData):
    """Acknowledge the selected class and subject.

    Returns:
        A dict with a ``"response"`` message and an empty ``"reasoning"``.

    Raises:
        HTTPException: 400 when ``class_`` or ``subject`` is empty, 500 on any
            unexpected error.
    """
    try:
        if not settings.class_ or not settings.subject:
            raise HTTPException(status_code=400, detail="Class and subject are required fields")

        response_string = f"Let me provide you the Syllabus for {settings.class_} - {settings.subject}"

        return {"response": response_string, "reasoning": ""}
    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the
        # deliberate 400 above would be caught below and reported as a 500.
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise HTTPException(status_code=500, detail="Internal Server Error")

@app.post("/home")
def home_route(home: UserQuery):
    """Answer the user's query with the retrieval QA backend.

    Returns:
        A dict whose ``"response"`` is the generated answer and whose
        ``"reasoning"`` is the full chain output from ``generate_response``.

    Raises:
        HTTPException: 400 when ``messages`` is empty, 500 on any unexpected
            error.
    """
    try:
        if not home.messages:
            raise HTTPException(status_code=400, detail="Empty value")

        # Call the custom function to generate a response using RetrievalQA
        answer, generation = generate_response(home.messages)

        return {"response": answer, "reasoning": generation}
    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the
        # deliberate 400 above would be caught below and reported as a 500.
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise HTTPException(status_code=500, detail="Internal Server Error")

# Run the application using ngrok
if __name__ == "__main__":
    import nest_asyncio
    import uvicorn

    # Single source of truth for the port shared by the tunnel and the server
    port = 8001

    ngrok_tunnel = ngrok.connect(port)
    print('Public URL:', ngrok_tunnel.public_url)

    # Applied before uvicorn.run so the server can be started from the notebook
    nest_asyncio.apply()
    try:
        uvicorn.run(app, port=port)
    finally:
        # Close the tunnel once the server stops; otherwise every re-run of
        # this cell leaves another tunnel open.
        ngrok.disconnect(ngrok_tunnel.public_url)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

pytorch_model-00001-of-00008.bin:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

pytorch_model-00002-of-00008.bin:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

pytorch_model-00003-of-00008.bin:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

pytorch_model-00004-of-00008.bin:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

pytorch_model-00005-of-00008.bin:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

pytorch_model-00006-of-00008.bin:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

pytorch_model-00007-of-00008.bin:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

pytorch_model-00008-of-00008.bin:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/487 [00:00<?, ?B/s]

Public URL: https://41bc-34-125-168-224.ngrok-free.app


INFO:     Started server process [433]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)


INFO:     112.134.197.209:0 - "OPTIONS /settings HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /settings HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "OPTIONS /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "OPTIONS /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "OPTIONS /home HTTP/1.1" 200 OK
INFO:     112.134.197.209:0 - "POST /home HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [433]


In [None]:
# !pip install -q -U bitsandbytes
# !pip install -q -U git+https://github.com/huggingface/transformers.git
# !pip install -q -U git+https://github.com/huggingface/peft.git
# !pip install -q -U git+https://github.com/huggingface/accelerate.git
# !pip install -q -U einops
# !pip install -q -U safetensors
# !pip install -q -U torch
# !pip install -q -U xformers
# !pip install -q -U langchain==0.0.279
# !pip install -q -U ctransformers[cuda]
# !pip install -q -U chromadb==0.4.2
# !pip install sentence-transformers

In [None]:
# !pip install unstructured[pdf]

In [None]:
# # llm.py
# import torch
# from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline as hf_pipeline  # Rename the pipeline import
# from langchain import HuggingFacePipeline
# from langchain import PromptTemplate, LLMChain
# from langchain.globals import set_debug, set_verbose

# # Global variables to store the initialized model and tokenizer
# global_model = None
# global_tokenizer = None

# def initialize_model():

#     global global_model, global_tokenizer

#     import torch
#     print(torch.version.cuda)

#     if global_model is None or global_tokenizer is None:
#         model_id = "OdiaGenAI/mistral_hindi_7b_base_v1"

#         quantization_config = BitsAndBytesConfig(
#             load_in_4bit=True,
#             bnb_4bit_compute_dtype=torch.float16,
#             bnb_4bit_quant_type="nf4",
#             bnb_4bit_use_double_quant=True,
#         )

#         global_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=quantization_config)
#         global_tokenizer = AutoTokenizer.from_pretrained(model_id)

#         set_debug(True)
#         set_verbose(True)

# # Function to generate a response using the initialized model and tokenizer
# def generate_response(question, context=""):
#     global global_model, global_tokenizer

#     if global_model is None or global_tokenizer is None:
#         raise ValueError("Model not initialized. Call initialize_model() first.")

#     text_gen_pipeline = hf_pipeline(  # Use the alias hf_pipeline for the transformers pipeline
#         "text-generation",
#         model=global_model,
#         tokenizer=global_tokenizer,
#         use_cache=True,
#         device_map="auto",
#         max_length=2000,
#         do_sample=True,
#         top_k=5,
#         temperature=0.01,
#         num_return_sequences=1,
#         eos_token_id=global_tokenizer.eos_token_id,
#         pad_token_id=global_tokenizer.eos_token_id,
#     )

#     llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

#     template = """### System:\nBelow is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n\n\n### Instruction:\n{question}\n\n### Input:\n{context}\n\n### Response:\n"""
#     prompt = PromptTemplate(template=template, input_variables=["question", "context"])
#     llm_chain = LLMChain(prompt=prompt, llm=llm)

#     response = llm_chain.run({"question": question, "context": context})
#     return response


In [None]:
# %%capture
# # main.py
# from fastapi import FastAPI, HTTPException
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel
# from pyngrok import ngrok

# # Import your custom functions
# # from llm import initialize_model, generate_response

# app = FastAPI()

# class ConfigData(BaseModel):
#     class_: str
#     subject: str

# class UserQuery(BaseModel):
#     messages: str

# # Enable CORS (Cross-Origin Resource Sharing)
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["http://localhost:3000"],
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )

# # Initialize the model once
# initialize_model()

# @app.post("/settings")
# def config(settings: ConfigData):
#     try:
#         if not settings.class_ or not settings.subject:
#             raise HTTPException(status_code=400, detail="Class and subject are required fields")

#         response_string = f"Let me provide you the Syllabus for {settings.class_} - {settings.subject}"

#         return {"response": response_string, "reasoning": ""}
#     except Exception as e:
#         print(f"An error occurred: {e}")
#         raise HTTPException(status_code=500, detail="Internal Server Error")

# @app.post("/home")
# def home_route(home: UserQuery):
#     try:
#         if not home.messages:
#             raise HTTPException(status_code=400, detail="Empty value")

#         # Call the custom function to generate a response
#         response = generate_response(home.messages)

#         return {"response": response, "reasoning": ""}
#     except Exception as e:
#         print(f"An error occurred: {e}")
#         raise HTTPException(status_code=500, detail="Internal Server Error")

# # Run the application using ngrok
# if __name__ == "__main__":
#     import nest_asyncio

#     ngrok_tunnel = ngrok.connect(8001)
#     print('Public URL:', ngrok_tunnel.public_url)

#     nest_asyncio.apply()
#     import uvicorn
#     uvicorn.run(app, port=8001)


IMPLEMENTING RAG

In [None]:
# import os
# from chromadb.config import Settings


# CHROMA_SETTINGS = Settings(
#     chroma_db_impl='duckdb+parquet',
#     persist_directory='db',
#     anonymized_telemetry=False
# )

In [None]:
# from langchain.document_loaders import PyPDFLoader, PDFMinerLoader, DirectoryLoader
# from langchain.embeddings import SentenceTransformerEmbeddings
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.vectorstores import Chroma
# from os.path import join
# import os

In [None]:
# for root,dir,files in os.walk("docs"):
#         for file in files:
#             if file.endswith(".pdf"):
#                 loader = PDFMinerLoader(join(root,file))
# documents = loader.load()

In [None]:
# textsplitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=500)
# texts = textsplitter.split_documents(documents)


# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
# db = Chroma.from_documents(texts, embeddings, persist_directory="db", client_settings=CHROMA_SETTINGS)