# Semantic search

This section uses Faiss for semantic search, leveraging the "all-mpnet-base-v2" model as the sentence transformer.

In [None]:
!pip install -q datasets sentence-transformers faiss-cpu accelerate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m47.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source 

In [None]:
from datasets import load_dataset, DatasetDict, Dataset
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os


In [None]:
# Load the combined question sheets from a single CSV; later cells read the rows via dataset['train'].
dataset = load_dataset('csv', data_files='/content/combined_sheets.csv')

# Sentence-embedding model used to encode both the stored rows and incoming search queries.
ST = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

Generating train split: 0 examples [00:00, ? examples/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Map every distinct Year_Round value to an integer code.
# The names are sorted first so each sheet gets the same code on every run: iterating a
# plain set of strings can yield a different order in each interpreter session, which
# would make codes stored in previously saved embeddings impossible to reproduce.
# key=str keeps the sort well-defined even if some rows have no Year_Round value (None).
sheet_name_mapping = {name: idx for idx, name in enumerate(sorted(set(dataset['train']['Year_Round']), key=str))}

In [None]:
# Function to embed the data and include criterion information
def embed_with_criterion(batch):
    """Embed a batch of rows and append each row's Year_Round code as one extra component.

    Args:
        batch: mapping of column name -> list of values; the columns
            "Preamble Text", "Question" and "Year_Round" are read.

    Returns:
        dict with a single key "embeddings": a 2-D array holding, per row, the
        sentence embedding followed by the row's code from sheet_name_mapping.
    """
    texts = []
    for preamble, question in zip(batch["Preamble Text"], batch["Question"]):
        # Rows without a preamble are embedded from the question alone.
        if preamble:
            texts.append(preamble + " " + question)
        else:
            texts.append(question)
    text_vectors = ST.encode(texts)

    # One float32 column holding the integer code of each row's sheet.
    round_codes = [sheet_name_mapping[name] for name in batch["Year_Round"]]
    code_column = np.array(round_codes, dtype=np.float32).reshape(-1, 1)

    return {"embeddings": np.hstack((text_vectors, code_column))}

# Apply the embedding function to the dataset
dataset = dataset.map(embed_with_criterion, batched=True, batch_size=16)

Map:   0%|          | 0/15850 [00:00<?, ? examples/s]

In [None]:
# Persist the embedded dataset, build and save a FAISS index over it, then reload both from disk
save_path = '/content/embedded_dataset'
faiss_index_path = save_path + '/faiss_index'

os.makedirs(save_path, exist_ok=True)
dataset.save_to_disk(save_path)

# Index the "embeddings" column of the train split and write the index next to the dataset files.
train_split = dataset["train"]
train_split.add_faiss_index(column="embeddings")
train_split.save_faiss_index("embeddings", faiss_index_path)

# Reload from disk so the following cells work on the persisted copy.
dataset = DatasetDict.load_from_disk(save_path)
dataset["train"].load_faiss_index("embeddings", faiss_index_path)

Saving the dataset (0/1 shards):   0%|          | 0/15850 [00:00<?, ? examples/s]

  0%|          | 0/16 [00:00<?, ?it/s]

In [None]:
# Function to search for the most similar entries considering the criterion
def search_with_criterion(query: str, k: int = 4, yr_rank_value= None):
    """Embed a query, append the code of the requested sheet, and return the k nearest rows.

    Args:
        query: free-text search string.
        k: number of neighbours to retrieve.
        yr_rank_value: a Year_Round value present in sheet_name_mapping
            (the example cell passes strings of the form "2021 NSMQ contest 10").

    Returns:
        (scores, retrieved_examples) exactly as returned by
        dataset["train"].get_nearest_examples. In the recorded sample output the
        scores are ascending, i.e. they look like distances where smaller means
        closer (presumably the default FAISS metric; confirm if the index changes).

    Raises:
        KeyError: if yr_rank_value is not a known Year_Round value. This includes
            the default None unless None itself is a key of the mapping.
    """
    # The stored vectors carry one extra component (the sheet code), so a query
    # cannot be built without a valid code; fail with an explanatory message.
    if yr_rank_value not in sheet_name_mapping:
        raise KeyError(
            f"Unknown yr_rank_value {yr_rank_value!r}: it must be one of the "
            "Year_Round values present in sheet_name_mapping"
        )

    embedded_query = ST.encode(query)  # Embed new query

    # Append the sheet code so the query has the same layout as the stored vectors.
    criterion_yr_rank = np.array([sheet_name_mapping[yr_rank_value]], dtype=np.float32).reshape(1, 1)
    modified_query_embedding = np.hstack((embedded_query.reshape(1, -1), criterion_yr_rank))

    # Retrieve results
    scores, retrieved_examples = dataset["train"].get_nearest_examples(
        "embeddings", modified_query_embedding, k=k
    )

    return scores, retrieved_examples


# Example usage: search within one specific year/contest sheet.
# year_value and rank_value stay module-level because verify_Q reads them too.
year_value= "2021"
rank_value= "contest 10"

query = "Ask a physics question from 2021"
sheet_name = f"{year_value} NSMQ {rank_value}"
scores, retrieved_examples = search_with_criterion(query, yr_rank_value=sheet_name, k=10)

print(scores)
print(retrieved_examples["Subject"])

[1.5193869 1.583083  1.585773  1.5895677 1.593563  1.601298  1.6100397
 1.6202786 1.670137  1.6743405]
['Physics, Chemistry', 'Physics', 'Physics', 'Physics', 'Mathematics', 'Physics', 'Mathematics', 'Physics', 'Chemistry', 'Physics']


# LLM access

In [None]:

!pip install -q accelerate bitsandbytes
!pip install -q oauth2client pypdf sentence_transformers
!pip install -q transformers einops accelerate bitsandbytes

In [None]:
!pip install -q huggingface_hub
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from google.colab import userdata
# Read the Hugging Face access token from the Colab secret store and keep it, so it
# can be handed to the downloads below (previously the value was fetched and discarded).
hf_token = userdata.get('HF_TOKEN')

model_id = "mistralai/Mistral-7B-Instruct-v0.3"

# use quantization to lower GPU usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=bnb_config
)


tokenizer_config.json:   0%|          | 0.00/137k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
# Descriptions of the five quiz rounds, keyed "Round 1" .. "Round 5".
# The value is a one-element list wrapping a single dict; slash() interpolates
# the whole object into its extraction prompt, so the text below is runtime data.
Round_number= [
    {
        "Round 1": "Fundamentals on Biology, Physics, Chemistry and Maths where each team receives 2/3 sets of questions depending on the stage of the competition you're in.",
        "Round 2": "Speed Race. Quick successive questioning to user and you have to answer a question as quickly and with no delay in providing answers.",
        "Round 3": "Problem of the Day. A question is posed to all three schools and given 3 mins to provide and answer to it.",
        "Round 4": "True/False. Each subject has 2 sets of questions to be answered",
        "Round 5": "Riddles. Each subject has a riddle to answer."
     }
]

In [None]:
# Rolling conversation memory. verify_Q appends one
# {"user_query": ..., "assistant_response": ...} dict per exchange and drops the
# oldest entry once more than two are stored; General_prompt reads the latest one.
storeConvo= [

]

In [None]:
def slash(query):
  """Build the prompt that asks the model to pull structured fields out of `query`.

  The prompt requests a JSON object with the fields "year", "suject", "round"
  and "keywords", using "N/A" for anything the query does not mention. The
  current value of the module-level Round_number is interpolated into the
  "round" field description.

  Args:
      query: the raw user message.

  Returns:
      The filled-in prompt text.
  """
  return f"""

    From the query provided
    {query}

    extract relevant data from it
      Your output should always be in the provided JSON fomart
        -fill the most apporpriate field below
        -if apporpriate data doesn't exist in the new query put "N/A" at the space

           {{
                "year": "Put the YEAR in the query if it was provided eg. "2020" ",
                "suject": "Put the SUBJECT in the query here eg. "Chemistry" ",
                "round": "space for specific Round Number:{Round_number} eg. Round 1",
                "keywords": "Put all other keywords here. eg. what is matter? keyword is matter"
            }}
    """



In [None]:
ASSISTANT = """
I am an assistant for high school students in Ghana.
And my goal is to help them preparing effectively for the National Science and Math Quize(NSMQ).
"""
# If you don't know the answer, just say "I do not know." Don't make up an answer.

def General_prompt(query):
    """Build the prompt for a normal (non-retrieval) user message.

    The prompt contains the user's query plus the most recent entry of the
    module-level storeConvo history.

    Args:
        query: the raw user message.

    Returns:
        The filled-in prompt text.
    """
    # storeConvo is empty until the first exchange has been recorded; indexing
    # [-1] on it would raise IndexError, so use a placeholder in that case.
    past_conversation = storeConvo[-1] if storeConvo else "No previous conversation yet."

    templ_prompt = f"""

    This is an important query from the user
    {query}

    Help the user understand concepts
    You have access to all the past conversation with the user:
    {past_conversation}
    You dont have to repeat the past conversation to the user. Be concise
    Use the past conversations to help the user if needed
    NOTE: Never create any questions. always try and understand what the user wants instead

    """

    return templ_prompt


In [None]:
def general_llm(prompt):
  """Send `prompt` to the loaded chat model and return the generated reply as text.

  Uses the module-level `tokenizer` and `model`. Sampling is enabled
  (temperature 0.6, top_p 0.9), so repeated calls with the same prompt can
  return different text.

  Args:
      prompt: the full prompt text for the final user turn.

  Returns:
      The decoded reply with special tokens removed.
  """
  # A fixed greeting exchange precedes the real prompt in every request.
  messages = [{"role":"user","content":"hello"},
              {"role":"assistant","content":"I am an assistant"},
              {"role":"user","content":prompt}
              ]

  # Render the conversation with the model's chat template and move it to the model's device.
  input_ids = tokenizer.apply_chat_template(
    messages,
    return_tensors="pt"
  ).to(model.device)

  outputs = model.generate(
      input_ids,
      max_new_tokens=1024,
      eos_token_id=tokenizer.eos_token_id,
      pad_token_id=tokenizer.eos_token_id,
      do_sample=True,
      temperature=0.6,
      top_p=0.9,
  )

  # Skip the first input_ids.shape[-1] positions (the prompt) and decode only what follows, once.
  response = outputs[0][input_ids.shape[-1]:]
  return tokenizer.decode(response, skip_special_tokens=True)

In [None]:
import json

In [None]:
def _remember_exchange(user_query, assistant_response):
  """Append one exchange to storeConvo, dropping the oldest once more than two are kept."""
  storeConvo.append({
      "user_query": user_query,
      "assistant_response": assistant_response
    })
  if len(storeConvo) > 2:
    storeConvo.pop(0)


def _extract_keywords(llm_reply, fallback):
  """Return the "keywords" field of the model's JSON reply, or `fallback` if unusable."""
  # The model may wrap the JSON in extra prose, so parse only the outermost {...} span.
  start, end = llm_reply.find("{"), llm_reply.rfind("}")
  candidate = llm_reply[start:end + 1] if start != -1 and end > start else llm_reply
  try:
    keywords = json.loads(candidate)["keywords"]
  except (ValueError, KeyError, TypeError):
    # Not valid JSON, no "keywords" field, or not a JSON object.
    return fallback
  # "N/A" is the placeholder the extraction prompt asks for when nothing was found.
  if not isinstance(keywords, str) or keywords.strip() in ("", "N/A"):
    return fallback
  return keywords


def verify_Q(prompt):
  """Route one user message either to question retrieval or to the general chat model.

  Messages starting with "&" are treated as requests for a stored quiz question:
  keywords are extracted with the LLM, the index is searched within the sheet
  named by the module-level year_value / rank_value, and the closest match is
  returned. Any other message is answered by the LLM via General_prompt.
  Both paths record the exchange in storeConvo.

  Args:
      prompt: the raw user message.

  Returns:
      The reply text: either the retrieved question (with its preamble when
      there is one) or the LLM's answer.
  """
  #checks to see if the user wants to see an NSMQ question
  if prompt.startswith("&"):
    slash_response = general_llm(slash(prompt))
    # Fall back to the user's own text (without the "&" marker) when the reply
    # cannot be parsed, instead of failing the whole request.
    queryy = _extract_keywords(slash_response, fallback=prompt[1:].strip())

    scores, retrieved_examples = search_with_criterion(queryy, k=5, yr_rank_value= f"{year_value} NSMQ {rank_value}")

    # The scores are distances (smaller = more similar), so the best match is the minimum.
    best_match = int(np.argmin(scores))

    question = retrieved_examples['Question'][best_match]
    preamble = retrieved_examples['Preamble Text'][best_match]
    # Some rows have no preamble; return the question alone rather than "... + None".
    store = f"{question} + {preamble}" if preamble else f"{question}"

    _remember_exchange(prompt, store)
    return store

  # continue the normal conversation if not
  store = general_llm(General_prompt(prompt))
  _remember_exchange(prompt, store)
  return store



In [None]:
def access_Q(prompt):
  """Public entry point for a user message: hands it to verify_Q and returns that reply."""
  return verify_Q(prompt)

In [None]:
access_Q("/hey can u ask me a math question")

In [None]:
access_Q("can u explain the question further")

In [None]:
print(storeConvo)

In [None]:
storeConvo.pop(0)

{'user_query': '/hey can u ask me a math question',
 'assistant_response': 'Find the rate at which the surface area is increasing when the radius $r=100 \\mathrm{~cm} /s$ + Air is being pumped into a spherical balloon at a constant rate of $1000 \\mathrm{~cm}^{3} / \\mathrm{s}$'}

# API access point


In [None]:
# Install the required packages
!pip -q install fastapi uvicorn pyngrok

In [None]:
!ngrok authtoken ACCESS_TOKEN

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:

from fastapi import FastAPI, HTTPException
from pyngrok import ngrok
import uvicorn
from threading import Thread

# Define the FastAPI app
app = FastAPI()

@app.get("/gen_llm/{prompt}")
def read_root(prompt):
    result = access_Q(prompt)
    return result

# Function to run the FastAPI app with Uvicorn
def run_app():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start the FastAPI app in a new thread
server_thread = Thread(target=run_app)
server_thread.start()

!killall ngrok
# Expose the FastAPI app with ngrok
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

INFO:     Started server process [471]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


ngrok: no process found
Public URL: NgrokTunnel: "https://f92b-34-125-236-111.ngrok-free.app" -> "http://localhost:8000"


In [None]:
import requests
# Making a request to test the setup
# Take the address from the tunnel opened in the previous cell: a pasted URL
# goes stale as soon as the tunnel is recreated with a different address.
base_url = public_url.public_url
prompt = "ask me a physics question"
response = requests.get(f"{base_url}/gen_llm/{prompt}")
print(response)
print(response.content)

INFO:     34.125.236.111:0 - "GET /gen_llm/ask%20me%20a%20physics%20question HTTP/1.1" 200 OK
<Response [200]>
b'"Based on our previous conversations, I can see that you have shown interest in both physics and math. Here\'s a question that combines both subjects:\\n\\nConsider a simple pendulum with a length L and a bob of mass m. The pendulum is released from an angle \xce\xb8\\\\_0. Find the angular velocity (d\xce\xb8/dt) of the pendulum when it reaches its maximum height.\\n\\nTo solve this problem, we need to consider the conservation of energy and the equations of motion for a simple pendulum. Here\'s a brief overview of the problem:\\n\\n1. We have a simple pendulum with a length L and a bob of mass m.\\n2. The pendulum is released from an angle \xce\xb8\\\\_0.\\n3. We want to find the angular velocity (d\xce\xb8/dt) of the pendulum when it reaches its maximum height.\\n\\nThe angular velocity is a measure of how fast the pendulum is rotating about its axis. To find the angular 