<a href="https://colab.research.google.com/github/KaziShadmanSakib/EducationalChatbot/blob/main/EducationalChatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Installation and Imports**

In [None]:
!pip install pypdf



In [None]:
!pip install -q transformers einops accelerate langchain bitsandbytes

In [None]:
# embeddings
!pip install sentence_transformers



In [None]:
!pip install llama-index



In [None]:
# Hugging Face LLM integration for llama-index; provides the `llama_index.llms.huggingface` module imported below.
%pip install llama-index-llms-huggingface



In [None]:
!pip install langchain-community # install the correct package 'langchain-community'



In [None]:
# LangChain embedding adapter for llama-index; provides the `llama_index.embeddings.langchain` module imported below.
%pip install llama-index-embeddings-langchain



# **Huggingface Login**

In [None]:
# Authenticate with the Hugging Face Hub; the CLI asks for an access token interactively (input is hidden).
# NOTE(review): presumably required because the meta-llama checkpoint loaded later is access-gated — confirm.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: read).

# **Data Load**

In [None]:
from google.colab import drive
from llama_index.core import SimpleDirectoryReader

# Where Google Drive is attached inside the Colab VM, and the Drive folder
# that holds the knowledge-base files.
drive_mount_point = '/content/drive'
knowledge_base_dir = "/content/drive/My Drive/Llama_2_Data/"

# Make the Drive contents reachable from this runtime
drive.mount(drive_mount_point)

# Read the files in the knowledge-base folder into llama-index documents
knowledge_base_reader = SimpleDirectoryReader(knowledge_base_dir)
documents = knowledge_base_reader.load_data()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Preview only: displaying the full `documents` list dumps the entire parsed
# corpus into the notebook output, so show the count and the first entry instead.
print(f"{len(documents)} documents loaded")
documents[:1]

[Document(id_='2b131607-33c2-4ef5-8909-4dd363871a50', embedding=None, metadata={'page_label': 'a1', 'file_name': 'Social_Science_Student.pdf', 'file_path': '/content/drive/My Drive/Llama_2_Data/Social_Science_Student.pdf', 'file_type': 'application/pdf', 'file_size': 19715726, 'creation_date': '2025-01-06', 'last_modified_date': '2025-01-03'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='History \nand \nSocial Science\nActivity Book\nClass Six\nNational Curriculum and Textbook Board, Bangladesh', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}

# **System Prompt**

In [None]:
from llama_index.core.prompts.prompts import SimpleInputPrompt

# Behavioural rules handed to the language model together with every query
system_prompt ="""
You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output or answer, avoid creating output or answer with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Generate output or answer in the same language at which the question is asked.
- Never generate offensive or foul language.

"""
# Wraps each query in the instruction markers of the Llama-2 chat format
# ([INST] ... [/INST]). The <|USER|>/<|ASSISTANT|> tags used previously come
# from a different model's chat template and are not special to Llama-2, so
# the model could not tell where the user turn ended.
query_wrapper_prompt = SimpleInputPrompt("[INST] {query_str} [/INST]")

# **Llama2 7b LLM**

In [None]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.llms.huggingface import HuggingFaceLLM

# One Hugging Face checkpoint id serves both the tokenizer and the model weights
llama2_checkpoint = "meta-llama/Llama-2-7b-chat-hf"

# 8-bit loading to cut memory use, with the fp32 CPU offload flag enabled
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Arguments for model loading: fp16 dtype plus the 8-bit/offload configuration above
llm_model_kwargs = {
    "torch_dtype": torch.float16,
    "quantization_config": quantization_config,
}

# Arguments for generation: sampling is switched off
llm_generate_kwargs = {"temperature": 0.0, "do_sample": False}

# llama-index wrapper around the Llama-2 7B chat checkpoint
llm = HuggingFaceLLM(
    model_name=llama2_checkpoint,
    tokenizer_name=llama2_checkpoint,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs=llm_generate_kwargs,
    device_map="auto",
    model_kwargs=llm_model_kwargs,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
!pip install --upgrade transformers huggingface_hub



In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding


# LangChain embedding object for the all-mpnet-base-v2 sentence encoder
sentence_encoder = HuggingFaceEmbeddings(
  model_name="sentence-transformers/all-mpnet-base-v2"
)

# Adapter that lets llama-index use the LangChain embedding; it produces the
# document/query vectors used for semantic similarity and retrieval.
embed_model = LangchainEmbedding(sentence_encoder)

  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Knowledge Base
import torch
from llama_index.core import VectorStoreIndex


# Hand cached-but-unused GPU memory back before the documents are embedded
torch.cuda.empty_cache()

# Vector index over the loaded documents; embed_model is passed explicitly so
# the index embeds with that model.
index = VectorStoreIndex.from_documents(
    documents=documents,
    embed_model=embed_model,
)

In [None]:
# Combines the Llama2 LLM and the vector store index for query answering:
# the engine is created from `index` and is given `llm` to generate the answers.
query_engine = index.as_query_engine(llm=llm)

In [None]:
# The question is sent through the query engine built on the vector index,
# and the answer is generated by Llama2.
summary_question = "Write a summary about Sheikh Mujib."
response = query_engine.query(summary_question)

In [None]:
# Show the answer produced by the previous cell's query
print(response)

Sheikh Mujibur Rahman, also known as Bangabandhu, was a courageous and empathetic leader who was born in Tungipara, Bangladesh in 1920. He was the first Bengali Muslim politician to conquer the minds of the general mass, and he was named Sher-e-Bangla (Lion of Bengal). Mujib was a teenager when he formed a children's team and was the leader of the volunteers who organized a conference with eminent political leaders. He was actively engaged in social works and sports and passed his matriculation exam in 1941. Mujib's leadership qualities, empathy for the poor, and dedication to his duties were evident even at a young age. He was a future leader who was recognized by his teachers, and his qualities were overwhelming but also petrifying.


In [None]:
# Interactive chat: every line typed by the user is answered by the query engine.
# Typing N (upper or lower case, surrounding spaces ignored) ends the session.
print('Type N to end the chat.')
while True:
  print("User↓ ")
  try:
    query = input().strip()
  except (EOFError, KeyboardInterrupt):
    # Input stream closed or the cell was interrupted: leave the loop cleanly
    # instead of ending the cell with a traceback.
    break
  if query.upper() == "N":
    break
  if not query:
    # Nothing was typed; ask again rather than sending an empty query to the model.
    continue
  print("AI↓ ")
  response = query_engine.query(query)
  print(response)

User↓ 
Hello!
AI↓ 
Hello! I'm Khushi Apa, your History and Social Science teacher. It's great to see you in Class Six today! I hope you're excited to start this new academic year with your new classmates. Let's get started with our first lesson, shall we? 😊
User↓ 
Khushi Apa, I had some queries about History and Social Science.
AI↓ 
Of course, Khushi Apa! I'm here to help you with your queries. What would you like to know? Please provide me with more details or context about the topic you are interested in, and I will do my best to assist you.
User↓ 
Who was Sir Isaac Newton? Write a summary about Sir Isaac Newton.
AI↓ 
Sir Isaac Newton (1642-1727) was a British mathematician, physicist, and astronomer who is widely recognized as one of the most influential scientists in history. He made groundbreaking contributions to the fields of mathematics, optics, and physics, and his work laid the foundation for classical mechanics. Newton was born in Woolsthorpe, Lincolnshire, England, and he s

# **Save LLM**

In [None]:
from transformers import AutoTokenizer

# Save the tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer.save_pretrained("/content/drive/My Drive/Llama_2_Model/")

# Save the model
llm.save_pretrained("/content/drive/My Drive/Llama_2_Model/")

# Save the index to disk
index.save_to_disk("/content/drive/My Drive/Llama_2_Model/vector_index.json")

embed_model.save("/content/drive/My Drive/Llama_2_Model/")


!zip -r llama2_qna_system.zip /content/drive/My Drive/Llama_2_Model/

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# **Save LLM Locally**

In [None]:
import locale
import logging
import sys

import torch
from transformers import BitsAndBytesConfig

# Same package layout as the rest of this notebook (llama-index >= 0.10 split
# packages and langchain-community); the legacy `from llama_index import ...`
# and `langchain.embeddings...` paths do not match the packages installed above.
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.prompts.prompts import SimpleInputPrompt
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# Make locale.getpreferredencoding() always report UTF-8.
# NOTE(review): presumably a Colab workaround for a non-UTF-8 locale — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

# TODO notes kept from the original cell:
# - prepare a doc and comment about how to connect to llama2 using today's demo
# - how do we connect to and manipulate llama2
# - educational chatbot for grade 6 students to teach math
# - check the llama research paper for concerns in the chat version

# Send INFO-level logs to stdout. basicConfig only configures the root logger
# when it has no handlers yet; the explicit handler is added in either case.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


system_prompt = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

# This will wrap the default prompts that are internal to llama-index.
# Llama-2 chat checkpoints use [INST] ... [/INST] instruction markers; the
# <|USER|>/<|ASSISTANT|> tags used previously come from another model's template.
query_wrapper_prompt = SimpleInputPrompt("[INST] {query_str} [/INST]")

# 8-bit loading is requested through BitsAndBytesConfig rather than a bare
# `load_in_8bit` entry in model_kwargs.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    # fp16 dtype plus 8-bit quantization to reduce memory usage
    model_kwargs={"torch_dtype": torch.float16, "quantization_config": quantization_config},
)

embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

# Global llama-index defaults. This replaces the former
# ServiceContext.from_defaults(chunk_size=1024, llm=llm, embed_model=embed_model):
# the index loaded below and its query engine pick these values up.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1024

# To build the index from raw documents instead of loading a persisted one:
# documents = SimpleDirectoryReader("/data/").load_data()
# index = VectorStoreIndex.from_documents(documents)

# Specify the directory where your model is saved
persist_dir = './model'

# Create a storage context from the directory
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)

# Load the index from the storage context
index = load_index_from_storage(storage_context)

# Now you can use the index just like an in-memory model
query_engine = index.as_query_engine()
response = query_engine.query("What is the birth date of Mujib?")

# Print the results
print(response)