In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Sources:
- https://medium.com/@murtuza753/using-llama-2-0-faiss-and-langchain-for-question-answering-on-your-own-data-682241488476
- https://betterprogramming.pub/build-a-chatbot-on-your-csv-data-with-langchain-and-openai-ed121f85f0cd
- https://www.kaggle.com/datasets/ananthu017/squad-csv-format

#**Streamlining Setup**: Essential Python Packages Installed with a Single Click!

In [6]:
!pip install -q gradio
!pip install -qU transformers accelerate einops langchain xformers bitsandbytes faiss-gpu sentence_transformers

#**Power-Packed Python Imports**: Building Blocks for Advanced NLP Applications!

In [7]:
import torch
from torch import cuda, bfloat16
import transformers
from transformers import StoppingCriteria, StoppingCriteriaList
from langchain.llms import HuggingFacePipeline
import pandas as pd
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
import gradio

#**Efficient Model Loading and Configuration**: Loading, Configuring, and Preparing a Pre-trained Language Model with Optimization

In [8]:
# Define the model ID for a pre-trained language model
model_id = 'meta-llama/Llama-2-13b-chat-hf'

# Determine the device (GPU if available, else CPU)
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Configure quantization settings for loading the model with less GPU memory usage
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# Initialize HuggingFace authentication token
hf_auth = 'Create_your_own_llama2_API_token_on_huggingface'

# Load the configuration for the pre-trained model
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# Load the model for causal language modeling
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# Set the model in evaluation mode for inference
model.eval()

# Print device information where the model is loaded
print(f"Model loaded on {device}")

Downloading (…)lve/main/config.json:   0%|          | 0.00/587 [00:00<?, ?B/s]



Downloading (…)fetensors.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0


#**Tokenization and Stop List Preparation**: Generating Tokens and IDs for Special Text Sequences

In [9]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]

stop_token_ids



Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [10]:
# define custom stopping criteria object

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

# **Text Generation Configuration**: Setting Up Text Generation with Transformers Pipeline


In [11]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [12]:
llm = HuggingFacePipeline(pipeline=generate_text)

# **Loading Q&A Data from CSV**: Importing Q&A Pairs from a CSV File

In [13]:
# Load your CSV file containing Q&A pairs
csv_file_path = "/content/drive/MyDrive/LLM_Data/Corona_dataset.csv"  # Replace with the actual path to your CSV file
#csv_file_path = "/content/drive/MyDrive/LLM_Data/SQuAD_csv.csv"  # Replace with the actual path to your CSV file

qa_data = pd.read_csv(csv_file_path)

qa_data.head(5)

Unnamed: 0,source,url,question,answer,wrong_answer
0,ABC Australia,https://www.abc.net.au/triplej/programs/hack/c...,What are the symptoms?,"Symptoms include fever, coughing, sore throat,...","As cases of coronavirus surge, health authorit..."
1,ABC Australia,https://www.abc.net.au/triplej/programs/hack/c...,When should I get tested?,Your doctor will tell you if you need to get t...,If you have been in contact with a person with...
2,ABC Australia,https://www.abc.net.au/triplej/programs/hack/c...,What's the difference between physical distanc...,"As cases of coronavirus surge, health authorit...","Remember, the vast majority of Australians are..."
3,ABC Australia,https://www.abc.net.au/triplej/programs/hack/c...,How do I practice physical distancing?,If you have been in contact with a person with...,Other countries have closed schools and univer...
4,ABC Australia,https://www.abc.net.au/triplej/programs/hack/c...,What's closed?,Physical distancing is the reason the Federal ...,"Symptoms include fever, coughing, sore throat,..."


In [14]:
#qa_data = qa_data.loc[:, ['question','text']]
qa_data = qa_data.loc[:, ['question','answer']]
print(qa_data.shape)
qa_data.head(5)

(481, 2)


Unnamed: 0,question,answer
0,What are the symptoms?,"Symptoms include fever, coughing, sore throat,..."
1,When should I get tested?,Your doctor will tell you if you need to get t...
2,What's the difference between physical distanc...,"As cases of coronavirus surge, health authorit..."
3,How do I practice physical distancing?,If you have been in contact with a person with...
4,What's closed?,Physical distancing is the reason the Federal ...


In [15]:
qa_data = qa_data[:20000]
print(qa_data.shape)
qa_data.head(5)

(481, 2)


Unnamed: 0,question,answer
0,What are the symptoms?,"Symptoms include fever, coughing, sore throat,..."
1,When should I get tested?,Your doctor will tell you if you need to get t...
2,What's the difference between physical distanc...,"As cases of coronavirus surge, health authorit..."
3,How do I practice physical distancing?,If you have been in contact with a person with...
4,What's closed?,Physical distancing is the reason the Federal ...


# **Data Loading and Vectorization**: Loading Data from CSV and Creating Vector Store

In [16]:
loader = CSVLoader(file_path=csv_file_path, encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()

# Initialize embeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Initialize the vector store
vectorstore = FAISS.from_documents(data, embeddings)

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [17]:
# Initialize the chat-based retrieval chain
chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

chat_history = []

query = "How do I practice physical distancing?	"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 Stay home as much as possible, avoid crowded places and gatherings, and maintain a distance of at least 1 meter from others whenever possible. If you must go out, limit your group size to no more than 2 people, and avoid sharing personal items like towels, utensils, or drinking glasses. Wear a mask when you're in public, and wash your hands frequently with soap and water.


In [18]:
def chatbot_interface(query):
    chat_history = []
    result = chain({"question": query, "chat_history": chat_history})
    return result['answer']

In [None]:
interface = gradio.Interface(
    fn=chatbot_interface,
    inputs=gradio.inputs.Textbox(),
    outputs="text",
    layout="vertical",
    title="Chatbot for Physical Distancing"
)
interface.launch(share=True)

  inputs=gradio.inputs.Textbox(),
  inputs=gradio.inputs.Textbox(),
  inputs=gradio.inputs.Textbox(),
  interface = gradio.Interface(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://37609dcda83d0bc215.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
