In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive. load_llm() in utils.py reads the model
# file from the drive/MyDrive/ folder, so this cell has to run first.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install streamlit
!pip install pypdf
!pip install pydantic==1.10.9
!pip install langchain
!pip install accelerate
!pip install bitsandbytes
!pip install transformers
!pip install sentence_transformers
!pip install faiss_gpu
!pip install ctransformers
!pip install streamlit_chat
!pip install pyngrok

Collecting streamlit
  Downloading streamlit-1.25.0-py2.py3-none-any.whl (8.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
Collecting pympler<2,>=0.9 (from streamlit)
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
Collecting tzlocal<5,>=1.1 (from streamlit)
  Downloading tzlocal-4.3.1-py3-none-any.whl (20 kB)
Collecting validators<1,>=0.2 (from streamlit)
  Downloading validators-0.20.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.32-py3-none-any.whl (188 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8 (from streamlit)
  Downloading pydeck-0.8.0-py2.py3-none-any.whl (4.7 

In [None]:
%%writefile utils.py

import torch
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.vectorstores import FAISS
# Directory where store_vector() saves the FAISS index and load_vector() reads it back.
FAISS_PATH = 'vector/similarity_db'

# Run the embedding model on the GPU when one is available and fall back to the
# CPU otherwise, so importing this module does not fail on a CPU-only runtime.
EMBEDDING_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Shared sentence-embedding model used both to build and to query the index.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': EMBEDDING_DEVICE},
)
def store_vector(file_path, delimiter='\t', encoding="utf-8"):
    '''
    Build a FAISS index from a delimited text file and save it to disk.

    The file is read with CSVLoader, the resulting documents are embedded
    with the module-level Hugging Face ``embeddings`` model, and the FAISS
    index built from them is written to ``FAISS_PATH``.

    Args:
        file_path: path of the delimited file to index.
        delimiter: column separator handed to the CSV reader. Defaults to a
            tab character, which is what this function always used before
            the parameter existed.
        encoding: text encoding used to read the file (default "utf-8").

    Returns:
        None. The index is persisted under ``FAISS_PATH``.
    '''
    loader = CSVLoader(
        file_path=file_path,
        encoding=encoding,
        csv_args={'delimiter': delimiter},
    )
    documents = loader.load()
    index = FAISS.from_documents(documents, embeddings)
    index.save_local(FAISS_PATH)

def load_vector():
    '''
    Read the FAISS index saved under FAISS_PATH back into memory.

    The module-level embeddings object is passed to FAISS.load_local.

    Returns:
        The vector store returned by FAISS.load_local.
    '''
    return FAISS.load_local(FAISS_PATH, embeddings)

def load_llm():
    '''
    Create the CTransformers LLM wrapper for the local Llama 2 chat model.

    The model file is read from a path relative to the working directory
    (drive/MyDrive/...), so Google Drive must already be mounted there.

    Returns:
        The constructed CTransformers instance.
    '''
    # NOTE(review): LangChain's CTransformers wrapper documents generation
    # settings through a `config` dict; confirm that max_new_tokens and
    # temperature passed as top-level keyword arguments reach the model.
    settings = {
        "model": "drive/MyDrive/llama-2-7b-chat.ggmlv3.q4_0.bin",
        "model_type": "llama",
        "max_new_tokens": 512,
        "temperature": 0,
    }
    return CTransformers(**settings)

Writing utils.py


In [None]:
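# One-time setup: run this to build the FAISS index that app.py later loads
# through load_vector(). utils.py is only written to disk by the cell above,
# so `from utils import store_vector` is needed in this kernel first.
# store_vector("emp.csv")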

In [None]:
%%writefile app.py


import streamlit as st
from utils import load_llm, load_vector
from langchain.chains import ConversationalRetrievalChain
from streamlit_chat import message

st.title("Employee Book Bot based on CSV")
st.markdown("<h3 style='text-align: center; color: black;'>Built by <a href='https://github.com/hassanrehman11'>Hassan Rehman </a></h3>", unsafe_allow_html=True)


@st.cache_resource
def _load_resources():
    """Load the vector store, the LLM and the retrieval chain once per server process.

    Streamlit re-executes this script on every user interaction. Without
    caching, the FAISS index and the LLM would be loaded again for each
    submitted message. The cached objects are shared by all sessions; the
    chain is built without a memory object and receives the chat history
    explicitly on every call.

    Returns:
        A (vector store, llm, chain) tuple.
    """
    vector_db = load_vector()
    language_model = load_llm()
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=language_model, retriever=vector_db.as_retriever()
    )
    return vector_db, language_model, qa_chain


# Keep the original module-level names so the rest of the script is unaffected.
db, llm, chain = _load_resources()

def conversational_chat(query):
    """Answer ``query`` with the retrieval chain and record the exchange.

    The chain is called with the question and the chat history kept in
    ``st.session_state['history']``; the (query, answer) pair is then appended
    to that history.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    history = st.session_state['history']
    response = chain({"question": query, "chat_history": history})
    answer = response["answer"]
    history.append((query, answer))
    return answer

# Seed the per-session state on the first run of the script:
#   history   - (question, answer) pairs passed to the chain as chat history
#   generated - bot messages shown in the chat, starting with a greeting
#   past      - user messages shown in the chat, starting with a greeting
_session_defaults = {
    'history': [],
    'generated': ["Hello I am an Employee Bot. You can ask anything related to resources!"],
    'past': ["Hello!!!"],
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Container for the chat history; created first so it is laid out above the form.
response_container = st.container()
# Container for the user's text input.
container = st.container()

with container:
    # clear_on_submit empties the text box after each message is sent.
    with st.form(key='my_form', clear_on_submit=True):
        # Placeholder text matches the app's subject (employee data).
        user_input = st.text_input("Query:", placeholder="Ask anything about the employees", key='input')
        submit_button = st.form_submit_button(label='Send')

    # Only query the chain when the form was submitted with non-empty text.
    if submit_button and user_input:
        output = conversational_chat(user_input)

        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(output)

if st.session_state['generated']:
    with response_container:
        # Render the conversation as alternating user / bot bubbles. Each
        # bubble needs a unique widget key, derived from its position.
        exchanges = zip(st.session_state["past"], st.session_state["generated"])
        for i, (user_text, bot_text) in enumerate(exchanges):
            message(user_text, is_user=True, key=f"{i}_user", avatar_style="big-smile")
            message(bot_text, key=str(i), avatar_style="thumbs")


Writing app.py


In [None]:
# Start the Streamlit app in the background (&) and open a localtunnel to
# port 8501 so the app can be reached from outside the Colab VM.
!streamlit run app.py & npx localtunnel --port 8501

[..................] | fetchMetadata: sill resolveWithNewModule localtunnel@2.0[0m[K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://130.211.197.243:8501[0m
[0m
[K[?25hnpx: installed 22 in 4.731s
your url is: https://wise-lions-think.loca.lt
