<a href="https://colab.research.google.com/github/MeerBaloch7/chatbot/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install  wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=6b3aad6f6b9ba3cdf3ae75b9cec3b07645bf1d7c1eeca34ad719998f1b843f49
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


## Downloading the llama-2-7B chat model from huggingface

In [2]:
import os

import wget

# Quantised Llama-2-7B chat weights (GGUF, Q2_K) published on Hugging Face.
model_url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"
model_filename = os.path.basename(model_url)

# wget.download never overwrites an existing file: it saves a second copy under
# a "(1)"-style name. Skip the download when the weights are already present so
# that re-running this cell does not waste time and disk on a multi-GB file.
if not os.path.exists(model_filename):
    wget.download(model_url)

# Last expression of the cell: show which file the apps below will load.
model_filename

'llama-2-7b-chat.Q2_K.gguf'

### Streamlit for interactive web app design

In [None]:
!pip install streamlit
!pip install llama-index-legacy
!pip install llama-index-llms-llama-cpp
!pip install langchain

In [None]:
%%writefile app.py
import pandas as pd
import streamlit as st
from llama_index.core import (
  SimpleDirectoryReader,
  VectorStoreIndex,
  ServiceContext,
)
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.legacy.llms.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.schema import(SystemMessage, HumanMessage, AIMessage)
#---------------------------#
# function
def init_page():
  """Set up the page title/header and render the sidebar CSV uploader.

  Returns:
    Whatever ``st.sidebar.file_uploader`` returns: the uploaded CSV file
    object, or a falsy value while the user has not uploaded anything
    (``main`` checks it with ``if file:``).
  """
  st.set_page_config(
    page_title="Personal Chatbot"
  )
  # Header text previously read "Persoanl Chatbot"; it now matches the page title.
  st.header("Personal Chatbot")
  st.sidebar.title("Options")
  file = st.sidebar.file_uploader("Upload a file", type=["csv"], key="file_uploader")
  return file

# Streamlit re-executes the whole script on every widget interaction. Caching
# the model as a resource means the GGUF weights are loaded once per server
# process instead of once per chat message.
@st.cache_resource
def select_llm() -> LlamaCPP:
  """Build the local Llama-2 chat model used to answer questions.

  Returns:
    A ``LlamaCPP`` instance backed by the GGUF file downloaded earlier in the
    notebook, configured with the Llama-2 prompt formatters.
  """
  return LlamaCPP(
    model_path="/content/llama-2-7b-chat.Q2_K.gguf",
    temperature=0.1,
    max_new_tokens=500,
    # Kept a little below 4096, the value the original author chose.
    context_window=3900,
    generate_kwargs={},
    model_kwargs={"n_gpu_layers":1},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
  )

def init_messages() -> None:
  """Render the "Clear Conversation" button and (re)seed the chat history.

  The history in ``st.session_state.messages`` is reset to a single system
  message when the button is pressed or when no history exists yet; otherwise
  it is left untouched.
  """
  reset_requested = st.sidebar.button("Clear Conversation", key="clear")
  history_exists = "messages" in st.session_state
  if history_exists and not reset_requested:
    return

  system_prompt = SystemMessage(
    content="you are a helpful AI assistant. Reply your answer in markdown format."
  )
  st.session_state.messages = [system_prompt]

def get_answer(llm, messages) -> str:
  """Ask the model for a completion and return its text.

  Args:
    llm: Object exposing ``complete(prompt)`` whose result has a ``.text``
      attribute.
    messages: Either a ready-made prompt string, or an iterable of chat
      message objects. Message objects are flattened to one
      ``"<type>: <content>"`` line each, because ``complete`` is given a
      single prompt string rather than a list.

  Returns:
    The ``.text`` of the completion response.
  """
  if isinstance(messages, str):
    prompt = messages
  else:
    # Fall back gracefully for items that lack .type/.content (e.g. plain strings).
    prompt = "\n".join(
      f"{getattr(item, 'type', 'message')}: {getattr(item, 'content', item)}"
      for item in messages
    )
  response = llm.complete(prompt)
  return response.text

def read_and_chunk_file(file):
    """Read a CSV and turn every row into one space-separated text chunk.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A list with one string per data row; each string is the row's cell
        values (converted with ``astype(str)``) joined by single spaces.
        A CSV that contains only a header row yields an empty list.
    """
    df = pd.read_csv(file)
    # Iterate over plain tuples instead of DataFrame.apply(axis=1): on a frame
    # with zero rows, apply() hands back a DataFrame, which has no .tolist().
    rows = df.astype(str).itertuples(index=False, name=None)
    return [' '.join(row) for row in rows]

def store_chunks_in_vector_store(chunks):
    """Index the text chunks so they can be searched by similarity.

    Args:
        chunks: Iterable of strings (one per CSV row, as produced by
            ``read_and_chunk_file``).

    Returns:
        A ``VectorStoreIndex`` containing one document per chunk.
    """
    # Local import: Document is not part of this file's top-level import list.
    from llama_index.core import Document

    # A VectorStoreIndex cannot be created empty and filled through
    # ``add_document``; wrap each chunk in a Document and build the index in
    # one call instead.
    documents = [Document(text=chunk) for chunk in chunks]
    # NOTE(review): from_documents embeds with the globally configured
    # embedding model - confirm a local embed model is set up, since none is
    # configured anywhere in this file.
    return VectorStoreIndex.from_documents(documents)

def get_relevant_chunks(query, vector_store_index):
    """Return the text of the indexed chunks most similar to ``query``.

    Args:
        query: The user's question.
        vector_store_index: Index exposing ``as_retriever()``, e.g. the
            ``VectorStoreIndex`` built by ``store_chunks_in_vector_store``.

    Returns:
        A list of strings, one per retrieved hit, in the order the retriever
        returned them.
    """
    # The index itself has no ``query`` method; retrieval goes through a
    # retriever object, whose hits expose their text via ``get_content()``.
    retriever = vector_store_index.as_retriever()
    return [hit.get_content() for hit in retriever.retrieve(query)]


def _build_prompt(messages, context_chunks=()) -> str:
  """Flatten the chat history, plus optional retrieved rows, into one prompt string."""
  lines = []
  if context_chunks:
    lines.append("Use the following rows from the uploaded CSV to answer the question:")
    lines.extend(str(chunk) for chunk in context_chunks)
    lines.append("")
  lines.extend(f"{message.type}: {message.content}" for message in messages)
  return "\n".join(lines)


def main() -> None:
  """Run one pass of the Streamlit chat app.

  Sets up the page and model, indexes the uploaded CSV (if any), answers a
  newly submitted question, and finally re-renders the whole conversation
  stored in ``st.session_state.messages``.
  """
  file = init_page()
  llm = select_llm()
  init_messages()

  vector_store_index = None
  if file:
    chunks = read_and_chunk_file(file)
    vector_store_index = store_chunks_in_vector_store(chunks)

  if user_input := st.chat_input("Input your question!"):
    st.session_state.messages.append(HumanMessage(content=user_input))

    # Retrieve supporting rows only when a CSV has been indexed.
    relevant_chunks = []
    if vector_store_index is not None:
      relevant_chunks = get_relevant_chunks(user_input, vector_store_index)

    with st.spinner("Bot is typing ..."):
      # get_answer takes exactly (llm, prompt). The prompt is built here as a
      # plain string so that the retrieved rows actually reach the model.
      prompt = _build_prompt(st.session_state.messages, relevant_chunks)
      answer = get_answer(llm, prompt)
    st.session_state.messages.append(AIMessage(content=answer))

  # Replay the conversation; the system message is intentionally not shown.
  for message in st.session_state.get("messages", []):
    if isinstance(message, AIMessage):
      with st.chat_message("assistant"):
        st.markdown(message.content)
    elif isinstance(message, HumanMessage):
      with st.chat_message("user"):
        st.markdown(message.content)



# Entry point: build and render the app when this file is executed as a script
# (the notebook launches it with `streamlit run app.py`).
if __name__ == "__main__":
  main()

Overwriting app.py


In [7]:
!npm install -g localtunnel

[K[?25h
added 22 packages, and audited 23 packages in 3s

3 packages are looking for funding
  run `npm fund` for details

1 [33m[1mmoderate[22m[39m severity vulnerability

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [None]:
!streamlit run app.py &>/content/logs.txt &
!npx localtunnel --port 8501

In [None]:
!streamlit run app.py & npx localtunnel --port 8501

In [None]:
!pip install streamlit-chat
!pip install -U langchain-community
!pip install sentence-transformers
!pip install faiss-gpu
!pip install ctransformers
!pip install huggingface_hub

In [4]:
import os
from getpass import getpass

# Never store an access token in the notebook: it ends up in version control
# and in every shared copy. Any token that was ever committed in this cell
# should be treated as leaked and revoked in the Hugging Face account settings.
# Prompt for the token (input is not echoed) unless it is already present in
# the environment.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

In [5]:
%%writefile app2.py
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain

DB_FAISS_PATH = 'vectorstore/db_faiss'

#Loading the model
# Streamlit re-runs this script on every interaction; caching the model as a
# resource keeps the weights loaded once per server process.
@st.cache_resource
def load_llm():
    """Load the locally downloaded Llama-2 chat model through CTransformers.

    Returns:
        A ``CTransformers`` LLM pointing at the GGUF file in ``/content``,
        using the generation settings passed below.
    """
    llm = CTransformers(
        model = "/content/llama-2-7b-chat.Q2_K.gguf",
        model_type="llama",
        max_new_tokens = 512,
        temperature = 0.5
    )
    return llm


st.title("Sales data analytics with Llama 2 🦙🦜")
uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")

if uploaded_file:
    # Local import: this script's import list has no `os`, which is needed
    # below to delete the temporary file.
    import os

    # Streamlit re-executes the script on every interaction. Embedding the CSV
    # and building the chain is expensive, so do it only when a different file
    # is uploaded and keep the result in the session.
    upload_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get('chain_signature') != upload_signature:
        # CSVLoader only accepts a file path, so spill the upload to disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={
                        'delimiter': ','})
            data = loader.load()
        finally:
            # delete=False leaves the file behind; remove it once it is loaded.
            os.remove(tmp_file_path)

        embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                           model_kwargs={'device': 'cpu'})

        db = FAISS.from_documents(data, embeddings)
        db.save_local(DB_FAISS_PATH)
        llm = load_llm()
        st.session_state['chain'] = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={'k': 2}),
            return_source_documents=True,
        )
        st.session_state['chain_signature'] = upload_signature

    chain = st.session_state['chain']

    def conversational_chat(query):
        """Answer ``query`` using the chain and record the turn in the session history."""
        # invoke() replaces the deprecated chain(...) call form.
        result = chain.invoke({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))
        return result["answer"]

    # (question, answer) pairs handed back to the chain as chat history.
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    # Bot-side and user-side transcripts; the lists stay index-aligned.
    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello ! Ask me anything about " + uploaded_file.name + " 🤗"]

    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey ! 👋"]

    # Container for the chat history (declared first so it renders above the input).
    response_container = st.container()
    # Container for the user's text input.
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):

            user_input = st.text_input("Query:", placeholder="Talk to your csv data here (:", key='input')
            submit_button = st.form_submit_button(label='Send')

        if submit_button and user_input:
            output = conversational_chat(user_input)

            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")







Writing app2.py


In [8]:
!streamlit run app2.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.46.114.162:8501[0m
[0m
your url is: https://nice-toes-agree.loca.lt

>> from langchain.document_loaders import CSVLoader

with new imports of:

>> from langchain_community.document_loaders import CSVLoader
You can use the langchain cli to **automatically** upgrade many imports. Please see documentation here <https://python.langchain.com/v0.2/docs/versions/v0_2/>
  warn_deprecated(

>> from langchain.embeddings import HuggingFaceEmbeddings

with new imports of:

>> from langchain_community.embeddings import HuggingFaceEmbeddings
You can use the langchain cli to **automatically** upgrade many imports. Please see documentation here <https://python.langchain.com/v0.2/docs/version

In [None]:
!pip install  pinecone transformers

In [None]:
!pip install pinecone-client==5.0.1

In [None]:
%%writefile app3.py
import tempfile
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import Replicate
from langchain.text_splitter import CharacterTextSplitter
import streamlit as st
from streamlit_chat import message
import os
import pinecone
from langchain.vectorstores import Pinecone

# Credentials must come from the environment, never from source code. Keys that
# were previously committed in this file should be treated as leaked and
# rotated with the respective providers.
_missing_credentials = [
    name for name in ("REPLICATE_API_TOKEN", "PINECONE_API_KEY")
    if not os.environ.get(name)
]
if _missing_credentials:
    st.error("Missing required environment variable(s): " + ", ".join(_missing_credentials))
    st.stop()

# `pinecone` is a module and cannot be called; the client class is pinecone.Pinecone.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("sales")

st.title("Sales data analytics with Llama 2 🦙🦜")
uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")
if uploaded_file:
    # Streamlit re-executes the script on every interaction. Without this
    # guard every chat message would re-embed the CSV and upsert all of its
    # rows into the Pinecone index again, piling up duplicate vectors.
    upload_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get('qa_chain_signature') != upload_signature:
        # CSVLoader only accepts a file path, so spill the upload to disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={
                        'delimiter': ','})
            data = loader.load()
        finally:
            # delete=False leaves the file behind; remove it once it is loaded.
            os.remove(tmp_file_path)

        # Splitting the text into chunks
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        docs = text_splitter.split_documents(data)

        # Embeddings
        embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                           model_kwargs={'device': 'cpu'})

        # NOTE(review): both index_name and index are passed here, unchanged
        # from the original - confirm the installed vector store accepts both.
        vectordb = Pinecone.from_documents(docs, embeddings, index_name="sales", index=index)

        # Replicate-hosted Llama 2 chat model
        llm = Replicate(model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
                        input={"temperature": 0.75, "max_length": 3000})

        # QA chain
        st.session_state['qa_chain'] = ConversationalRetrievalChain.from_llm(
            llm,
            vectordb.as_retriever(search_kwargs={'k': 2}),
            return_source_documents=True)
        st.session_state['qa_chain_signature'] = upload_signature

    qa_chain = st.session_state['qa_chain']

    def conversational_chat(query):
        """Answer ``query`` using the QA chain and record the turn in the session history."""
        # invoke() replaces the deprecated chain(...) call form.
        result = qa_chain.invoke({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))
        return result["answer"]

    # (question, answer) pairs handed back to the chain as chat history.
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    # Bot-side and user-side transcripts; the lists stay index-aligned.
    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello ! Ask me anything about " + uploaded_file.name + " 🤗"]

    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey ! 👋"]

    # Container for the chat history (declared first so it renders above the input).
    response_container = st.container()
    # Container for the user's text input.
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_input("Query:", placeholder="Talk to your csv data here (:", key='input')
            submit_button = st.form_submit_button(label='Send')

            if submit_button and user_input:
                output = conversational_chat(user_input)

                st.session_state['past'].append(user_input)
                st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")






Overwriting app3.py


In [None]:
!streamlit run app3.py & npx localtunnel --port 8501