In [1]:
# Install the app's dependencies into the kernel's environment (%pip, not !pip).
# Versions are pinned where this notebook's captured install log shows what was
# resolved; app.py relies on the LangChain 0.2-era import paths, so leaving
# `langchain` unpinned makes a fresh run pick up an incompatible release.
%pip install -q streamlit==1.36.0 pyngrok==7.1.6 faiss-cpu==1.8.0.post1 gpt4all==2.7.0 groq==0.9.0 langchain==0.2.6 huggingface-hub langchain-community langchain-core langchain-groq langchain-openai langchain-text-splitters langserve langsmith sentence-transformers tokenizers transformers uvicorn

Collecting streamlit
  Downloading streamlit-1.36.0-py2.py3-none-any.whl (8.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gpt4all
  Downloading gpt4all-2.7.0-py3-none-manylinux1_x86_64.whl (29.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.8/29.8 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting groq
  Downloading groq-0.9.0-py3-none-any.whl (103 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting langchain
  Downloading langchain-0.2.6-py3-none-any.

In [2]:
%%writefile app.py
import os

import numpy as np
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.csv_loader import UnstructuredCSVLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
CSV_PATH = "/content/sample_data/final_library_books.csv"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
GROQ_MODEL = "gemma-7b-it"
# Name of the environment variable holding the Groq API key. The key must never
# be written into this file: app.py is generated from a notebook that gets
# shared, so a literal key here is a leaked credential.
GROQ_API_KEY_ENV = "GROQ_API_KEY"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 500
TOP_K = 16

prompt_template = """
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])


@st.cache_resource
def load_vectorstore():
    """Load the book catalogue CSV, chunk it, embed it and index it in FAISS.

    Streamlit re-executes this script on every widget interaction, so the
    expensive work (CSV parsing, embedding every chunk, building the index) is
    cached with ``st.cache_resource`` and performed once per server process
    rather than once per query.

    Returns:
        FAISS: vector store over the chunked documents from ``CSV_PATH``.
    """
    loader = UnstructuredCSVLoader(file_path=CSV_PATH, mode="elements")
    # Load once; the original script parsed the same file twice.
    text_documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    documents = text_splitter.split_documents(text_documents)

    # NOTE(review): HuggingFaceBgeEmbeddings is designed for BGE models but is
    # used here with a MiniLM model; it may prepend a BGE-specific instruction
    # to queries. Consider HuggingFaceEmbeddings instead -- confirm before
    # changing, since it alters retrieval results.
    huggingface_embeddings = HuggingFaceBgeEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )

    # Only this index feeds the retriever. The second GPT4All-embedded FAISS
    # index the script used to build was never queried and has been dropped.
    return FAISS.from_documents(documents, huggingface_embeddings)


# Streamlit app
st.title("Your Own AI based Librarian!!")

groq_api_key = os.environ.get(GROQ_API_KEY_ENV)
if not groq_api_key:
    # Fail with an actionable message instead of an opaque auth error later.
    st.error(
        f"Environment variable {GROQ_API_KEY_ENV} is not set. "
        "Set it before launching the app, e.g. "
        f"os.environ['{GROQ_API_KEY_ENV}'] = getpass.getpass() in the notebook."
    )
    st.stop()

vectorstore = load_vectorstore()
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})

llm = ChatGroq(groq_api_key=groq_api_key, model_name=GROQ_MODEL)

# Building the chain is cheap (no model loading), so it is rebuilt per run and
# the API key never becomes part of a cache key.
retrievalQA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

query = st.text_input("Enter your query:")
if query:
    result = retrievalQA.invoke({"query": query})
    st.write(result['result'])



Writing app.py


In [3]:
# streamlit is already installed by the first cell; kept as a quiet no-op.
# %pip (rather than !pip) guarantees the install targets the kernel's environment.
%pip install -q streamlit

In [4]:
# `unstructured` backs the UnstructuredCSVLoader imported by app.py.
# Pinned to the version this notebook was originally run with.
%pip install -q unstructured==0.14.9

Collecting unstructured
  Downloading unstructured-0.14.9-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
Collecting filetype (from unstructured)
  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)
Collecting emoji (from unstructured)
  Downloading emoji-2.12.1-py3-none-any.whl (431 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m431.4/431.4 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
Collecting python-iso639 (from unstructured)
  Downloading python_iso639-2024.4.27-py3-none-any.whl (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.7/274.7 kB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langdetect (from unstructured)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [7]:
# Print this machine's public IPv4 address (fetched from icanhazip.com).
# NOTE(review): presumably needed as the access password on the localtunnel
# landing page opened by the next cell -- confirm.
!wget -q -O - ipv4.icanhazip.com

35.221.181.124


In [None]:
# Start the Streamlit app in the background (`&`), then open a localtunnel to
# port 8501 (the port Streamlit reports serving on) so the app gets a public URL.
# This cell keeps running until it is interrupted.
! streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.221.181.124:8501[0m
[0m
[K[?25hnpx: installed 22 in 3.186s
your url is: https://common-kiwis-chew.loca.lt
2024-06-29 23:04:57.719370: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-29 23:04:57.719442: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-29 23:04:57.722810: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register fact