In [None]:
import os
from getpass import getpass

# Keep the key out of the saved notebook: reuse a value that is already present
# in the environment, otherwise ask for it interactively (input is not echoed).
# The previous version overwrote any existing key with an empty string.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")


In [None]:
# Install necessary libraries
!pip install faiss-cpu
!pip install langchain langchain-community huggingface_hub
!pip uninstall sentence-transformers
!pip install sentence-transformers
!pip uninstall InstructorEmbedding -y # Uninstall InstructorEmbedding to ensure a clean install
!pip uninstall InstructorEmbedding -y # Uninstall InstructorEmbedding to ensure a clean install
!pip install InstructorEmbedding
!pip install torch==2.0.1

# Import necessary libraries
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
import os
import requests
from typing import Optional, List

# Read the Google API key from the environment instead of hardcoding it.
# A real key must never be committed to the notebook; the key that used to be
# written on this line should be treated as leaked and revoked.
# If the variable is missing or empty, prompt for it (input is not echoed).
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")

# Custom LLM class to integrate with LangChain and use Google Generative AI
class GoogleGenerativeAI(LLM):
    """LangChain ``LLM`` wrapper around the Google Generative Language REST API.

    Each call sends the prompt to the ``text-bison-001:generateText`` endpoint
    and returns the text of the first candidate. The API key is read from the
    ``GOOGLE_API_KEY`` environment variable at call time.

    NOTE(review): confirm that this model/endpoint is still served for your
    project; if it has been retired, the request will fail with an HTTP error.
    """

    def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager=None, **kwargs) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text sent to the model.
            stop: Optional stop sequences forwarded to the API.
            run_manager: Accepted for compatibility with LangChain's callback
                plumbing; not used here.
            **kwargs: Extra call-time options passed by LangChain; ignored.

        Returns:
            The ``output`` of the first candidate, or ``"No response available"``
            when the API returns no candidate text.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` is unset or empty.
            requests.HTTPError: If the API responds with an error status.
        """
        api_key = os.getenv("GOOGLE_API_KEY")  # Fetch the API key from environment variables
        if not api_key:
            raise ValueError("GOOGLE_API_KEY is not set; provide it before calling the model.")

        # API keys are sent in the x-goog-api-key header; "Authorization: Bearer"
        # is reserved for OAuth access tokens and is rejected for plain keys.
        headers = {
            "x-goog-api-key": api_key,
            "Content-Type": "application/json",
        }
        json_data = {
            # generateText expects the prompt as an object with a "text" field.
            "prompt": {"text": prompt},
            "temperature": 0.5,
            "maxOutputTokens": 1024,
        }
        if stop:
            json_data["stopSequences"] = list(stop)

        response = requests.post(
            "https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText",
            headers=headers,
            json=json_data,
            timeout=60,  # never hang the notebook on a stalled connection
        )
        # Surface auth/quota/model errors instead of answering "No response available".
        response.raise_for_status()

        # "candidates" can be missing or an empty list (e.g. filtered output);
        # fall back to an empty candidate so the default text is returned.
        candidates = response.json().get("candidates") or [{}]
        return candidates[0].get("output", "No response available")

    @property
    def _identifying_params(self) -> dict:
        """Parameters that identify this LLM configuration to LangChain."""
        return {"model_name": "google-generative-ai"}

    @property
    def _llm_type(self) -> str:
        """Short type name reported to LangChain."""
        return "google-generative-ai"  # Custom type name

# Embedding model shared by index creation and index loading
from langchain_community.embeddings import HuggingFaceEmbeddings

# Hugging Face checkpoint used to turn text into vectors
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


# Path handed to FAISS.save_local / FAISS.load_local
vectordb_file_path = "faiss_index"

def create_vector_db():
    """Build a FAISS index from ``QandA.csv`` and save it to ``vectordb_file_path``.

    Rows are loaded with ``CSVLoader`` (latin-1 encoded, with the ``Question``
    column used as each document's source) and embedded with the module-level
    ``embeddings`` object.
    """
    # Adjust source_column if the CSV uses a different column name
    documents = CSVLoader(
        file_path='QandA.csv',
        source_column="Question",
        encoding='latin-1',
    ).load()

    FAISS.from_documents(documents=documents, embedding=embeddings).save_local(vectordb_file_path)

# Function to initialize the QA chain
def get_qa_chain():
    """Load the saved FAISS index and wrap it in a ``RetrievalQA`` chain.

    Returns:
        A ``RetrievalQA`` "stuff" chain that takes its question under the
        ``"query"`` key, answers with ``GoogleGenerativeAI`` using the prompt
        below, and also returns the retrieved source documents.
    """
    # SECURITY: FAISS.load_local unpickles the stored docstore, which can run
    # arbitrary code. Only load an index that this notebook created itself
    # (see create_vector_db); never point this at an untrusted file.
    vectordb = FAISS.load_local(vectordb_file_path, embeddings, allow_dangerous_deserialization=True)

    # A bare score_threshold=... keyword is not a retriever field, so the
    # threshold was never applied. It has to be requested through search_type
    # and search_kwargs to filter out low-relevance documents.
    retriever = vectordb.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )

    prompt_template = """Given the following context and a question, generate an answer based on this context only.
    In the answer, try to provide as much text as possible from the "response" section in the source document context without making many changes.
    If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

    CONTEXT: {context}

    QUESTION: {question}"""

    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    chain = RetrievalQA.from_chain_type(
        llm=GoogleGenerativeAI(),  # Use the custom LLM with Google Generative AI
        chain_type="stuff",
        retriever=retriever,
        input_key="query",
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT}
    )

    return chain

# Create the vector database and initialize the QA chain.
# Order matters: get_qa_chain() loads the index that create_vector_db() has
# just written to vectordb_file_path.
create_vector_db()
chain = get_qa_chain()


Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1
Collecting langchain
  Downloading langchain-0.2.16-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.17-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-core<0.3.0,>=0.2.38 (from langchain)
  Downloading langchain_core-0.2.40-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.4-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.1

  embeddings = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
! pip install streamlit -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m49.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.9/82.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Print the public IPv4 address of this machine as reported by icanhazip.com.
# NOTE(review): presumably needed as the access password on the localtunnel
# landing page — confirm.
!wget -q -O - ipv4.icanhazip.com

35.225.167.110


In [None]:
# Install the localtunnel CLI globally, pinned to version 2.0.2.
!npm install -g localtunnel@2.0.2

[K[?25h
added 22 packages, and audited 23 packages in 2s

3 packages are looking for funding
  run `npm fund` for details

1 [33m[1mmoderate[22m[39m severity vulnerability

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [1]:
! streamlit run main.py & npx localtunnel --port 8501

/bin/bash: line 1: streamlit: command not found
[1G[0JNeed to install the following packages:
  localtunnel@2.0.2
Ok to proceed? (y) [20G^C
