In [1]:
import os
import streamlit as st
import pickle
import time
import requests
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings  # Substitute with alternative embeddings
from langchain.vectorstores import FAISS

In [9]:
pip install -U langchain-huggingface

Note: you may need to restart the kernel to use updated packages.


In [26]:
pip install google-generativeai

Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install -U tqdm




ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandas-profiling 3.2.0 requires visions[type_image_path]==0.7.4, but you have visions 0.7.5 which is incompatible.
ydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.26.4 which is incompatible.



Collecting tqdm
  Downloading tqdm-4.67.0-py3-none-any.whl (78 kB)
                                              0.0/78.6 kB ? eta -:--:--
     ---------------                          30.7/78.6 kB ? eta -:--:--
     ---------------                          30.7/78.6 kB ? eta -:--:--
     ---------------                          30.7/78.6 kB ? eta -:--:--
     ---------------                          30.7/78.6 kB ? eta -:--:--
     -------------------------------------- 78.6/78.6 kB 312.1 kB/s eta 0:00:00
Installing collected packages: tqdm
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.65.0
    Uninstalling tqdm-4.65.0:
      Successfully uninstalled tqdm-4.65.0
Successfully installed tqdm-4.67.0


In [32]:
pip install Runnable


Collecting Runnable
  Downloading runnable-0.13.0-py3-none-any.whl (122 kB)
                                              0.0/122.7 kB ? eta -:--:--
     ----------                               30.7/122.7 kB ? eta -:--:--
     ----------                               30.7/122.7 kB ? eta -:--:--
     ----------                               30.7/122.7 kB ? eta -:--:--
     ----------                               30.7/122.7 kB ? eta -:--:--
     ------------                          41.0/122.7 kB 164.3 kB/s eta 0:00:01
     ------------                          41.0/122.7 kB 164.3 kB/s eta 0:00:01
     ------------                          41.0/122.7 kB 164.3 kB/s eta 0:00:01
     ---------------------                 71.7/122.7 kB 218.6 kB/s eta 0:00:01
     ---------------------                 71.7/122.7 kB 218.6 kB/s eta 0:00:01
     ---------------------------------    112.6/122.7 kB 262.6 kB/s eta 0:00:01
     ------------------------------------ 122.7/122.7 kB 277.0 kB/s eta 0:0

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pylint 2.16.2 requires colorama>=0.4.5; sys_platform == "win32", but you have colorama 0.4.4 which is incompatible.
spyder 5.4.3 requires keyring>=17.0.0, but you have keyring 8.7 which is incompatible.


In [2]:
# Set up the API key for Vertex AI
api_key = "api-key"
location = "us-central1"  # or the region where your Vertex AI is deployed
model_id = "gemini"  # Use the model ID associated with Gemini on Vertex AI

In [3]:
# Vertex AI endpoint for predictions
endpoint = f"https://{location}-aiplatform.googleapis.com/v1/projects/YOUR_PROJECT_ID/locations/{location}/publishers/google/models/{model_id}:predict"

In [4]:
# Load and process documents
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load()

In [5]:
len(data)

2

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
docs = text_splitter.split_documents(data)

In [11]:
from tqdm import tqdm, trange

In [13]:
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [14]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

vectorindex_vertex = FAISS.from_documents(docs, embeddings)

In [15]:
file_path = "vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorindex_vertex, f)

In [16]:
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

In [17]:
# Function to query Gemini model via Vertex AI using API key
def query_gemini_model(question):
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "instances": [{"question": question}],
        "parameters": {"temperature": 0.9, "maxOutputTokens": 500}
    }

    response = requests.post(endpoint, headers=headers, json=payload)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None


In [20]:
from langchain.llms import BaseLLM
import requests

In [38]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import BaseLLM

# Custom LLM wrapper for Gemini API using BaseLLM
class GeminiLLM(BaseLLM):
    def _generate(self, prompt: str, stop: list = None) -> str:
        # Call the Gemini API and return the generated answer
        return query_gemini_model(prompt)

    def _llm_type(self) -> str:
        return "gemini"

# Instantiate the custom LLM
gemini_llm = GeminiLLM()

In [39]:
# Assuming vectorIndex is already defined (from previous steps)
# Use the custom LLM in the LLMChain
chain = RetrievalQAWithSourcesChain.from_llm(llm=gemini_llm, retriever=vectorIndex.as_retriever())


In [41]:
# Ask a question
query = "what is the price of Tiago iCNG?"
langchain.debug = True
answer = chain({"question": query}, return_only_outputs=True)

# Debugging the answer
if answer:
    print("Answer:", answer)
else:
    print("Error: No valid answer received.")

NameError: name 'langchain' is not defined

In [42]:
# Ask a question
query = "what is the price of Tiago iCNG?"
answer = chain({"question": query}, return_only_outputs=True)

# Debugging the answer
if answer:
    print("Answer:", answer)
else:
    print("Error: No valid answer received.")

Error: 401, {
  "error": {
    "code": 401,
    "message": "Request had invalid authentication credentials. Expected OAuth 2 access token, login cookie or other valid authentication credential. See https://developers.google.com/identity/sign-in/web/devconsole-project.",
    "status": "UNAUTHENTICATED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "ACCESS_TOKEN_TYPE_UNSUPPORTED",
        "metadata": {
          "service": "aiplatform.googleapis.com",
          "method": "google.cloud.aiplatform.v1.PredictionService.Predict"
        }
      }
    ]
  }
}



AttributeError: 'NoneType' object has no attribute 'flatten'