In [64]:
%%writefile requirements.txt
llama-index
google-generativeai
llama-index-llms-gemini
pypdf
python-dotenv
IPython
llama-index-embeddings-gemini
streamlit

-e .

Overwriting requirements.txt


In [None]:
!pip install -r requirements.txt

In [67]:
import google.generativeai as genai

from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.llms.gemini import Gemini
from llama_index.core import ServiceContext
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.embeddings.gemini import GeminiEmbedding
from IPython.display import Markdown, display
import os
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings

In [None]:
# Authenticate the SDK with the key taken from the environment
# (raises KeyError when GOOGLE_API_KEY is not set).
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Print only the models that support generateContent, the method used for
# text-to-text and image-to-text generation.
for model_info in genai.list_models():
    supported_methods = model_info.supported_generation_methods
    if "generateContent" in supported_methods:
        print(model_info.name)

In [None]:
# Dump the full record of every model the API reports.
for model_info in genai.list_models():
    print(model_info)

### **END-TO-END**

In [81]:
%%writefile exception.py
import sys


class customexception(Exception):
    """Exception that records where the currently handled error surfaced.

    Meant to be raised from inside an ``except`` block as
    ``raise customexception(e, sys)``. The traceback of the exception being
    handled is obtained through ``error_details.exc_info()``.

    Attributes:
        error_message: The wrapped exception (or message) as passed in.
        lineno: Line number taken from the active traceback, or ``None`` when
            no exception was being handled at construction time.
        file_name: File name taken from the active traceback's frame, or
            ``None`` when no exception was being handled.
    """

    def __init__(self, error_message, error_details):
        """Capture the message and the location of the active exception.

        Parameters:
        - error_message: The original exception or a message describing it.
        - error_details: An object exposing ``exc_info()`` (normally the
          ``sys`` module).
        """
        # Initialise the base class with the message only, so that ``args``
        # holds the error and not the ``sys`` module as well.
        super().__init__(error_message)
        self.error_message = error_message

        _, _, exc_tb = error_details.exc_info()
        if exc_tb is None:
            # Constructed outside an ``except`` block: there is no traceback
            # to inspect, so the location is unknown.
            self.lineno = None
            self.file_name = None
        else:
            self.lineno = exc_tb.tb_lineno
            self.file_name = exc_tb.tb_frame.f_code.co_filename

    def __str__(self):
        return "Error occured in python script name [{0}] line number [{1}] error message [{2}]".format(
            self.file_name, self.lineno, str(self.error_message))


if __name__ == "__main__":
    # Manual smoke test: provoke a ZeroDivisionError and re-raise it wrapped
    # in customexception so the formatted message can be inspected.
    try:
        a = 1 / 0
    except Exception as exc:
        raise customexception(exc, sys)

Overwriting exception.py


In [82]:
%%writefile logger.py
import logging
import os
from datetime import datetime

# Log file name derived from the time this module is first imported
# (month_day_year_hour_minute_second).
LOG_FILE = datetime.now().strftime('%m_%d_%Y_%H_%M_%S') + ".log"

# All log files live in a "logs" folder under the current working directory.
log_path = os.path.join(os.getcwd(), "logs")
os.makedirs(log_path, exist_ok=True)

LOG_FILEPATH = os.path.join(log_path, LOG_FILE)

# Send INFO-and-above records of the root logger to that file.
logging.basicConfig(
    filename=LOG_FILEPATH,
    level=logging.INFO,
    format="[%(asctime)s] %(lineno)d %(name)s - %(levelname)s - %(message)s",
)

Overwriting logger.py


In [83]:
%%writefile setup.py
from setuptools import find_packages, setup

# Package metadata collected in one mapping and handed to setuptools below.
PACKAGE_METADATA = dict(
    name='QApplication',
    version='0.0.1',
    author='sunny savita',
    author_email='sunny.savita@gmail.com',
    # Every package directory discovered by find_packages() is included.
    packages=find_packages(),
    # No install-time dependencies are declared here.
    install_requires=[],
)

setup(**PACKAGE_METADATA)

Overwriting setup.py


In [84]:
%%writefile template.py
import os
from pathlib import Path

# Project skeleton: every path is relative to the current working directory.
list_of_files = [
    "QAWithPDF/__init__.py",
    "QAWithPDF/data_ingestion.py",
    "QAWithPDF/embedding.py",
    "QAWithPDF/model_api.py",
    "Experiments/experiment.ipynb",
    "StreamlitApp.py",
    "logger.py",
    "exception.py",
    "setup.py",
]


for entry in list_of_files:
    target = Path(entry)
    parent_dir = os.path.dirname(target)

    # Top-level files have an empty directory part; only create real folders.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Files that already have content are left alone; missing or empty ones
    # are (re)created as empty files.
    has_content = os.path.exists(target) and os.path.getsize(target) > 0
    if not has_content:
        with open(target, 'w'):
            pass

Writing template.py


In [85]:
%%writefile data_ingestion.py
from llama_index.core import SimpleDirectoryReader
import sys
from exception import customexception
from logger import logging

def load_data(data):
    """
    Load documents from a directory using SimpleDirectoryReader.

    Parameters:
    - data (str | os.PathLike): Path to the directory containing the files to
      load. Any other value (for example ``None`` or an uploaded-file object)
      or an empty path falls back to the default ``"Data"`` directory.

    Returns:
    - The list of documents returned by ``SimpleDirectoryReader.load_data()``.

    Raises:
    - customexception: Wraps any error raised while reading the directory.
    """
    import os

    # The argument used to be ignored and "Data" was always read. A real path
    # is now honoured, while "Data" stays the fallback so callers that pass a
    # non-path value keep working.
    if isinstance(data, (str, os.PathLike)) and os.fspath(data):
        input_dir = os.fspath(data)
    else:
        input_dir = "Data"

    try:
        logging.info("data loading started...")
        loader = SimpleDirectoryReader(input_dir)
        documents = loader.load_data()
        logging.info("data loading completed...")
        return documents
    except Exception as e:
        # Failures are logged at ERROR level so they stand out in the log file.
        logging.error("exception in loading data...")
        raise customexception(e, sys) from e


Overwriting data_ingestion.py


In [113]:
%%writefile embedding.py
from llama_index.core import VectorStoreIndex
from llama_index.core import ServiceContext
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.embeddings.gemini import GeminiEmbedding

from llama_index.core.node_parser import SentenceSplitter
from data_ingestion import load_data
from model_api import load_model

import sys
from exception import customexception
from logger import logging

def download_gemini_embedding(model,document):
    """
    Build a Gemini-embedded vector index over the documents and return a query engine.

    The documents are chunked with a SentenceSplitter (chunk_size=1000,
    chunk_overlap=20), embedded with the "models/embedding-001" Gemini
    embedding model and stored in a VectorStoreIndex, which is then persisted
    through ``index.storage_context.persist()``.

    Parameters:
    - model: The LLM handed to ``index.as_query_engine(llm=...)``.
    - document: The documents handed to ``VectorStoreIndex.from_documents``.

    Returns:
    - The query engine created by ``index.as_query_engine(llm=model)``.

    Raises:
    - customexception: Wraps any error raised while embedding, indexing or
      persisting.
    """
    try:
        logging.info("initializing Gemini embedding model and sentence splitter...")
        gemini_embed_model = GeminiEmbedding(model_name="models/embedding-001")
        node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)
        transformations = [node_parser]

        logging.info("building vector index from documents...")
        index = VectorStoreIndex.from_documents(documents=document, embed_model=gemini_embed_model, transformations=transformations)

        # Persist with the library defaults (no explicit directory is given).
        index.storage_context.persist()

        logging.info("vector index persisted; creating query engine...")
        query_engine = index.as_query_engine(llm=model)
        return query_engine
    except Exception as e:
        logging.error("exception in building the embedding index...")
        raise customexception(e,sys) from e

Overwriting embedding.py


In [114]:
%%writefile model_api.py

import os

import sys

from llama_index.llms.gemini import Gemini
from IPython.display import Markdown, display
import google.generativeai as genai
from exception import customexception
from logger import logging


# Read the API key once at import time; the value is None when the
# environment variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Configure the google.generativeai SDK with that key.
genai.configure(api_key=GOOGLE_API_KEY)

def load_model(model_name="models/gemini-pro"):

    """
    Create the Gemini LLM wrapper used to answer questions.

    Parameters:
    - model_name (str): Gemini model identifier passed to ``Gemini(model=...)``.
      Defaults to ``"models/gemini-pro"``.

    Returns:
    - Gemini: An instance of the Gemini class configured with ``model_name``
      and the module-level ``GOOGLE_API_KEY``.

    Raises:
    - customexception: Wraps any error raised while constructing the model.
    """
    try:
        # The constructor keyword is `model`; the earlier `models=` spelling
        # did not select the model at all.
        # NOTE(review): confirm the default id is still served by the API;
        # genai.list_models() reports the currently available names.
        model=Gemini(model=model_name,api_key=GOOGLE_API_KEY)
        return model
    except Exception as e:
        raise customexception(e,sys) from e

Overwriting model_api.py


In [115]:
%%writefile app.py
import streamlit as st
from data_ingestion import load_data
from embedding import download_gemini_embedding
from model_api import load_model


def main():
    """Render the Streamlit page: upload a document, ask a question, show the answer.

    When a file is uploaded it is saved into the "Data" directory so that it
    is part of what gets loaded and indexed. Without an upload, whatever is
    already in "Data" is used. An empty question is rejected with a warning
    instead of being sent to the query engine.
    """
    import os

    st.set_page_config("QA with Documents")

    doc=st.file_uploader("upload your document")

    st.header("QA with Documents(Information Retrieval)")

    user_question= st.text_input("Ask your question")

    if st.button("submit & process"):
        # Nothing to answer: stop before doing any expensive work.
        if not user_question or not user_question.strip():
            st.warning("Please enter a question before submitting.")
            return

        with st.spinner("Processing..."):
            data_dir="Data"

            # The upload used to be passed on but never read. Persist it into
            # the directory that is actually loaded. basename() keeps the
            # write inside data_dir whatever name the client supplied.
            if doc is not None:
                os.makedirs(data_dir, exist_ok=True)
                target_path=os.path.join(data_dir, os.path.basename(doc.name))
                with open(target_path, "wb") as out_file:
                    out_file.write(doc.getvalue())

            document=load_data(data_dir)
            model=load_model()
            query_engine=download_gemini_embedding(model,document)

            response = query_engine.query(user_question)

            st.write(response.response)


# Entry point: build the page when this file is executed as a script.
if __name__=="__main__":
    main()

Overwriting app.py


In [116]:
# Print the IP address reported by icanhazip.com for this machine
# (presumably needed as the localtunnel access password; confirm).
!curl icanhazip.com
print()
# Start the Streamlit app in the background and open a localtunnel to port 8501.
!streamlit run app.py & npx localtunnel --port 8501

35.229.220.181


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.229.220.181:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://poor-shrimps-bet.loca.lt
[34m  Stopping...[0m
E0000 00:00:1740408923.298370   42155 init.cc:232] grpc_wait_for_shutdown_with_timeout() timed out.
^C
