In [None]:
from dotenv import load_dotenv
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage
import base64
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with an actionable message if the key is missing.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; add it to the environment or to a .env file."
    )

# Shared chat model used by every later cell.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=api_key,  # pass the key explicitly instead of leaving it unused
    convert_system_message_to_human=True,
    temperature=0.0,
)

In [3]:
def img_to_llm(image_file_path):
    """Ask the module-level ``llm`` for a plain-text description of an image.

    Note: a later cell defines ``img_to_llm`` again, wrapped in a retry
    decorator; once that cell has run it replaces this definition.

    Args:
        image_file_path: Path of the image file to describe.

    Returns:
        str: The model's reply as text.

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    import mimetypes  # local import so this cell does not rely on another one

    with open(image_file_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

    # Label the payload with the MIME type implied by the file extension.
    # Anything not recognised as an image keeps the previous "image/png" label.
    guessed_type, _ = mimetypes.guess_type(image_file_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/png"

    message = HumanMessage(
        content=[
            {"type": "text", "text": "Describe the image I am sending. Send it as plain text in paragraphs with no formatting"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}
            }
        ]
    )

    response = llm.invoke([message])
    # Return the parsed text (it used to be computed and then discarded) so
    # callers get a string even if ``response.content`` is not a plain str.
    return StrOutputParser().invoke(response)

In [4]:
from time import sleep
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),  # Try 3 times
    wait=wait_exponential(multiplier=1, min=4, max=10),  # Wait between 4-10 seconds, increasing exponentially
)
def img_to_llm(image_file_path):
    """Ask the module-level ``llm`` for a plain-text description of an image.

    The whole body runs under the retry decorator above, and every exception
    is logged and re-raised, so any failure (including a missing file) counts
    as a failed attempt.

    Args:
        image_file_path: Path of the image file to describe.

    Returns:
        str: The model's reply as text.
    """
    import mimetypes  # local import so this cell does not rely on another one

    try:
        with open(image_file_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

        # Label the payload with the MIME type implied by the file extension.
        # Anything not recognised as an image keeps the previous "image/png".
        guessed_type, _ = mimetypes.guess_type(image_file_path)
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = "image/png"

        message = HumanMessage(
            content=[
                {"type": "text", "text": "Describe the image I am sending. Send it as plain text in paragraphs with no formatting"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}
                }
            ]
        )

        response = llm.invoke([message])
        # Return the parsed text (it used to be computed and then discarded)
        # so callers get a string even if ``response.content`` is not a str.
        return StrOutputParser().invoke(response)

    except Exception as e:
        print(f"Attempt failed: {str(e)}")
        raise  # This will trigger the retry

In [5]:
output_path = './output_images'
NUM_PAGES = 5  # how many page images (page_001.png, page_002.png, ...) to describe

for page_number in range(1, NUM_PAGES + 1):
    image_filename = os.path.join(output_path, f'page_{page_number:03}.png')
    text_filename = os.path.join(output_path, f'page_{page_number:03}.txt')

    # Each description is a remote LLM call, so reuse what an earlier run
    # already wrote. Delete the .txt file to force a page to be regenerated.
    if os.path.exists(text_filename) and os.path.getsize(text_filename) > 0:
        continue

    llm_output = img_to_llm(image_filename)
    with open(text_filename, 'w', encoding='utf-8') as f:
        f.write(llm_output)
    

In [6]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Define the model and its settings
model_name = "BAAI/bge-base-en-v1.5"

# Use the GPU when one is actually available; otherwise fall back to the CPU
# instead of failing on machines without CUDA.
try:
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
except ImportError:
    device = 'cpu'

model_kwargs = {'device': device}
encode_kwargs = {'normalize_embeddings': True}  # For cosine similarity

# Create the embeddings object
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,  # You can change this for different tasks
    # The instruction is concatenated directly in front of the query text,
    # so it needs a trailing space to stay separate from the query.
    query_instruction="Represent this sentence for answering questions: ",
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [7]:
# Text splitter with its default settings, used to chunk each page description.
splitter = RecursiveCharacterTextSplitter()

# Names of the description files in the output folder, in sorted order.
file_names = sorted(
    entry for entry in os.listdir(output_path) if entry.endswith('.txt')
)

# Collects the Document chunks built from those files.
documents = list()

In [8]:
# Start from an empty list so that re-running this cell rebuilds the corpus
# instead of appending a second copy of every chunk.
documents = []

for file_name in file_names:
    with open(os.path.join(output_path, file_name), 'r', encoding='utf-8') as f:
        text = f.read()
    # One Document per chunk, tagged with its source file and position in it.
    documents.extend(
        Document(
            page_content=chunk,
            metadata={"source": file_name, "chunk_id": chunk_id}
        )
        for chunk_id, chunk in enumerate(splitter.split_text(text))
    )

# Report an empty corpus explicitly rather than failing inside the index build.
if not documents:
    raise ValueError(
        f"No text chunks found in {output_path!r}; generate the page descriptions first."
    )

vectorstore = FAISS.from_documents(documents, embeddings)

In [9]:
# Instructions for the answering model. The {context} and {input} placeholders
# are filled in when the chain runs.
QA_TEMPLATE = """
Use the following context to answer the question at the end. 
Answer in paragraphs with no formatting. Headings can be shown with the help of colons.
If the answer is not in the context, just say "I don't know"— do not make anything up.
If the answer is in the context and the user specifies the statement "AOFS" which stands for "Answer only from slides" do not provide any other information other than what is in the context.
In the event user does not specify that statement and If the answer is in the context, Explain the context using your own knowledge of that subject, it doesn't have to be in the context.
Also include a fun explanation to the concept using different analogies.
Finally, include a very brief intuitive explanation which helps me get the basic intution of what's going on.
Context:
{context}

Question:
{input}

Answer:"""

prompt = ChatPromptTemplate.from_template(QA_TEMPLATE)

# Chain that feeds the supplied documents and the question to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [10]:
from langchain.chains import create_retrieval_chain

# Expose the FAISS index as a retriever with its default search settings.
retriever = vectorstore.as_retriever()

# Combine retrieval and answering: look up documents, then run document_chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [12]:
# Ask one question through the retrieval chain and show only the answer text.
question = "Describe the difference between Synchronous and Asyncrhonous data transfer."
response = retrieval_chain.invoke({"input": question})
print(response["answer"])

:Synchronous vs. Asynchronous Data Transfer:

Synchronous data transfer involves the transfer of data between two devices on a network where they both operate based on a common clock pulse. This means that the sender and receiver are synchronized by a shared timing signal, ensuring that data is transmitted and received at predictable intervals.

Asynchronous data transfer, on the other hand, involves the transfer of data between two devices on a network where they operate based on a private clock pulse. In this method, the sender and receiver do not rely on a shared clock signal. Instead, the data transmission is often accompanied by start and stop bits, which signal the beginning and end of each data unit, allowing the receiver to synchronize with the data stream for each individual transmission.

:Explanation with Analogies:

Imagine a marching band (synchronous) where everyone steps in perfect unison because they're all following the same drumbeat (common clock pulse). The band memb