# Setup Gemini

In [None]:
%%capture
%pip install -q -U google-generativeai

In [None]:
import google.generativeai as genai
from IPython.display import Markdown

In [None]:
import os
# Give the Gemini client its API key, read from the GOOGLE_API_KEY environment variable.
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY'))

# Load test image

In [None]:
import base64
from io import BytesIO
import os

from IPython.display import HTML, display
from PIL import Image


def convert_to_base64(pil_image, image_format="JPEG"):
    """
    Encode a PIL image as a Base64 string.

    The image is serialised into an in-memory buffer (it is not resized)
    and the resulting bytes are Base64-encoded.

    :param pil_image: PIL image to encode
    :param image_format: Format name handed to ``pil_image.save``
        (defaults to "JPEG", the original behaviour)
    :return: Base64 encoded string of the serialised image
    """
    # JPEG cannot store alpha or palette data; saving e.g. an RGBA or P image
    # as JPEG fails, so such images are converted to RGB first. Modes that the
    # JPEG writer accepts are left untouched.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image_format.upper() == "JPEG" and pil_image.mode not in jpeg_modes:
        pil_image = pil_image.convert("RGB")

    buffered = BytesIO()
    pil_image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def plt_img_base64(img_base64, mime_type=None):
    """
    Display a base64 encoded image inline.

    :param img_base64: Base64 string of the encoded image
    :param mime_type: MIME type used in the data URI. When omitted, it is
        "image/png" if the data starts with the Base64 form of the PNG
        signature and "image/jpeg" otherwise (the original default).
    """
    if mime_type is None:
        # "iVBORw0KGg" is the Base64 encoding of the leading PNG signature bytes.
        mime_type = "image/png" if img_base64.startswith("iVBORw0KGg") else "image/jpeg"

    # Build an HTML img tag whose source is a data URI carrying the image
    image_html = f'<img src="data:{mime_type};base64,{img_base64}" />'
    # Render the HTML so the image appears in the notebook output
    display(HTML(image_html))

# Locate the folder of extracted figures, relative to the parent of the working directory.
path = os.getcwd()
parent_directory = os.path.abspath(os.path.join(path, os.pardir))
output_dir = f"{parent_directory}/Experiments/Extracted_Images"
file_path = f"{output_dir}/figure-31-42.jpg"

# Open the sample figure and preview it inline via its Base64 form.
pil_image = Image.open(file_path)
image_b64 = convert_to_base64(pil_image)
plt_img_base64(image_b64)

In [None]:
# Instantiate the Gemini model used for the image descriptions.
model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

# Send the PIL image loaded above together with the instruction text.
response = model.generate_content(
    [
        pil_image,
        "Descrivi la parte principale della finestra, nell'immagine fornita.",
    ]
)

# Render the answer as a Markdown block quote.
Markdown(f">{response.text}")

In [None]:
def convert_to_base64_png(pil_image):
    """
    Encode a PIL image as a Base64 string using the PNG format.

    :param pil_image: PIL image
    :return: Base64 string of the PNG-serialised image
    """
    with BytesIO() as png_buffer:
        # Serialise into memory, then Base64-encode the raw bytes
        pil_image.save(png_buffer, format="PNG")
        raw_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

In [None]:
# Open image_40.png from the Experiments/Images folder and preview it inline.
plumber_path = f"{parent_directory}/Experiments/Images"
file_path_plumber = f"{plumber_path}/image_40.png"
image_pil_plumber = Image.open(file_path_plumber)
image_b64 = convert_to_base64_png(image_pil_plumber)
plt_img_base64(image_b64)

In [None]:
# Ask the model to describe the application screen only, leaving out browser tabs and bars.
prompt = """
Descrivi la parte principale della finestra, nell'immagine fornita.
Non descrivere schede e barre appartenenti al browser. Concentrati sulla videata del software.
"""
response = model.generate_content(
    [image_pil_plumber, prompt],
)

# Render the answer as a Markdown block quote.
Markdown(f">{response.text}")

In [None]:
# Open image_41.png from the Experiments/Images folder and preview it inline.
plumber_path = f"{parent_directory}/Experiments/Images"
file_path_plumber = f"{plumber_path}/image_41.png"
image_pil_plumber = Image.open(file_path_plumber)
image_b64 = convert_to_base64_png(image_pil_plumber)
plt_img_base64(image_b64)

In [None]:
# Ask the model to describe the application screen only, leaving out browser tabs and bars.
prompt = """
Descrivi la parte principale della finestra, nell'immagine fornita.
Non descrivere schede e barre appartenenti al browser. 
Concentrati sulla videata del software.
"""
response = model.generate_content(
    [image_pil_plumber, prompt],
)

# Render the answer as a Markdown block quote.
Markdown(f">{response.text}")

# Image extraction with Unstructured, using placeholders

In [None]:
# Resolve the PDF to parse and show it next to the image output directory.
parent_directory = os.path.abspath(os.path.join(path, os.pardir))
filepath = f"{parent_directory}/Doc_Panthera/Gestionale/VEN_Contratti_Vendita_Ordini_Aperti.pdf"
print(f"Filepath: {filepath}")
print(f"Output path: {output_dir}")

In [None]:
import os
from unstructured.partition.pdf import partition_pdf
import json

# Parse the PDF. Image blocks are requested and output_dir is given as the
# directory for the extracted image files.
elements = partition_pdf(
    filepath,
    extract_images_in_pdf=True,
    strategy="hi_res",
    languages=['ita'],
    extract_image_block_output_dir=output_dir
)

# Text of the document with image placeholders, plus per-image metadata
processed_elements = []
image_metadata = {}

for el in elements:
    el_dict = el.to_dict()  # Convert element to a dictionary
    el_type = el_dict.get("type")  # Element category, e.g. "Image"

    if el_type == "Image":
        # The element metadata "filename" is the name of the partitioned
        # document (the PDF), not of the extracted picture; the saved image
        # file is reported under "image_path". Using "filename" here would
        # point every image at "<output_dir>/<pdf name>".
        image_path = el_dict.get("metadata", {}).get("image_path")

        # Leave a placeholder in the text where the image was
        placeholder = f"[IMAGE: {el_dict['element_id']}]"
        processed_elements.append(placeholder)

        # Keep the full element dict and the image location, keyed by element id
        image_metadata[el_dict['element_id']] = {
            "metadata": el_dict,
            "image_path": image_path,
        }
    else:
        # For other types, keep the text as is
        processed_elements.append(el_dict.get("text", ""))

# Combine the text and placeholders into a single output
output_text = "\n".join(processed_elements)

# Save the image metadata for later use
with open("image_metadata.json", "w", encoding="utf-8") as f:
    json.dump(image_metadata, f, indent=2)

# Print the text output with placeholders
print(output_text)

# Also save the text output, which later cells read back
with open("processed_output.txt", "w", encoding="utf-8") as f:
    f.write(output_text)

In [None]:
import json

# Read back the image metadata JSON written by the extraction cell and report its size.
with open("image_metadata.json", encoding="utf-8") as f:
    image_metadata = json.loads(f.read())
print(f"There are {len(image_metadata)} images metadata extracted form the file.")

In [None]:
import os

def count_files_in_directory(directory_path):
    """Return how many entries directly inside ``directory_path`` are regular files."""
    # Sub-directories are skipped; each file contributes 1 to the total
    return sum(
        1
        for entry_name in os.listdir(directory_path)
        if os.path.isfile(os.path.join(directory_path, entry_name))
    )

# Count the files currently present in the extracted-images folder.
directory_path = f"{parent_directory}/Experiments/Extracted_Images"
file_count = count_files_in_directory(directory_path)
print(f"There are {file_count} images files in the directory.")

In [None]:
# File names used by the description-integration steps:
# image metadata JSON, text with placeholders, and the final merged text.
metadata_file, processed_text_file, final_output_file = (
    "image_metadata.json",
    "processed_output.txt",
    "final_output.txt",
)

In [None]:
import os
from PIL import Image

def process_images_and_get_descriptions(
    directory_path,
    model,
    prompt="Descrivi la parte principale della finestra, nell'immagine fornita.",
):
    """
    Generate a description for every .jpg image in a folder.

    Each image is opened with PIL and sent to ``model.generate_content``
    together with ``prompt``. Failures on a single image are reported and
    recorded, and processing continues with the next file.

    Args:
    - directory_path: The path to the folder containing images.
    - model: Object exposing ``generate_content(parts)`` whose result has a
      ``text`` attribute (the Gemini model in this notebook).
    - prompt: Instruction text sent along with each image.

    Returns:
    - descriptions: A dictionary where the keys are image filenames
      and the values are the generated descriptions, or the string
      "Error processing image" when processing that file failed.
    """
    descriptions = {}

    # Sorted so images are processed in a reproducible order
    for filename in sorted(os.listdir(directory_path)):
        if not filename.lower().endswith('.jpg'):
            continue
        image_path = os.path.join(directory_path, filename)

        try:
            with Image.open(image_path) as img:
                print(f"Processing image: {filename}")
                # Send the opened image itself. Passing image_path (a str)
                # would give the model the path as text, not the picture.
                response = model.generate_content([img, prompt])
                descriptions[filename] = response.text
        except Exception as e:
            # Best effort: one unreadable image or failed request must not
            # abort the whole batch.
            print(f"Error processing {filename}: {e}")
            descriptions[filename] = "Error processing image"

    return descriptions

# Describe every extracted .jpg with the model currently bound to `model`;
# the result maps image filename -> description.
descriptions = process_images_and_get_descriptions(directory_path, model)

In [None]:
# Display the filename -> description mapping as the cell output.
descriptions

In [None]:
import json
import pprint

def extract_image_path_and_id_from_element(element):
    """
    Extracts the 'image_path' and 'element_id' from one image metadata entry.

    Entries written by the extraction step have the shape
    ``{"metadata": <element dict>, "image_path": <path>}``, where the element
    dict carries 'element_id' and its own nested 'metadata' dict. The flat
    layout (``element['metadata']['image_path']`` and ``element['element_id']``)
    is still accepted and takes precedence when present.

    Args:
    - element: A dictionary representing one entry of the image metadata.

    Returns:
    - (image_path, element_id) when both values are found, otherwise None.
    """
    el_dict = element.get('metadata')
    if not isinstance(el_dict, dict):
        return None

    nested_metadata = el_dict.get('metadata')
    if not isinstance(nested_metadata, dict):
        nested_metadata = {}

    # The path recorded in the element's own metadata is preferred over the
    # path stored next to it in the wrapper entry.
    image_path = (
        el_dict.get('image_path')
        or nested_metadata.get('image_path')
        or element.get('image_path')
    )
    element_id = element.get('element_id') or el_dict.get('element_id')

    if image_path and element_id:
        return image_path, element_id
    return None

def extract_image_paths_and_ids(metadata_file):
    """
    Collect (image_path, element_id) pairs from the image metadata JSON file.

    Entries for which no pair can be extracted are reported with a warning
    and skipped. If reading or processing fails, the error is printed and
    whatever was collected so far is returned.

    Args:
    - metadata_file: The path to the image metadata JSON file.

    Returns:
    - image_data: A list of tuples containing the image path and element id.
    """
    image_data = []

    try:
        with open(metadata_file, encoding="utf-8") as handle:
            entries = json.load(handle).values()

        for entry in entries:
            pair = extract_image_path_and_id_from_element(entry)
            if not pair:
                print(f"Warning: No 'image_path' or 'element_id' found for element.")
                continue
            image_data.append(pair)
    except Exception as e:
        print(f"Error reading metadata file: {e}")

    return image_data

# Read the (image path, element id) pairs back from the metadata file.
metadata_file = "image_metadata.json"
image_data = extract_image_paths_and_ids(metadata_file)

# List each extracted image by file name together with its element id.
for image_path, element_id in image_data:
    image_name = os.path.basename(image_path)
    print(f"Image Path: {image_name}, Element ID: {element_id}")

In [None]:
import os

def integrate_descriptions(metadata_file, descriptions, processed_output_path, final_output_path):
    """
    Integrates image descriptions into the text by replacing [IMAGE: element_id] placeholders.

    Metadata entries are expected in the shape written by the extraction step,
    ``{"metadata": <element dict>, "image_path": <path>}``, keyed by element id.
    The flat layout (``entry['metadata']['image_path']`` and
    ``entry['element_id']``) is also accepted. Placeholders without a matching
    description are left untouched.

    Args:
    - metadata_file: Path to the metadata JSON file containing image_path and element_id.
    - descriptions: Dictionary where keys are image filenames, and values are descriptions.
    - processed_output_path: Path to the text file containing placeholders.
    - final_output_path: Path to save the final text with descriptions integrated.

    Returns:
    - The final text, which is also written to final_output_path.
    """
    # Step 1: Create a mapping of element_id to descriptions
    with open(metadata_file, "r", encoding="utf-8") as f:
        image_metadata = json.load(f)

    element_id_to_description = {}
    for key, element in image_metadata.items():
        el_dict = element.get('metadata')
        if not isinstance(el_dict, dict):
            continue
        nested_metadata = el_dict.get('metadata')
        if not isinstance(nested_metadata, dict):
            nested_metadata = {}

        # Prefer the path recorded in the element's own metadata, then the
        # one stored next to it in the wrapper entry.
        image_path = (
            el_dict.get('image_path')
            or nested_metadata.get('image_path')
            or element.get('image_path')
        )
        # The JSON key is the element id in the file written by the extraction step.
        element_id = element.get('element_id') or el_dict.get('element_id') or key
        if not (image_path and element_id):
            continue

        # Descriptions are keyed by image file name
        description = descriptions.get(os.path.basename(image_path))
        if description:
            element_id_to_description[element_id] = description

    # Step 2: Replace placeholders in the processed_output text
    with open(processed_output_path, "r", encoding="utf-8") as processed_file:
        processed_text = processed_file.read()

    for element_id, description in element_id_to_description.items():
        processed_text = processed_text.replace(f"[IMAGE: {element_id}]", description)

    # Step 3: Write the final output to a file
    with open(final_output_path, "w", encoding="utf-8") as final_file:
        final_file.write(processed_text)

    print(f"Descriptions successfully integrated into {final_output_path}")
    return processed_text

# Input and output files for the placeholder replacement step.
metadata_file = "image_metadata.json"
processed_output_path = "processed_output.txt"
final_output_path = "final_output.txt"

# Merge the generated descriptions into the extracted text.
processed_text = integrate_descriptions(
    metadata_file,
    descriptions,
    processed_output_path,
    final_output_path,
)

In [None]:
import csv

file_name = "Prova"
# Write a pipe-delimited CSV with a header row and a single data row
# holding the document name and its description-augmented text.
csv_name = "Gemini_augmented_dataset.csv"
with open(csv_name, "w", encoding="utf-8", newline="") as csvfile:
    csv_writer = csv.writer(csvfile, delimiter="|")
    csv_writer.writerows(
        [
            ["file_name", "updated_text"],  # header
            [file_name, processed_text],  # data row
        ]
    )

print("Text updated with image descriptions and saved to 'Gemini_augmented_dataset.csv'.")

# Retrieval

In [None]:
# Load the original dataset for RAG
import pandas as pd

csv_name = "Gemini_augmented_dataset.csv"
# Column names are supplied explicitly, so the file's header line is read
# as an ordinary first row (index 0) and then dropped.
data_df = pd.read_csv(
    csv_name,
    sep="|",
    names=['file_name', 'updated_text'],
).drop(index=0)
data_df

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import DataFrameLoader
import pprint

# Turn each DataFrame row into a document whose content is the "updated_text" column.
loader = DataFrameLoader(
    data_df,
    page_content_column="updated_text",
)
docs_data = loader.load()

# Splitter configured with chunk_size=3000 and chunk_overlap=500.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=500,
)

In [None]:
# Chunk every document and tag each chunk with its 1-based position
# inside its own source document.
splits = []
for doc in docs_data:
    doc_chunks = text_splitter.split_documents([doc])
    for position, chunk in enumerate(doc_chunks, start=1):
        chunk.metadata["chunk_number"] = position
    splits.extend(doc_chunks)

# Show the first chunks and the total number of chunks
pprint.pprint(splits[:6])
pprint.pprint(len(splits))

In [None]:
from FlagEmbedding import BGEM3FlagModel
# Load the 'BAAI/bge-m3' embedding model with use_fp16=True; this instance is
# used by the M3EmbeddingFP16 adapter defined in the next cell.
model_fp16 = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

In [None]:
class M3EmbeddingFP16:
    """Adapter exposing the module-level ``model_fp16`` encoder as an embedding object.

    NOTE(review): only ``embed_documents`` and ``__call__`` are provided; there
    is no ``embed_query``. Presumably the vector store falls back to calling the
    instance for single queries - confirm this is intended.
    """

    def embed_documents(self, texts):
        """Encode ``texts`` with ``model_fp16`` and return the 'dense_vecs' entry of the result."""
        encoded = model_fp16.encode(texts)
        return encoded['dense_vecs']

    def __call__(self, texts):
        """Allow the instance to be called directly; delegates to ``embed_documents``."""
        return self.embed_documents(texts)

In [None]:
from langchain_community.vectorstores import FAISS

# Embed all chunks into a FAISS index and save it to a local folder.
embd = M3EmbeddingFP16()
vectorstore = FAISS.from_documents(splits, embd)
vectorstore.save_local("gemini_trial_VEN_index")

In [None]:
# Retriever over the FAISS index, configured with k=4.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=4))

In [None]:
# Sample customer question (in Italian) used to exercise the retriever.
question = "Come posso decidere se nel calcolo della percentuale di saturazione del contratto vadano considerate anche la quantità in previsione?"
# Fetch the chunks the retriever returns for the question and display them.
retrieved_docs = retriever.invoke(question)
retrieved_docs

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt template (Italian) with two placeholders:
#   {context}  - the retrieved documents the answer must be based on
#   {question} - the customer's question about the Panthera software
template = """
Comportati come un assistente che risponde alle domande del cliente.
Rispondi alla domanda basandoti solo sui seguenti documenti: {context}
Rispondi in modo conciso e chiaro, spiegando passo passo al cliente le azioni necessarie da effettuare.
Se possibile, dai indicazioni dettagliate al cliente, su come risolvere il problema o effettuare l'azione desiderata.
Evita troppe ripetizioni nella risposta fornita.
Quando spieghi che cosa è o cosa significa un certo elemento richiesto, non parlarne come se fosse un problema.

In caso di più domande rispondi solo a quelle inerenti alla documentazione e rimani a disposizione per altre domande sull'argomento, specificando,
invece, che le altre domande non sono state trovate pertinenti in questo contesto.

Domanda relativa al software Panthera: {question}
"""

# NOTE: this rebinds `prompt`, which earlier cells used for the plain-text
# image description prompt.
prompt = ChatPromptTemplate.from_template(template)
# Display the resulting template object as the cell output.
prompt

In [None]:
import os
from langchain_openai import ChatOpenAI
# LangChain tracing settings (tracing flag and API endpoint).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Both API keys are taken from the environment. Copying os.getenv(NAME) back
# into os.environ[NAME] changes nothing when the variable is set and raises an
# unhelpful TypeError when it is not (os.environ only accepts str), so the
# presence of the keys is checked explicitly instead.
missing_keys = [
    name
    for name in ('LANGCHAIN_API_KEY', 'OPENAI_API_KEY')
    if name not in os.environ
]
if missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
    )

In [None]:
# LLM used to answer the question: gpt-4o with temperature=0.
# NOTE: this rebinds `model`, which earlier cells bound to the Gemini
# GenerativeModel used for image descriptions; re-running those cells after
# this one would use the ChatOpenAI object instead.
model = ChatOpenAI(model_name="gpt-4o", temperature=0)
# NOTE(review): the original note here read "max_token" - presumably a
# reminder to set a token limit; confirm.

# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of each document into one string, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Chain: retrieve and format the context, pass the question through unchanged,
# fill the prompt, call the model and parse the output to a string.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | model | StrOutputParser()

# Run the chain on the sample question and pretty-print the answer.
answer = rag_chain.invoke(question)
pprint.pprint(answer)