In [1]:
import base64
import json
import os

import boto3
import nest_asyncio
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import MarkdownElementNodeParser
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.readers.file import MarkdownReader
from unstructured.partition.pdf import partition_pdf
nest_asyncio.apply()

In [2]:
# Partition the source PDF with the 'hi_res' strategy; image extraction is
# enabled and pointed at ./data/images as the output directory.
filename = "./data/files/genai.pdf"
elements = partition_pdf(
    filename=filename,
    strategy='hi_res',
    extract_images_in_pdf=True,
    extract_image_block_output_dir="./data/images",
)

In [3]:
# Dump each parsed element as a dict, followed by blank lines, so the element
# types and metadata can be inspected by eye.
for element in elements:
    print(element.to_dict(), end="\n\n\n")

{'type': 'Title', 'element_id': '2ab24ebd32cf1c6b4334637656670517', 'text': 'Introduction about Generative AI', 'metadata': {'detection_class_prob': 0.7658371329307556, 'coordinates': {'points': ((198.94735717773438, 206.54434204101562), (198.94735717773438, 261.4333333333336), (948.8953333333332, 261.4333333333336), (948.8953333333332, 206.54434204101562)), 'system': 'PixelSpace', 'layout_width': 1700, 'layout_height': 2200}, 'last_modified': '2024-06-09T14:44:55', 'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 1, 'file_directory': './data/files', 'filename': 'genai.pdf'}}


{'type': 'Title', 'element_id': '162abca2cc38e647bdc5de060d1189c0', 'text': 'Overview', 'metadata': {'detection_class_prob': 0.828103244304657, 'coordinates': {'points': ((199.28900146484375, 306.73431396484375), (199.28900146484375, 355.92933333333355), (397.54386666666664, 355.92933333333355), (397.54386666666664, 306.73431396484375)), 'system': 'PixelSpace', 'layout_width': 1700, 'layout_he

In [4]:
def get_image_base64(image_path: str) -> str:
    """Read a file in binary mode and return its contents base64-encoded.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as stream:
        encoded = base64.b64encode(stream.read())
    return encoded.decode("utf-8")

def invoke_claude_3_with_image(image_base64: str, prompt: str, profile_name: str,
                               model_id: str = "anthropic.claude-3-sonnet-20240229-v1:0",
                               anthropic_version: str = "bedrock-2023-05-31",
                               max_tokens: int = 5000,
                               media_type: str = "image/jpeg",
                               region_name: str = "us-east-1"):
    """Send one image plus a text prompt to a model through Bedrock and return the parsed reply.

    Args:
        image_base64: Base64-encoded image data placed in the request's image source.
        prompt: Text sent in the same user message, after the image.
        profile_name: Name of the AWS profile used to build the boto3 session.
        model_id: Value passed as ``modelId`` to ``invoke_model``.
        anthropic_version: Value of the ``anthropic_version`` field in the request body.
        max_tokens: Value of the ``max_tokens`` field in the request body.
        media_type: Value of the image source's ``media_type`` field. Defaults to
            ``"image/jpeg"``; pass e.g. ``"image/png"`` when the encoded image is
            not a JPEG.
        region_name: AWS region of the ``bedrock-runtime`` client.

    Returns:
        The response body decoded from JSON.
    """
    # Use a dedicated session instead of boto3.setup_default_session(), which
    # would rebind the process-wide default session on every call.
    session = boto3.Session(profile_name=profile_name)
    client = session.client(service_name="bedrock-runtime", region_name=region_name)

    # Single user message: the image first, then the text instruction.
    request_body = {
        "anthropic_version": anthropic_version,
        "max_tokens": max_tokens,
        "messages": [
            {"role": "user", "content": [
                {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": image_base64}},
                {"type": "text", "text": prompt}
            ]}
        ],
    }
    response = client.invoke_model(modelId=model_id, body=json.dumps(request_body))

    # The "body" entry is read fully and then parsed as JSON.
    result = json.loads(response.get("body").read())
    return result

In [5]:
prompt = "Look carefully at the image, convert the image to a table with markdown format, only output text."
profile_name = "YOUR_AWS_PROFILE_NAME"
output = []

# Walk the parsed elements in order and collect one markdown fragment each:
#   - 'Image' elements are sent to Claude with the prompt above, and the text
#     of the first content entry of the reply is kept;
#   - 'Title' elements become "## " headings;
#   - every other element contributes its text unchanged.
for element in elements:
    items = element.to_dict()
    element_type = items['type']

    if element_type == 'Image':
        image_path = items['metadata']['image_path']
        image_base64 = get_image_base64(image_path)
        response = invoke_claude_3_with_image(image_base64, prompt, profile_name)
        output.append(response['content'][0]['text'])
    elif element_type == 'Title':
        output.append("## " + items['text'])
    else:
        output.append(items['text'])

output

['## Introduction about Generative AI',
 '## Overview',
 'Generative AI, also known as generative modeling, is a branch of artiﬁcial intelligence that focuses on creating models capable of generating new data that resembles a given dataset. These models are trained to learn and understand the underlying patterns and structures within the data, allowing them to generate new samples that share similar characteristics.',
 'Generative AI models operate by learning the probability distribution of the training data and then sampling from this distribution to create new instances. They can be broadly classiﬁed into two categories: generative models and generative adversarial networks (GANs).',
 '| Name | Description |\n|-|-|\n| LLM | Language model that generates text based on learned patterns and context. |  \n| Embedding model | Model that represents words or sentences as dense vectors, capturing semantic relationships. |\n| DAAS2 | D company specific technology solutions. |',
 'Generative 

In [6]:
# Join the collected fragments into one markdown document, separated by blank lines.
markdown_content = "\n\n".join(output)

file_path = 'data/output/output.md'  # Specify the file path and name

# Create the target directory first so the write does not fail with
# FileNotFoundError when data/output does not exist yet.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Write as UTF-8 explicitly: the extracted text contains non-ASCII characters
# (e.g. the "ﬁ" ligature), which a non-UTF-8 platform default encoding may
# be unable to encode.
with open(file_path, 'w', encoding='utf-8') as file:
    file.write(markdown_content)

In [7]:
# Set the embedding model and the LLM on llama_index's Settings object.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-en-v1.5",
)
Settings.llm = Ollama(
    model="mistral-7b-instruct:latest",
    request_timeout=30.0,
)




In [8]:
# Load the files under data/output, handing ".md" files to a MarkdownReader.
parser = MarkdownReader()
file_extractor = {".md": parser}
directory_reader = SimpleDirectoryReader("data/output", file_extractor=file_extractor)
documents = directory_reader.load_data()

# Split the loaded documents into nodes with the markdown element parser.
md_node_parser = MarkdownElementNodeParser(include_metadata=True)
md_nodes = md_node_parser.get_nodes_from_documents(documents=documents)

0it [00:00, ?it/s]
0it [00:00, ?it/s]
1it [00:00, 31300.78it/s]
100%|██████████| 1/1 [00:06<00:00,  6.28s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]


In [9]:
# Split the parser output into plain nodes and "objects" before indexing.
# Per the llama_index element-node-parser API, get_nodes_and_objects links each
# table-summary IndexNode to the table node it refers to, so retrieving a
# summary can resolve to the table itself. Indexing md_nodes directly leaves
# the summaries unlinked.
# NOTE(review): confirm against the installed llama_index version.
base_nodes, objects = md_node_parser.get_nodes_and_objects(md_nodes)
index = VectorStoreIndex(nodes=base_nodes + objects)

# Answer the question from the single most similar entry.
query_engine = index.as_query_engine(similarity_top_k=1)
response = query_engine.query("Please only use the context you have currently, tell me about DAAS2?")

In [10]:
# Print the query response; its string form is the answer text shown below.
print(response)


Based on the context in the table, DAAS2 is labeled as a technology solution but no further details are provided about its specific function or what it stands for. It's important to note that the table lacks a title or caption which could potentially provide more context. Without more information, it's impossible to accurately determine what DAAS2 is or does.
