In [1]:
from groq import Groq
import os
import subprocess
import dotenv

# Load variables from a local .env file into the process environment, then
# look up the Groq API key there (the value is None when it is not defined).
dotenv.load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [2]:

# Path to the lab-report image that will be sent to the vision model
image_path = "./Report_Images/Complete-blood-count-CBC-results.png"

# Pass the key to the constructor instead of patching the attribute afterwards:
# the client validates the key while it is being built, so a bare `Groq()` only
# works when GROQ_API_KEY already happens to be present in the environment.
client = Groq(api_key=GROQ_API_KEY)

In [14]:
import base64
import re

def process_image(chat_instance, image_path):
    """Send a lab-report image to a vision model and return its text reply.

    The image file is read, base64-encoded and embedded in a ``data:`` URL
    whose MIME type is derived from the file extension. The model is asked to
    return the lab values as JSON. If the first reply is empty or contains no
    JSON object at all, the request is repeated once with an alternative
    prompt.

    Args:
        chat_instance: Client exposing
            ``chat.completions.create(messages=..., model=...)`` whose result
            provides ``choices[0].message.content``.
        image_path: Path of the image file to analyse.

    Returns:
        The ``content`` of the first choice of the last completion requested.
        It is returned unparsed; callers are responsible for extracting the
        JSON payload.

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    # Local import keeps this function self-contained within its cell.
    import mimetypes

    # Function to encode the image
    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    # Getting the base64 string
    base64_image = encode_image(image_path)

    # Advertise the real image type in the data URL (the original always
    # claimed JPEG, even for PNG input). Fall back to JPEG when the extension
    # is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    def request_llm(base64_image, attempt=1):
        prompt = '''Extract the medical lab report related details from the provided image and format 
        them as a json object. Do not include any additional text, disclaimers, or notes—only return the 
        json object. Ignore all the unnecessary information like patient name, address or any-other thing
        just provide me the lab related data/table The output should start directly with the json object and 
        ensure correct details and alignment according to the image and make sure to provide the unit as well, correctly.'''
        
        # Modify prompt if first attempt fails
        if attempt > 1:
            prompt = '''Extract the table related or medical lab report related information from the
            given picture, dont consider any irrelevant information, only provide
            me the report related data, preserving alignment and make sure to provide the accurate readings
            and units from the given image. 
            Only return the json object, starting the response with the json object.'''

        chat_completion = chat_instance.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
                    ],
                }
            ],
            model="llama-3.2-90b-vision-preview",
        )

        return chat_completion.choices[0].message.content

    response = request_llm(base64_image)

    # Retry once with the alternative prompt when the reply cannot possibly
    # hold a JSON object (empty reply, refusal, plain prose).
    if not response or "{" not in response:
        response = request_llm(base64_image, attempt=2)

    return response

def extract_latex_table(response: str) -> str:
    """Return the first LaTeX ``table`` environment found in ``response``.

    A match starts at ``\\begin{table}``, must contain a ``\\begin{tabular}``
    and ends at the nearest following ``\\end{table}``; newlines inside the
    environment are allowed.

    Args:
        response: Text to search.

    Returns:
        The matched environment with surrounding whitespace stripped, or an
        empty string when the text holds no such environment.
    """
    found = re.search(
        r'\\begin{table}.*?\\begin{tabular}.*?\\end{table}',
        response,
        flags=re.DOTALL,
    )
    return found.group(0).strip() if found is not None else ""


In [15]:
# Run the extraction on the configured image; the result is the model's raw text reply.
processed_image = process_image(chat_instance=client, image_path=image_path)

In [16]:
# Show the raw model reply; print() renders its newlines instead of the escaped repr.
print(processed_image)

```json
{
  "Test name": "Wite blood cells (WBC)",
  "Initial CBC": "1.90",
  "18 months later": "4.25",
  "Reference range": "4.5–11.5",
  "Unit": "K/μL"
},
{
  "Test name": "Red blood cells (RBC)",
  "Initial CBC": "3.75",
  "18 months later": "2.47",
  "Reference range": "4–5.40",
  "Unit": "M/μL"
},
{
  "Test name": "Hemoglobin (Hb)",
  "Initial CBC": "11",
  "18 months later": "8.8",
  "Reference range": "12",
  "Unit": "g/dL"
},
{
  "Test name": "Hematocrit (Hct)",
  "Initial CBC": "30.6",
  "18 months later": "26.9",
  "Reference range": "35-49",
  "Unit": "%"
},
{
  "Test name": "Mean cell volume (MCV)",
  "Initial CBC": "81.6",
  "18 months later": "108.9",
  "Reference range": "80-94",
  "Unit": "fL"
},
{
  "Test name": "Mean cell hemoglobin (MCH)",
  "Initial CBC": "29.3",
  "18 months later": "35.6",
  "Reference range": "32-36",
  "Unit": "pg"
},
{
  "Test name": "Platelets",
  "Initial CBC": "12",
  "18 months later": "51",
  "Reference range": "150-450",
  "Unit": "K/μL"

In [16]:
def compile_latex(latex_table, output_pdf="output.pdf"):
    """Wrap a LaTeX table in a minimal document and compile it with pdflatex.

    A temporary ``table.tex`` is written to the current working directory and
    compiled there. On success the resulting ``table.pdf`` is moved to
    ``output_pdf`` (the original ignored this argument and left the file named
    ``table.pdf`` while reporting ``output_pdf``). The ``.aux``, ``.log`` and
    ``.tex`` files are removed whether or not compilation succeeded.

    Args:
        latex_table: LaTeX source placed between ``\\begin{document}`` and
            ``\\end{document}``.
        output_pdf: Destination path of the generated PDF.

    Returns:
        None. Success or failure is reported on standard output.
    """
    latex_document = f"""
    \\documentclass{{article}}
    \\usepackage{{booktabs}}
    \\begin{{document}}

    {latex_table}

    \\end{{document}}
    """

    job_name = "table"
    tex_filename = f"{job_name}.tex"
    produced_pdf = f"{job_name}.pdf"  # pdflatex names its output after the .tex file

    try:
        # Save LaTeX file with UTF-8 encoding
        with open(tex_filename, "w", encoding="utf-8") as f:
            f.write(latex_document)

        # Compile LaTeX using pdflatex
        try:
            subprocess.run(["pdflatex", "-interaction=nonstopmode", tex_filename], check=True)
        except subprocess.CalledProcessError as e:
            print("Error compiling LaTeX:", e)
        except FileNotFoundError as e:
            # Raised by subprocess when the pdflatex executable is not installed.
            print("Error compiling LaTeX: pdflatex executable not found:", e)
        else:
            # Honour the requested output path instead of leaving table.pdf behind.
            if os.path.abspath(produced_pdf) != os.path.abspath(output_pdf):
                os.replace(produced_pdf, output_pdf)
            print(f"PDF generated: {output_pdf}")
    finally:
        # Clean up auxiliary files, even when compilation failed
        for ext in [".aux", ".log", ".tex"]:
            try:
                os.remove(f"{job_name}{ext}")
            except FileNotFoundError:
                pass



In [None]:
# Example usage: compile the model reply to a PDF when it contains a LaTeX table
latex_code = extract_latex_table(processed_image)
if not latex_code:
    print("No valid LaTeX table found.")
else:
    compile_latex(latex_code)

In [19]:
import json
import pandas as pd

# Extract the JSON payload from the model reply.
# The prompt asks for bare JSON, but the reply may still be wrapped in a
# Markdown code fence (```json ... ```). Both forms are accepted; the original
# indexing raised IndexError whenever the fence was missing.
json_string = processed_image.strip()
if "```" in json_string:
    # Keep what follows the first opening fence and drop an optional "json" tag.
    json_string = json_string.split("```", 1)[1].lstrip()
    if json_string[:4].lower() == "json":
        json_string = json_string[4:]
    # Cut at the closing fence when present; a missing one leaves the text whole.
    json_string = json_string.split("```", 1)[0].strip()

# The reply can be a comma-separated run of objects; wrap it so it parses as one array
if not json_string.startswith("["):
    json_string = f"[{json_string}]"

# Convert the JSON string into a Python object (json.JSONDecodeError on malformed input)
data = json.loads(json_string)

# Create a pandas DataFrame from the data: one row per JSON object
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

                    Test name Initial CBC 18 months later Reference range  \
0      Wite blood cells (WBC)        1.90            4.25        4.5–11.5   
1       Red blood cells (RBC)        3.75            2.47          4–5.40   
2             Hemoglobin (Hb)          11             8.8              12   
3            Hematocrit (Hct)        30.6            26.9           35-49   
4      Mean cell volume (MCV)        81.6           108.9           80-94   
5  Mean cell hemoglobin (MCH)        29.3            35.6           32-36   
6                   Platelets          12              51         150-450   
7       Reticulocyte absolute       0.020           0.128           0.5-2   

   Unit  
0  K/μL  
1  M/μL  
2  g/dL  
3     %  
4    fL  
5    pg  
6  K/μL  
7     %  
