In [1]:
import pymupdf
import io
import base64
from openai import OpenAI
from PIL import Image
from tqdm import tqdm

In [6]:
api_key = "API KEY"
client = OpenAI(api_key = api_key)

### Main code

In [None]:
import gradio as gr
from openai import OpenAI
import pymupdf
import io
import base64
from PIL import Image
from tqdm import tqdm
import os

# Function to convert PDF to images
def convert_pdf_to_images(pdf_path):
    pdf_document = pymupdf.open(pdf_path)
    images = []
    for page_number in range(len(pdf_document)):
        page = pdf_document.load_page(page_number)
        pix = page.get_pixmap(matrix=pymupdf.Matrix(0.6, 0.7))  # Increase resolution
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        images.append(img)
    pdf_document.close()
    return images

# Function to encode image to base64
def encode_image(image):
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Function to analyze images
def analyze_images(images, question):
    responses = []
    base64_images = [encode_image(img) for img in images]
    for idx, img in tqdm(enumerate(base64_images)):
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    *[{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img}"}}]
                ],
            }
        ]
        client = OpenAI(api_key = "API KEY")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=700
        )
        responses.append(response.choices[0].message.content)
    return responses

# Function to process PDF and return analysis
def process_pdf(pdf_file):
    if pdf_file is not None:
        # Ensure the 'KD' directory exists
        os.makedirs('KD', exist_ok=True)

        # Determine the text file path
        txt_file_path = os.path.join('KD', os.path.basename(pdf_file.name).replace('.pdf', '.txt'))

        # Check if the text file already exists
        if os.path.exists(txt_file_path):
            # Read the existing text file
            with open(txt_file_path, 'r') as file:
                analysis = file.read()
        else:
            # Convert PDF to images and analyze
            images = convert_pdf_to_images(pdf_file.name)
            question = "You are a Soccer Analyst, capable of reproducing data from an game's image to text in an elaborate and detailed manner. Provide a detailed description of the contents on the page. Be careful with spellings."
            analysis = analyze_images(images, question)

            # Save the analysis to a text file
            with open(txt_file_path, 'w') as file:
                file.write("\n\n".join(analysis))

        return analysis
    else:
        return "Please upload a PDF file."

# Function to handle chat interaction
def chat_with_llm(analysis, user_query):
    if not analysis:
        return "Please upload a PDF file and wait for processing to complete."

    # Prepare messages for the OpenAI model
    messages = [
        {"role": "system", "content": analysis},
        {"role": "user", "content": user_query},
    ]

    # Call the OpenAI model
    client = OpenAI(api_key = "API KEY")
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=300
    )

    return response.choices[0].message.content

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("# PDF Analysis with LLM")
    gr.Markdown("This project allows you to upload a PDF file and interact with a language model to analyze the contents of the PDF.")
    
    pdf_input = gr.File(label="Upload PDF", type="filepath")
    analysis_state = gr.State()
    user_query = gr.Textbox(label="Enter your query", lines=2, interactive=False)
    chat_output = gr.Textbox(label="Chat with LLM", lines=10)
    submit_button = gr.Button("Submit", interactive=False)

    def update_analysis(pdf_file):
        analysis = process_pdf(pdf_file)
        return analysis, gr.update(interactive=True), gr.update(interactive=True)

    pdf_input.change(update_analysis, inputs=pdf_input, outputs=[analysis_state, user_query, submit_button])
    submit_button.click(chat_with_llm, inputs=[analysis_state, user_query], outputs=chat_output)

# Launch the Gradio interface
demo.launch(server_port=7102)

### Additional Testing

In [5]:
import gradio as gr
from openai import OpenAI
import pymupdf
import io
import base64
from PIL import Image
from tqdm import tqdm
import os
import ipyplot

# Global variable to store the analysis
pdf_analysis = ""

# Function to convert PDF to images
def convert_pdf_to_images(pdf_path):
    pdf_document = pymupdf.open(pdf_path)
    images = []
    for page_number in range(len(pdf_document)):
        page = pdf_document.load_page(page_number)
        pix = page.get_pixmap(matrix=pymupdf.Matrix(0.6, 0.7))  # Increase resolution
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        images.append(img)
    pdf_document.close()
    return images

# Function to encode image to base64
def encode_image(image):
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Function to analyze images
def analyze_images(images, question):
    responses = []
    base64_images = [encode_image(img) for img in images]
    for idx, img in tqdm(enumerate(base64_images)):

        print(idx, len(img))
        messages = [
            {
                "role": "user",
                "content": question
            },
            {
                "role": "user",
                "content": f"data:image/png;base64,{img}"
            }
        ]
        client = OpenAI(api_key = "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")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=500
        )
        responses.append(response.choices[0].message.content)
    return responses

# Function to process PDF and return analysis
def process_pdf(pdf_file):
    if pdf_file is not None:
        # Ensure the 'KD' directory exists
        os.makedirs('KD', exist_ok=True)

        # Determine the text file path
        txt_file_path = os.path.join('KD', os.path.basename(pdf_file.name).replace('.pdf', '.txt'))

        # Check if the text file already exists
        if os.path.exists(txt_file_path):
            # Read the existing text file
            with open(txt_file_path, 'r') as file:
                analysis = file.read()
        else:
            # Convert PDF to images and analyze
            images = convert_pdf_to_images(pdf_file.name)
            print(images)#ipyplot.plot_images(images)
            prompt = "You are a Soccer Analyst, capable of reproducing data from an game's image to text in an elaborate and detailed manner. Provide a detailed description of the contents on the page. Be careful with spellings, names, dates and numbers."
            print(len(images))
            analysis = analyze_images(images, prompt)

            # Save the analysis to a text file
            with open(txt_file_path, 'w') as file:
                file.write("\n\n".join(analysis))

        return analysis
    else:
        return "Please upload a PDF file."

def chat_with_llm(message, history):
    global pdf_analysis
    if not pdf_analysis:
        return [("Human", message), ("Assistant", "Please upload a PDF file before chatting.")], ""
    
    # Ensure pdf_analysis is a single string
    if isinstance(pdf_analysis, list):
        pdf_analysis_content = "\n\n".join(pdf_analysis)
    else:
        pdf_analysis_content = pdf_analysis

    # Prepare messages for the OpenAI model
    messages = [
        {
            "role": "system",
            "content": "You are a Soccer Analyst assisting with detailed game data extracted from PDF documents. Use the follwoing data about the game and answer user question. CONTEXT: {pdf_analysis_content} "
        },
        *[{"role": "user" if h[0] == "Human" else "assistant", "content": h[1]} for h in history],
        {
            "role": "user",
            "content": message
        }
    ]

    # Initialize the OpenAI client
    client = OpenAI(api_key="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")

    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=messages,
            max_tokens=500
        )
        bot_message = response.choices[0].message.content
        history.append(("Human", message))
        history.append(("Assistant", bot_message))
        return history, ""  # Return updated history and clear input
    except client.error.OpenAIError as e:
        # Handle exceptions from OpenAI
        error_message = f"An error occurred: {e}"
        history.append(("Assistant", error_message))
        return history, ""


def handle_pdf_upload(pdf_file):
    global pdf_analysis
    if pdf_file is not None:
        pdf_analysis = process_pdf(pdf_file)
        return "PDF processed successfully. You can now start chatting!"
    return "Please upload a PDF file."

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# PDF Analysis with LLM")
    gr.Markdown("Upload a PDF file to analyze its contents and chat about it.")
    
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", type="filepath")
        pdf_status = gr.Textbox(label="Status", interactive=False)

    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(placeholder="Enter your query", label="User Input")
    clear = gr.Button("Clear Chat")

    def disable_chat_input():
        return gr.update(interactive=False), gr.update(interactive=False)

    def enable_chat_input():
        return gr.update(interactive=True), gr.update(interactive=True)

    pdf_input.upload(handle_pdf_upload, inputs=pdf_input, outputs=pdf_status).then(
        enable_chat_input, outputs=[msg, clear]
    )
    msg.submit(chat_with_llm, inputs=[msg, chatbot], outputs=[chatbot, msg])
    clear.click(lambda: None, None, chatbot, queue=False)

    # Disable chat input initially
    msg.interactive = False
    clear.interactive = False

demo.launch(server_port=7033)




* Running on local URL:  http://127.0.0.1:7033

To create a public link, set `share=True` in `launch()`.




[<PIL.Image.Image image mode=RGB size=358x590 at 0x118586890>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118584B10>, <PIL.Image.Image image mode=RGB size=358x590 at 0x1182E7510>, <PIL.Image.Image image mode=RGB size=358x590 at 0x1182E6CD0>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118FC65D0>, <PIL.Image.Image image mode=RGB size=358x590 at 0x1134E3910>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118586810>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118586A90>, <PIL.Image.Image image mode=RGB size=358x590 at 0x113520DD0>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118584150>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118A168D0>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118584450>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118587890>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118FC6850>, <PIL.Image.Image image mode=RGB size=358x590 at 0x118585290>, <PIL.Image.Image image mode=RGB size=358x590 at 0x1135A49D0>, <PIL.Im

0it [00:00, ?it/s]

0 21684


1it [00:03,  3.14s/it]

1 66736


2it [00:07,  3.84s/it]

2 96400


3it [00:40, 17.10s/it]

3 89700


4it [00:55, 16.35s/it]

4 77124


5it [01:09, 15.38s/it]

5 60304


6it [01:22, 14.75s/it]

6 38544


7it [01:28, 11.68s/it]

7 83452


8it [01:42, 12.65s/it]

8 61832


9it [01:48, 10.44s/it]

9 38180


10it [01:59, 10.74s/it]

10 82800


11it [02:10, 10.77s/it]

11 54656


12it [02:20, 10.61s/it]

12 49648


13it [02:33, 11.34s/it]

13 77976


14it [02:46, 11.76s/it]

14 73100


15it [03:11, 15.64s/it]

15 78080


16it [03:34, 17.96s/it]

16 126160


17it [04:02, 20.81s/it]

17 89088


18it [04:21, 20.35s/it]

18 66864


19it [04:33, 18.01s/it]

19 56672


20it [04:43, 15.50s/it]

20 76140


21it [04:57, 15.02s/it]

21 58420


22it [05:02, 11.91s/it]

22 111452


23it [05:15, 12.28s/it]

23 75192


24it [05:20, 10.11s/it]

24 67960


25it [05:33, 13.34s/it]


In [None]:
"Who are the substitute players from Indiana and did they score"
"How did player 17 from northwestern perfrom"


In [22]:
import gradio as gr
from openai import OpenAI
import pymupdf
import io
import base64
from PIL import Image
from tqdm import tqdm
import os

# Ensure OpenAI API Key is set in environment variables
API_KEY = "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"
if not API_KEY:
    raise ValueError("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")

# Set OpenAI API key
client = OpenAI(api_key = API_KEY)

# Global variable to store the analysis
pdf_analysis = ""

# Function to convert PDF to images
def convert_pdf_to_images(pdf_path):
    pdf_document = pymupdf.open(pdf_path)
    images = []
    for page_number in range(len(pdf_document)):
        page = pdf_document.load_page(page_number)
        pix = page.get_pixmap(matrix=pymupdf.Matrix(0.6, 0.7))  # Adjust resolution as needed
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        images.append(img)
    pdf_document.close()
    return images

# Function to encode image to base64
def encode_image(image):
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Function to analyze images
def analyze_images(images, question, log):
    responses = []
    base64_images = [encode_image(img) for img in images]
    
    for idx, img in tqdm(enumerate(base64_images), total=len(base64_images), desc="Analyzing Images"):
        log += f"Processing image {idx + 1}/{len(base64_images)}\n"
        print(f"Processing image {idx + 1}/{len(base64_images)}")
        
        messages = [
            {
                "role": "user",
                "content": question
            },
            {
                "role": "user",
                "content": f"data:image/png;base64,{img}"
            }
        ]
        
        #try:
        client = OpenAI(api_key = API_KEY)
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=500
        )
        analysis_text = response.choices[0].message.content.strip()
        responses.append(analysis_text)
        log += f"Image {idx + 1} processed successfully.\n"
        # except openai.error.OpenAIError as e:
        #     error_msg = f"Error processing image {idx + 1}: {e}\n"
        #     responses.append(error_msg)
        #     log += error_msg
    
    return responses, log

# Function to summarize analysis (Optional but recommended)
def summarize_analysis(full_text):
    # try:
    client = OpenAI(api_key = API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a summarization assistant."},
            {"role": "user", "content": f"Please provide a concise summary of the following text:\n\n{full_text}"}
        ],
        max_tokens=500
    )
    summary = response.choices[0].message.content.strip()
    return summary
    # except openai.error.OpenAIError as e:
    #     return f"Error during summarization: {e}"

# Function to process PDF and return analysis
def process_pdf(pdf_file, log):
    global pdf_analysis
    if pdf_file is not None:
        # Ensure the 'KD' directory exists
        os.makedirs('KD', exist_ok=True)
        # Determine the text file path
        txt_file_path = os.path.join('KD', os.path.basename(pdf_file.name).replace('.pdf', '.txt'))
        
        # Check if the text file already exists
        if os.path.exists(txt_file_path):
            # Read the existing text file
            with open(txt_file_path, 'r', encoding='utf-8') as file:
                analysis = file.read()
            log += "Existing analysis found. Loading from file.\n"
        else:
            # Convert PDF to images and analyze
            images = convert_pdf_to_images(pdf_file.name)
            log += f"Converted PDF to {len(images)} images.\n"
            print(f"Converted PDF to {len(images)} images.")
            
            prompt = ("You are a Soccer Analyst, capable of reproducing data from an game's image to text in an elaborate and detailed manner. "
                      "Provide a detailed description of the contents on the page. Be careful with spellings, names, dates, and numbers.")
            
            analysis, log = analyze_images(images, prompt, log)
            log += "Image analysis completed.\n"
            print("Image analysis completed.")
            
            # Optional: Summarize the analysis to fit within token limits
            # summarized_analysis = summarize_analysis("\n\n".join(analysis))
            # if not summarized_analysis.startswith("Error during summarization"):
            #     analysis = summarized_analysis
            #     log += "Summarization completed successfully.\n"
            # else:
            #     log += f"{summarized_analysis}\n"
            
            # Save the analysis to a text file

            with open(txt_file_path, 'w', encoding='utf-8') as file:
                if isinstance(analysis, list):
                    file.write("\n\n".join(analysis))
                else:
                    file.write(analysis)
            # with open(txt_file_path, 'w', encoding='utf-8') as file:
            #     file.write(analysis)
            log += f"Analysis saved to {txt_file_path}.\n"
        
        pdf_analysis = analysis  # Store the analysis for chatbot
        return "PDF processed successfully. You can now start chatting!", log
    else:
        log += "No PDF file uploaded.\n"
        return "Please upload a PDF file.", log

# Function to handle chat with LLM
def chat_with_llm(message, history, log):
    global pdf_analysis
    if not pdf_analysis:
        log += "Chat attempted without PDF analysis.\n"
        return [("Human", message), ("Assistant", "Please upload a PDF file before chatting.")], "", log
    
    # Ensure pdf_analysis is a single string
    if isinstance(pdf_analysis, list):
        pdf_analysis_content = "\n\n".join(pdf_analysis)
    else:
        pdf_analysis_content = pdf_analysis
    
    # Prepare messages for the OpenAI model
    messages = [
        {
            "role": "system",
            "content": "You are a Soccer Analyst assisting with detailed game data extracted from PDF documents. Use the following data about the game to answer the user's questions."
        },
        {
            "role": "user",
            "content": f"CONTEXT: {pdf_analysis_content}"
        },
        *[{"role": "user" if h[0] == "Human" else "assistant", "content": h[1]} for h in history],
        {
            "role": "user",
            "content": message
        }
    ]
    
    # try:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=500
    )
    bot_message = response.choices[0].message.content.strip()
    history.append(("Human", message))
    history.append(("Assistant", bot_message))
    log += f"User: {message}\nAssistant: {bot_message}\n"
    return history, "", log
    # except openai.error.OpenAIError as e:
    #     error_message = f"An error occurred: {e}"
    #     history.append(("Assistant", error_message))
    #     log += f"{error_message}\n"
    #     return history, "", log

# Function to handle PDF upload
def handle_pdf_upload(pdf_file, log):
    if pdf_file is not None:
        status, log = process_pdf(pdf_file, log)
        return status, log
    else:
        log += "PDF upload failed. No file provided.\n"
        return "Please upload a PDF file.", log

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# PDF Analysis with LLM")
    gr.Markdown("Upload a PDF file to analyze its contents and chat about it.")
    
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", type="filepath")  # Corrected type
        pdf_status = gr.Textbox(label="Status", interactive=False)
    
    with gr.Row():
        chatbot = gr.Chatbot(height=400)
        msg = gr.Textbox(placeholder="Enter your query", label="User Input")
        clear = gr.Button("Clear Chat")
    
    with gr.Row():
        log_box = gr.Textbox(label="Logging", lines=10, interactive=False)
    
    def disable_chat_input():
        return gr.update(interactive=False), gr.update(interactive=False)
    
    def enable_chat_input():
        return gr.update(interactive=True), gr.update(interactive=True)
    
    # Initially disable chat input and clear button
    msg.interactive = False
    clear.interactive = False
    
    # Handle PDF upload
    pdf_input.upload(
        fn=handle_pdf_upload,
        inputs=[pdf_input, log_box],
        outputs=[pdf_status, log_box]
    ).then(
        enable_chat_input,
        outputs=[msg, clear]
    )
    
    # Handle user messages
    msg.submit(
        fn=chat_with_llm,
        inputs=[msg, chatbot, log_box],
        outputs=[chatbot, msg, log_box]
    )
    
    # Handle clearing the chat
    clear.click(
        fn=lambda: ([], "", "Chat cleared.\n"),
        inputs=None,
        outputs=[chatbot, msg, log_box],
        queue=False
    )
    
    # Ensure the chatbot input is disabled initially
    pdf_status.value = "Please upload a PDF file to begin."
    
demo.launch(server_port=7042)



* Running on local URL:  http://127.0.0.1:7042

To create a public link, set `share=True` in `launch()`.




Converted PDF to 24 images.


Analyzing Images:   0%|          | 0/24 [00:00<?, ?it/s]

Processing image 1/24


Analyzing Images:   4%|▍         | 1/24 [00:04<01:42,  4.44s/it]

Processing image 2/24


Analyzing Images:   8%|▊         | 2/24 [00:14<02:56,  8.02s/it]

Processing image 3/24


Analyzing Images:  12%|█▎        | 3/24 [00:33<04:27, 12.73s/it]

Processing image 4/24


Analyzing Images:  17%|█▋        | 4/24 [00:43<03:56, 11.83s/it]

Processing image 5/24


Analyzing Images:  21%|██        | 5/24 [01:02<04:32, 14.36s/it]

Processing image 6/24


Analyzing Images:  25%|██▌       | 6/24 [01:10<03:39, 12.19s/it]

Processing image 7/24


Analyzing Images:  29%|██▉       | 7/24 [01:14<02:38,  9.32s/it]

Processing image 8/24


Analyzing Images:  33%|███▎      | 8/24 [01:25<02:40, 10.06s/it]

Processing image 9/24


Analyzing Images:  38%|███▊      | 9/24 [01:47<03:27, 13.86s/it]

Processing image 10/24


Analyzing Images:  42%|████▏     | 10/24 [01:57<02:56, 12.61s/it]

Processing image 11/24


Analyzing Images:  46%|████▌     | 11/24 [02:08<02:36, 12.02s/it]

Processing image 12/24


Analyzing Images:  50%|█████     | 12/24 [02:12<01:53,  9.49s/it]

Processing image 13/24


Analyzing Images:  54%|█████▍    | 13/24 [02:15<01:25,  7.78s/it]

Processing image 14/24


Analyzing Images:  58%|█████▊    | 14/24 [02:21<01:12,  7.23s/it]

Processing image 15/24


Analyzing Images:  62%|██████▎   | 15/24 [02:31<01:11,  7.95s/it]

Processing image 16/24


Analyzing Images:  67%|██████▋   | 16/24 [02:38<01:01,  7.68s/it]

Processing image 17/24


Analyzing Images:  71%|███████   | 17/24 [02:54<01:10, 10.12s/it]

Processing image 18/24


Analyzing Images:  75%|███████▌  | 18/24 [03:07<01:05, 10.98s/it]

Processing image 19/24


Analyzing Images:  79%|███████▉  | 19/24 [03:14<00:49,  9.94s/it]

Processing image 20/24


Analyzing Images:  83%|████████▎ | 20/24 [03:26<00:41, 10.45s/it]

Processing image 21/24


Analyzing Images:  88%|████████▊ | 21/24 [03:40<00:34, 11.61s/it]

Processing image 22/24


Analyzing Images:  92%|█████████▏| 22/24 [03:49<00:21, 10.78s/it]

Processing image 23/24


Analyzing Images:  96%|█████████▌| 23/24 [03:56<00:09,  9.48s/it]

Processing image 24/24


Analyzing Images: 100%|██████████| 24/24 [04:00<00:00, 10.02s/it]


Image analysis completed.
