In [1]:
pip install pillow opencv-python

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
from PIL import Image
import cv2
import numpy as np

input_folder = "output/raw_images"
output_folder = "output/processed_images"
os.makedirs(output_folder, exist_ok=True)

TARGET_SIZE = (256, 256)

def preprocess_image(file_path, save_path):
    image = Image.open(file_path).convert("RGB")
    gray = image.convert("L")
    resized = gray.resize(TARGET_SIZE)
    opencv_img = cv2.cvtColor(np.array(resized), cv2.COLOR_GRAY2BGR)
    denoised = cv2.fastNlMeansDenoisingColored(opencv_img, None, 10, 10, 7, 21)
    final_img = Image.fromarray(cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB))
    final_img.save(save_path)

# Loop through all images in raw_images
for filename in os.listdir(input_folder):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        in_path = os.path.join(input_folder, filename)
        out_path = os.path.join(output_folder, filename)
        preprocess_image(in_path, out_path)
        print(f"✅ Processed: {filename}")

print("\n All images processed and saved to:", output_folder)


✅ Processed: page10_image1.jpeg
✅ Processed: page10_image2.png
✅ Processed: page10_image3.png
✅ Processed: page10_image4.png
✅ Processed: page11_image1.jpeg
✅ Processed: page11_image2.png
✅ Processed: page11_image3.png
✅ Processed: page11_image4.jpeg
✅ Processed: page12_image1.jpeg
✅ Processed: page12_image2.png
✅ Processed: page13_image1.jpeg
✅ Processed: page13_image2.png
✅ Processed: page13_image3.png
✅ Processed: page14_image1.jpeg
✅ Processed: page1_image1.jpeg
✅ Processed: page1_image10.png
✅ Processed: page1_image2.png
✅ Processed: page1_image3.png
✅ Processed: page1_image4.png
✅ Processed: page1_image5.png
✅ Processed: page1_image6.png
✅ Processed: page1_image7.png
✅ Processed: page1_image8.png
✅ Processed: page1_image9.png
✅ Processed: page2_image1.jpeg
✅ Processed: page2_image2.png
✅ Processed: page2_image3.png
✅ Processed: page2_image4.png
✅ Processed: page2_image5.png
✅ Processed: page2_image6.jpeg
✅ Processed: page2_image7.png
✅ Processed: page3_image1.jpeg
✅ Processed: pa

In [4]:
pip install PyMuPDF





In [5]:
import fitz  # PyMuPDF
import os

# Paths
pdf_path = "IMO class 1 Maths Olympiad Sample Paper 1 for the year 2024-25.pdf"
output_folder = "output/raw_images"
os.makedirs(output_folder, exist_ok=True)

# Open the PDF
doc = fitz.open(pdf_path)
print(f"✅ Opened PDF: {pdf_path}")

image_count = 0

# Loop through each page
for page_num in range(len(doc)):
    page = doc[page_num]
    images = page.get_images(full=True)

    for img_index, img in enumerate(images):
        xref = img[0]
        base_image = doc.extract_image(xref)
        image_bytes = base_image["image"]
        image_ext = base_image["ext"]
        
        image_filename = f"page{page_num + 1}_image{img_index + 1}.{image_ext}"
        image_path = os.path.join(output_folder, image_filename)

        with open(image_path, "wb") as f:
            f.write(image_bytes)

        image_count += 1
        print(f"🔹 Extracted {image_filename}")

doc.close()
print(f"\n Done! Extracted {image_count} images into: {output_folder}")

✅ Opened PDF: IMO class 1 Maths Olympiad Sample Paper 1 for the year 2024-25.pdf
🔹 Extracted page1_image1.jpeg
🔹 Extracted page1_image2.png
🔹 Extracted page1_image3.png
🔹 Extracted page1_image4.png
🔹 Extracted page1_image5.png
🔹 Extracted page1_image6.png
🔹 Extracted page1_image7.png
🔹 Extracted page1_image8.png
🔹 Extracted page1_image9.png
🔹 Extracted page1_image10.png
🔹 Extracted page2_image1.jpeg
🔹 Extracted page2_image2.png
🔹 Extracted page2_image3.png
🔹 Extracted page2_image4.png
🔹 Extracted page2_image5.png
🔹 Extracted page2_image6.jpeg
🔹 Extracted page2_image7.png
🔹 Extracted page3_image1.jpeg
🔹 Extracted page3_image2.png
🔹 Extracted page4_image1.jpeg
🔹 Extracted page4_image2.jpeg
🔹 Extracted page4_image3.png
🔹 Extracted page4_image4.png
🔹 Extracted page4_image5.png
🔹 Extracted page5_image1.jpeg
🔹 Extracted page5_image2.png
🔹 Extracted page5_image3.png
🔹 Extracted page5_image4.png
🔹 Extracted page5_image5.png
🔹 Extracted page6_image1.jpeg
🔹 Extracted page6_image2.png
🔹 Extracted

In [6]:
pip install transformers torch torchvision


Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import json
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Load BLIP model
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-vqa-base")

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Correct folder with your actual images
input_folder = "output/processed_images"
output_json = "output/generated_questions.json"

# Store results
results = []

# Loop through each image
for filename in sorted(os.listdir(input_folder)):
    if filename.lower().endswith((".png", ".jpg", ".jpeg")):
        image_path = os.path.join(input_folder, filename)
        image = Image.open(image_path).convert("RGB")

        # Prompt to BLIP
        prompt = "What question does this image represent?"

        inputs = processor(image, prompt, return_tensors="pt").to(device)
        out = model.generate(**inputs)
        generated_question = processor.decode(out[0], skip_special_tokens=True)

        results.append({
            "image": image_path,
            "generated_question": generated_question
        })

        print(f"{filename} → {generated_question}")

# Save to JSON
with open(output_json, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

print(f"\n All questions saved to: {output_json}")


page10_image1.jpeg → what question does this image represent?
page10_image2.png → what question does this image represent?
page10_image3.png → what question does this image represent?
page10_image4.png → what question does this image represent?
page11_image1.jpeg → what question does this image represent?
page11_image2.png → what question does this image represent?
page11_image3.png → what question does this image represent?
page11_image4.jpeg → what question does this image represent?
page12_image1.jpeg → what question does this image represent?
page12_image2.png → what question does this image represent?
page13_image1.jpeg → what question does this image represent?
page13_image2.png → what question does this image represent?
page13_image3.png → what question does this image represent?
page14_image1.jpeg → what question does this image represent?
page1_image1.jpeg → what question does this image represent?
page1_image10.png → what question does this image represent?
page1_image2.png →