<a href="https://colab.research.google.com/github/deepika2k24/NDU_CERTILOG/blob/main/NDU_CERTILOG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ---------------------- SETUP ----------------------
!apt-get install -y libzbar0 poppler-utils
!pip install pytesseract opencv-python pyzbar pillow requests pdf2image python-pptx

# ---------------------- IMPORTS ----------------------
import os
import cv2
from pyzbar.pyzbar import decode
from PIL import Image
import pytesseract
import requests
import re
from google.colab import files
from pdf2image import convert_from_path
from pptx import Presentation

# ---------------------- FILE UPLOAD ----------------------
# files.upload() shows the Colab upload widget; the result is keyed by the
# uploaded file names, which are then used as local paths.
uploaded = files.upload()
if not uploaded:
    # Nothing came back (e.g. the dialog was dismissed): fail with a clear
    # message instead of an IndexError on the lookup below.
    raise ValueError(
        "No file was uploaded. Re-run this cell and choose a PDF, PPTX, JPG, JPEG, or PNG."
    )

# Only the first uploaded file is processed.
uploaded_file = next(iter(uploaded))
if len(uploaded) > 1:
    print("Note: multiple files uploaded; only the first one will be processed.")

# Split "certificate.PDF" into ("certificate", ".PDF"); the stem is reused
# later to name derived image files.
filename, ext = os.path.splitext(uploaded_file)
ext = ext.lower()  # normalise so the extension checks are case-insensitive
print("Uploaded:", uploaded_file)

# ---------------------- CONVERT TO IMAGE IF NEEDED ----------------------
# Build the list of image file paths that the processing loop will scan.
#   * JPG / JPEG / PNG : used as-is
#   * PDF              : every page is rasterised to its own PNG
#   * PPTX             : every picture embedded in a slide is exported
images_to_process = []

if ext in ('.jpg', '.jpeg', '.png'):
    images_to_process.append(uploaded_file)
elif ext == '.pdf':
    # 300 DPI keeps small print and QR modules legible for OCR / decoding.
    pages = convert_from_path(uploaded_file, dpi=300)
    for page_no, page in enumerate(pages, start=1):
        img_path = f"{filename}_page{page_no}.png"
        page.save(img_path, 'PNG')
        images_to_process.append(img_path)
elif ext == '.pptx':
    # python-pptx cannot render a slide to a bitmap, so instead of trying to
    # "save the slide" we export the pictures embedded in each slide (scanned
    # certificates are normally inserted as a picture).
    # NOTE: pictures nested inside group shapes are not visited.
    prs = Presentation(uploaded_file)
    for slide_no, slide in enumerate(prs.slides, start=1):
        for shape_no, shape in enumerate(slide.shapes, start=1):
            try:
                picture = shape.image
            except (AttributeError, ValueError):
                # AttributeError: the shape is not a picture.
                # ValueError: the picture is linked rather than embedded.
                continue
            # Keep the picture's native format; unreadable formats are
            # reported and skipped by the processing loop.
            img_path = f"{filename}_slide{slide_no}_img{shape_no}.{picture.ext}"
            with open(img_path, 'wb') as out_file:
                out_file.write(picture.blob)
            images_to_process.append(img_path)
    if not images_to_process:
        print("⚠ No embedded pictures found in the presentation; nothing to process.")
else:
    raise ValueError("Unsupported file type. Upload PDF, PPTX, JPG, JPEG, or PNG.")

# ---------------------- PROCESS EACH IMAGE ----------------------
# Loop-invariant settings and helpers are defined once, ahead of the loop.

# Numeric dates such as 1/2/23, 01-02-2023 or 31/12/2023.
date_pattern = r'\d{1,2}[-/]\d{1,2}[-/]\d{2,4}'
# A line containing any of these words is taken as the event description.
EVENT_KEYWORDS = ('Event', 'Competition', 'Award', 'Certificate')
# Labels that usually precede the recipient's name.
NAME_LABELS = ('Name', 'Participant')


def verify_url(url):
    """Return True if an HTTP GET of *url* answers with status 200.

    Any request failure (malformed URL, missing scheme, DNS error, timeout,
    connection problem) counts as "not verified" and returns False.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=headers, timeout=10)
    except (requests.RequestException, ValueError):
        # Only request/URL-parsing errors are swallowed, so Ctrl-C and
        # programming errors still surface.
        return False
    return r.status_code == 200


def _has_name_label(line):
    """Return True if *line* contains one of the name labels."""
    return any(label in line for label in NAME_LABELS)


def _extract_name(text):
    """Return the recipient name guessed from OCR *text*, or None.

    First choice is the text after the last ':' on a line carrying a name
    label. Labelled lines with nothing after the colon are skipped so the
    fallback below still gets a chance. The fallback is the first unlabelled
    line whose first two words both start with an uppercase letter.
    """
    lines = text.splitlines()
    for line in lines:
        if _has_name_label(line):
            candidate = line.split(":")[-1].strip()
            if candidate:
                return candidate
    for line in lines:
        words = line.split()
        if (len(words) >= 2
                and not _has_name_label(line)
                and all(w[0].isupper() for w in words[:2])):
            return line.strip()
    return None


def _extract_event(text):
    """Return the first line of *text* containing an event keyword, or None."""
    for line in text.splitlines():
        if any(k in line for k in EVENT_KEYWORDS):
            return line.strip()
    return None


for img_file in images_to_process:
    print("\nProcessing:", img_file)

    # Read image
    img = cv2.imread(img_file)
    if img is None:
        print("⚠ Could not read image. Skipping.")
        continue

    # ---- QR CODE EXTRACTION ----
    codes = decode(img)
    qr_data = None
    for code in codes:
        # errors="replace" keeps a non-UTF-8 payload from aborting the run;
        # if several codes are present the last one is kept.
        qr_data = code.data.decode("utf-8", errors="replace")
        print("QR / Certificate URL Found:", qr_data)
    if qr_data is None:
        print("No QR code found in the certificate.")

    # ---- OCR TEXT EXTRACTION ----
    # The context manager releases the file handle once OCR is done.
    with Image.open(img_file) as pil_img:
        ocr_text = pytesseract.image_to_string(pil_img)
    print("\nExtracted Certificate Text:\n", ocr_text)

    # ---- AUTOMATIC FIELD EXTRACTION ----
    name = _extract_name(ocr_text)
    dates = re.findall(date_pattern, ocr_text)
    event = _extract_event(ocr_text)

    print("\nExtracted Details:")
    print("Name:", name)
    print("Event:", event)
    print("Dates:", dates)

    # ---- QR / URL VERIFICATION ----
    qr_valid = False
    if qr_data:
        qr_valid = verify_url(qr_data)
        if qr_valid:
            print("\n✅ QR / Certificate ID verified online")
        else:
            print("\n❌ QR / Certificate not valid / not reachable")

    # ---- FINAL VALIDATION ----
    # A certificate passes only when a name was found AND its QR URL
    # responded successfully.
    if name and qr_valid:
        print("\n🎉 Certificate Validation Result: VALID")
        print("Name:", name)
        print("Event:", event)
        print("Dates:", dates)
        print("QR / URL:", qr_data)
    else:
        print("\n⚠ Certificate Validation Result: INVALID / Needs Manual Review")