## Extract the text from an image and insert it into the database

In [1]:
import cv2
import numpy as np
import pytesseract
import mysql.connector

# If Tesseract is not on PATH, set pytesseract.pytesseract.tesseract_cmd to the executable's location before running OCR
def enlarge_process_blur_extract_text(image_path, standard_size=(1024, 768), ksize=(7, 7), show=True):
    """Run OCR on an image file after upscaling, shadow removal and binarization.

    Args:
        image_path: Path of the image file to read.
        standard_size: Minimum ``(width, height)``. An image smaller than this
            in either dimension is upscaled, keeping its aspect ratio, until
            both dimensions reach it.
        ksize: Kernel size passed to ``cv2.GaussianBlur``.
        show: When true (the default, matching the previous behaviour), display
            the binarized image and block until a key is pressed. Pass
            ``False`` for unattended runs.

    Returns:
        The text returned by ``pytesseract.image_to_string``, or ``None`` when
        the image could not be loaded.
    """
    # IMREAD_UNCHANGED keeps the file's own channel layout, so the array may be
    # grayscale (2-D), BGR (3 channels) or BGRA (4 channels).
    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

    if img is None:
        print(f"Error: Could not load image from {image_path}. Please check the path.")
        return None

    # Drop the alpha channel: the BGR -> gray conversion further down only
    # accepts 3-channel input.
    if img.ndim == 3 and img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    height, width = img.shape[:2]

    # Upscale small images so that both dimensions reach the standard size.
    if width < standard_size[0] or height < standard_size[1]:
        scale = max(standard_size[0] / width, standard_size[1] / height)
        # round() rather than int(): truncation could land one pixel short of
        # the target because of floating-point error.
        new_dimensions = (int(round(width * scale)), int(round(height * scale)))
        img = cv2.resize(img, new_dimensions, interpolation=cv2.INTER_LINEAR)

    # A grayscale image is already a single plane; colour images are split so
    # each channel is cleaned independently.
    planes = [img] if img.ndim == 2 else cv2.split(img)

    kernel = np.ones((7, 7), np.uint8)
    norm_planes = []
    for plane in planes:
        # Dilate + median blur yields a background estimate; subtracting it
        # from the plane evens out shadows before normalizing to 0..255.
        dilated_img = cv2.dilate(plane, kernel)
        bg_img = cv2.medianBlur(dilated_img, 21)
        diff_img = 255 - cv2.absdiff(plane, bg_img)
        norm_planes.append(
            cv2.normalize(diff_img, None, alpha=0, beta=255,
                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
        )

    # Only multi-channel results need a colour -> gray conversion.
    if len(norm_planes) == 1:
        gray_img = norm_planes[0]
    else:
        gray_img = cv2.cvtColor(cv2.merge(norm_planes), cv2.COLOR_BGR2GRAY)

    blurred_img = cv2.GaussianBlur(gray_img, ksize, 0)

    # Otsu picks the threshold automatically; the explicit 0 is ignored.
    _, binarized_img = cv2.threshold(blurred_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    if show:
        cv2.imshow('Binarized Image', binarized_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    text = pytesseract.image_to_string(binarized_img, lang='eng')

    print("Extracted Text:")
    print(text)

    return text

# Database configuration
import os

# Keyword arguments for mysql.connector.connect(). Every value can be
# overridden through an environment variable so credentials do not have to be
# edited into the notebook; the fallbacks are the previously hard-coded values.
db_config = {
    'user': os.environ.get('EXTRACTED_TEXT_DB_USER', 'root'),
    'password': os.environ.get('EXTRACTED_TEXT_DB_PASSWORD', 'fcp@123'),
    'host': os.environ.get('EXTRACTED_TEXT_DB_HOST', 'localhost'),
    'database': os.environ.get('EXTRACTED_TEXT_DB_NAME', 'Extracted_text'),
}

def insert_extracted_text(extracted_text):
    """Insert one row holding ``extracted_text`` into the ExtractedText table.

    Database errors are reported with ``print`` and not re-raised.

    Args:
        extracted_text: Value stored in the ``extracted_text`` column.
    """
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        # Parameterized query: the connector quotes the value itself.
        query = "INSERT INTO ExtractedText (extracted_text) VALUES (%s)"
        cursor.execute(query, (extracted_text,))
        conn.commit()

        print("Text inserted successfully")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Release the cursor and connection even when execute/commit failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Example usage
image_path = r"C:\Users\Janhavi\OneDrive\Desktop\Fergusson\Project Sem2\sample\WhatsApp Image 2025-03-02 at 01.21.20_71344006.jpg"
extracted_text = enlarge_process_blur_extract_text(
    image_path,
    standard_size=(1024, 768),
    ksize=(7, 7),
)
# A None result (image failed to load) or an empty string leaves nothing to store.
if extracted_text:
    insert_extracted_text(extracted_text)

Extracted Text:
1. The Treasure House within you

Yr have infinite riches within your reach. To gain them, all

have to do is open your mental eyes and behold the tressuny
house of infinity within you. There is a storehouse within you from
which you can extract everything you need to live life gloriously
joyously, and abundantly. ,

Many people are unaware of their own potential because they do
not know about this storehouse of infinite intelligence and boundless
love within themselves. Whatever you want, you can draw it forth.

A magnetized piece of iron will lift about twelve times its own
weight. But if you demagnetize this same piece of iron, it will not lift
even a feather.

In the same way, there are two types of people. Those who are
magnetized are full of confidence and faith. They know they are
born to succeed and to win.

Others, so many others, are demagnetized. They are full of fears
and doubts. When an opportunity comes, they say, “What if I fail?
I might lose my money. Pe

## Find matching text and show the relevant resource

In [8]:
import cv2
import pytesseract
import mysql.connector

# Database configuration
import os

# Keyword arguments for mysql.connector.connect(). Every value can be
# overridden through an environment variable so credentials do not have to be
# edited into the notebook; the fallbacks are the previously hard-coded values.
db_config = {
    'user': os.environ.get('EXTRACTED_TEXT_DB_USER', 'root'),
    'password': os.environ.get('EXTRACTED_TEXT_DB_PASSWORD', 'fcp@123'),
    'host': os.environ.get('EXTRACTED_TEXT_DB_HOST', 'localhost'),
    'database': os.environ.get('EXTRACTED_TEXT_DB_NAME', 'Extracted_text'),
}

def capture_image(image_path):
    """Load an image from disk with its original channel layout.

    Despite the name, nothing is captured from a camera: the file at
    ``image_path`` is read with ``cv2.imread``.

    Returns:
        The loaded image, or ``None`` (after printing an error) when the file
        could not be read.
    """
    loaded = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

    if loaded is not None:
        return loaded

    # imread signals failure by returning None rather than raising.
    print(f"Error: Could not load image from {image_path}. Please check the path.")
    return None

def extract_text_from_image(img):
    """Return the English text Tesseract finds in ``img``.

    A ``None`` image (for example a failed load) yields an empty string so
    callers can treat the result as text unconditionally.
    """
    return "" if img is None else pytesseract.image_to_string(img, lang='eng')

def find_matching_texts(extracted_text, *, min_line_length=1):
    """Return stored texts that contain any line of ``extracted_text``.

    Each distinct, non-blank line (stripped of surrounding whitespace) is
    searched for as a literal substring of ``ExtractedText.extracted_text``.

    Args:
        extracted_text: Text whose lines are used as search terms. ``None`` or
            an empty string yields an empty result.
        min_line_length: Lines shorter than this (after stripping) are not
            searched. The default of 1 searches every non-blank line; a larger
            value avoids matches on fragments such as single characters.

    Returns:
        A list of matching stored texts, each listed once, in the order first
        matched; or ``None`` if a database error occurred (the error is
        printed).
    """
    if not extracted_text:
        return []

    # Distinct search terms in first-seen order, so a repeated line costs one
    # query instead of many.
    shortest = max(1, min_line_length)
    search_lines = list(dict.fromkeys(
        stripped
        for stripped in (raw.strip() for raw in extracted_text.splitlines())
        if len(stripped) >= shortest
    ))
    if not search_lines:
        return []

    def escape_like(term):
        # Make LIKE wildcards literal; '|' is declared as the escape character
        # in the query below.
        return term.replace('|', '||').replace('%', '|%').replace('_', '|_')

    query = "SELECT extracted_text FROM ExtractedText WHERE extracted_text LIKE %s ESCAPE '|'"
    matching_texts = []
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        for term in search_lines:
            cursor.execute(query, ("%" + escape_like(term) + "%",))
            for row in cursor.fetchall():
                # A stored text usually matches many lines; report it once.
                if row[0] not in matching_texts:
                    matching_texts.append(row[0])

        return matching_texts

    except mysql.connector.Error as err:
        print(f"Error: {err}")
        return None

    finally:
        # Release the cursor and connection even when a query failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Example usage
image_path = r"C:\Users\Janhavi\OneDrive\Desktop\Fergusson\Project Sem2\sample\WhatsApp Image 2025-03-02 at 01.21.20_71344006.jpg"  # Ensure this is an image file
img = capture_image(image_path)
extracted_text = extract_text_from_image(img)

# With no text (image failed to load or OCR found nothing) there is nothing to
# look up, so skip the database round trip.
matching_texts = find_matching_texts(extracted_text) if extracted_text.strip() else []

if matching_texts is None:
    # find_matching_texts returns None after printing a database error; do not
    # present that as "nothing matched".
    print("Search could not be completed because of a database error")
elif matching_texts:
    print("Matching references:")
    for text in matching_texts:
        print(text)
else:
    print("No matching texts found")


Matching references:
1. The Treasure House within you

Yr have infinite riches within your reach. To gain them, all

have to do is open your mental eyes and behold the tressuny
house of infinity within you. There is a storehouse within you from
which you can extract everything you need to live life gloriously
joyously, and abundantly. ,

Many people are unaware of their own potential because they do
not know about this storehouse of infinite intelligence and boundless
love within themselves. Whatever you want, you can draw it forth.

A magnetized piece of iron will lift about twelve times its own
weight. But if you demagnetize this same piece of iron, it will not lift
even a feather.

In the same way, there are two types of people. Those who are
magnetized are full of confidence and faith. They know they are
born to succeed and to win.

Others, so many others, are demagnetized. They are full of fears
and doubts. When an opportunity comes, they say, “What if I fail?
I might lose my mone

## Find the relevant text using an image or a keyword

In [None]:
import cv2
import numpy as np
import pytesseract
import mysql.connector

# If Tesseract is not on PATH, set pytesseract.pytesseract.tesseract_cmd to the executable's location before running OCR
def enlarge_process_blur_extract_text(image_path, standard_size=(1024, 768), ksize=(7, 7), show=True):
    """Run OCR on an image file after upscaling, shadow removal and binarization.

    Args:
        image_path: Path of the image file to read.
        standard_size: Minimum ``(width, height)``. An image smaller than this
            in either dimension is upscaled, keeping its aspect ratio, until
            both dimensions reach it.
        ksize: Kernel size passed to ``cv2.GaussianBlur``.
        show: When true (the default, matching the previous behaviour), display
            the binarized image and block until a key is pressed. Pass
            ``False`` for unattended runs.

    Returns:
        The text returned by ``pytesseract.image_to_string``, or ``None`` when
        the image could not be loaded.
    """
    # IMREAD_UNCHANGED keeps the file's own channel layout, so the array may be
    # grayscale (2-D), BGR (3 channels) or BGRA (4 channels).
    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

    if img is None:
        print(f"Error: Could not load image from {image_path}. Please check the path.")
        return None

    # Drop the alpha channel: the BGR -> gray conversion further down only
    # accepts 3-channel input.
    if img.ndim == 3 and img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    height, width = img.shape[:2]

    # Upscale small images so that both dimensions reach the standard size.
    if width < standard_size[0] or height < standard_size[1]:
        scale = max(standard_size[0] / width, standard_size[1] / height)
        # round() rather than int(): truncation could land one pixel short of
        # the target because of floating-point error.
        new_dimensions = (int(round(width * scale)), int(round(height * scale)))
        img = cv2.resize(img, new_dimensions, interpolation=cv2.INTER_LINEAR)

    # A grayscale image is already a single plane; colour images are split so
    # each channel is cleaned independently.
    planes = [img] if img.ndim == 2 else cv2.split(img)

    kernel = np.ones((7, 7), np.uint8)
    norm_planes = []
    for plane in planes:
        # Dilate + median blur yields a background estimate; subtracting it
        # from the plane evens out shadows before normalizing to 0..255.
        dilated_img = cv2.dilate(plane, kernel)
        bg_img = cv2.medianBlur(dilated_img, 21)
        diff_img = 255 - cv2.absdiff(plane, bg_img)
        norm_planes.append(
            cv2.normalize(diff_img, None, alpha=0, beta=255,
                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
        )

    # Only multi-channel results need a colour -> gray conversion.
    if len(norm_planes) == 1:
        gray_img = norm_planes[0]
    else:
        gray_img = cv2.cvtColor(cv2.merge(norm_planes), cv2.COLOR_BGR2GRAY)

    blurred_img = cv2.GaussianBlur(gray_img, ksize, 0)

    # Otsu picks the threshold automatically; the explicit 0 is ignored.
    _, binarized_img = cv2.threshold(blurred_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    if show:
        cv2.imshow('Binarized Image', binarized_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    text = pytesseract.image_to_string(binarized_img, lang='eng')

    print("Extracted Text:")
    print(text)

    return text

# Database configuration
import os

# Keyword arguments for mysql.connector.connect(). Every value can be
# overridden through an environment variable so credentials do not have to be
# edited into the notebook; the fallbacks are the previously hard-coded values.
db_config = {
    'user': os.environ.get('EXTRACTED_TEXT_DB_USER', 'root'),
    'password': os.environ.get('EXTRACTED_TEXT_DB_PASSWORD', 'fcp@123'),
    'host': os.environ.get('EXTRACTED_TEXT_DB_HOST', 'localhost'),
    'database': os.environ.get('EXTRACTED_TEXT_DB_NAME', 'Extracted_text'),
}

def insert_extracted_text(extracted_text):
    """Insert one row holding ``extracted_text`` into the ExtractedText table.

    Database errors are reported with ``print`` and not re-raised.

    Args:
        extracted_text: Value stored in the ``extracted_text`` column.
    """
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        # Parameterized query: the connector quotes the value itself.
        query = "INSERT INTO ExtractedText (extracted_text) VALUES (%s)"
        cursor.execute(query, (extracted_text,))
        conn.commit()

        print("Text inserted successfully")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Release the cursor and connection even when execute/commit failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

def get_text_from_db(keyword):
    """Print every stored text that contains ``keyword`` as a literal substring.

    Database errors are reported with ``print`` and not re-raised.

    Args:
        keyword: Search term. ``%`` and ``_`` are matched literally rather
            than acting as LIKE wildcards.
    """
    # '|' is declared as the LIKE escape character in the query below.
    escaped = keyword.replace('|', '||').replace('%', '|%').replace('_', '|_')
    query = "SELECT extracted_text FROM ExtractedText WHERE extracted_text LIKE %s ESCAPE '|'"

    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        cursor.execute(query, ('%' + escaped + '%',))
        results = cursor.fetchall()

        if results:
            print("Matching Text from Database:")
            for result in results:
                print(result[0])
        else:
            print("No matching text found in the database.")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Release the cursor and connection even when the query failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Example usage
# Normalise the answer so " Image " and "IMAGE" are accepted as well.
input_type = input("Enter 'image' to provide an image path or 'keyword' to search in the database: ").strip().lower()

if input_type not in ('image', 'keyword'):
    print("Invalid input. Please enter either 'image' or 'keyword'.")
elif input_type == 'keyword':
    # Keyword mode: look the term up in the stored texts.
    keyword = input("Enter the keyword to search: ").strip()
    get_text_from_db(keyword)
else:
    # Image mode: OCR the file and store whatever text was recognised.
    image_path = input("Enter the image path: ").strip()
    extracted_text = enlarge_process_blur_extract_text(
        image_path,
        standard_size=(1024, 768),
        ksize=(7, 7),
    )
    if extracted_text:
        insert_extracted_text(extracted_text)

## Extract text from a PDF, save it to a text file, and insert the file path into the database

In [5]:
import fitz  # PyMuPDF
import mysql.connector
import os

# Database configuration
import os

# Keyword arguments for mysql.connector.connect(). Every value can be
# overridden through an environment variable so credentials do not have to be
# edited into the notebook; the fallbacks are the previously hard-coded values.
db_config = {
    'user': os.environ.get('EXTRACTED_TEXT_DB_USER', 'root'),
    'password': os.environ.get('EXTRACTED_TEXT_DB_PASSWORD', 'fcp@123'),
    'host': os.environ.get('EXTRACTED_TEXT_DB_HOST', 'localhost'),
    'database': os.environ.get('EXTRACTED_TEXT_DB_NAME', 'Extracted_text'),
}
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined together.

    Any exception raised by PyMuPDF while opening or reading the file
    propagates to the caller.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(pdf_path) as pdf_document:
        # join() avoids rebuilding the growing string once per page.
        return "".join(page.get_text() for page in pdf_document)

def save_text_to_file(text, file_path):
    """Write ``text`` to ``file_path`` as UTF-8, replacing any existing file."""
    with open(file_path, mode='w', encoding='utf-8') as out_handle:
        out_handle.write(text)

def insert_file_path(file_path):
    """Insert one row holding ``file_path`` into the ExtractedText table.

    Database errors are reported with ``print`` and not re-raised.

    Args:
        file_path: Value stored in the ``extracted_text`` column.
    """
    # NOTE(review): the path itself goes into the extracted_text column, so a
    # LIKE search over that column sees the path, not the file's contents.
    # TODO confirm this is intended.
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        # Parameterized query: the connector quotes the value itself.
        query = "INSERT INTO ExtractedText (extracted_text) VALUES (%s)"
        cursor.execute(query, (file_path,))
        conn.commit()

        print("File path inserted successfully")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Release the cursor and connection even when execute/commit failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Example usage
pdf_path = r"C:\Users\Janhavi\OneDrive\Desktop\Fergusson\Project Sem2\sample\Joseph Murphy - The Power of Your Subconcious Mind (1988).pdf"
text = extract_text_from_pdf(pdf_path)

# Echo the extracted text so it can be checked by eye.
print(f"Extracted Text:\n{text}")

# Only write the file and record its path when some text was extracted.
if text:
    file_path = r"C:\Users\Janhavi\OneDrive\Desktop\Fergusson\Project Sem2\sample\file.txt"
    save_text_to_file(text, file_path)
    insert_file_path(file_path)


Extracted Text:
The POWER of
Your Subconscious Mind
KANSAS CITY, MO PUBLIC LIBRARY
The POWER of
Your Subconscious Mind
DATE DUE
near FC8-
Ftfc^
-ftPim
a
jT^fi.
PRENTIC
1
199I
*
. J.
Demco, tnc. 38-293
1963, BY
PRENTICE-HALL, INC.
ENGLEWOOD CLIFFS, N. L
ALL RIGHTS RESERVED. NO PART OF THIS BOOK
MAY BE REPRODUCED IN ANY FORM, BY MIMEO-
GRAPH OR ANY OTHER MEANS, WITHOUT PER-
MISSION
IN
WRITING FROM THE
PUBLISHER.
LIBRARY OF CONGRESS
CATALOG CARD NUMBER: 63-14731
20
19
18
17
16
15
14
13
12
This book is a reference work based on research by
the author. The opinions expressed herein are not
necessarily those of or endorsed by the Publisher.
ISBN O-lB-tflS^ES-T PBK
ISBN 0-13-bfl7T?a-l RtJD CLASSIC PBK
PRINTED IN THE TOTTED STATES OF AMERICA
68595
B&P
Mow This Book Can Work Miracles
in Your Life
I have seen miracles happen to men and women in
all
walks of life
all over the world. Miracles will happen to
you, too
when you begin using the magic power of your
subconscious mind. This book
is desig

## Find and show the stored text that matches a PDF

In [7]:
import cv2
import pytesseract
import mysql.connector
import fitz  # PyMuPDF

# Database configuration
import os

# Keyword arguments for mysql.connector.connect(). Every value can be
# overridden through an environment variable so credentials do not have to be
# edited into the notebook; the fallbacks are the previously hard-coded values.
db_config = {
    'user': os.environ.get('EXTRACTED_TEXT_DB_USER', 'root'),
    'password': os.environ.get('EXTRACTED_TEXT_DB_PASSWORD', 'fcp@123'),
    'host': os.environ.get('EXTRACTED_TEXT_DB_HOST', 'localhost'),
    'database': os.environ.get('EXTRACTED_TEXT_DB_NAME', 'Extracted_text'),
}

def capture_image(image_path):
    """Load an image from disk with its original channel layout.

    Despite the name, nothing is captured from a camera: the file at
    ``image_path`` is read with ``cv2.imread``.

    Returns:
        The loaded image, or ``None`` (after printing an error) when the file
        could not be read.
    """
    loaded = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)

    if loaded is not None:
        return loaded

    # imread signals failure by returning None rather than raising.
    print(f"Error: Could not load image from {image_path}. Please check the path.")
    return None

def extract_text_from_image(img):
    """Return the English text Tesseract finds in ``img``.

    A ``None`` image (for example a failed load) yields an empty string so
    callers can treat the result as text unconditionally.
    """
    return "" if img is None else pytesseract.image_to_string(img, lang='eng')

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Best-effort: any failure while opening or reading the file is printed and
    an empty string is returned instead of raising.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined together, or ``""`` on error.
    """
    try:
        # The context manager closes the document on success and on failure.
        with fitz.open(pdf_path) as doc:
            # join() avoids rebuilding the growing string once per page.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberately broad: callers rely on getting "" rather than an
        # exception when the PDF cannot be read.
        print(f"Error: {e}")
        return ""

def find_matching_texts(extracted_text, *, min_line_length=1):
    """Return stored texts that contain any line of ``extracted_text``.

    Each distinct, non-blank line (stripped of surrounding whitespace) is
    searched for as a literal substring of ``ExtractedText.extracted_text``.

    Args:
        extracted_text: Text whose lines are used as search terms. ``None`` or
            an empty string yields an empty result.
        min_line_length: Lines shorter than this (after stripping) are not
            searched. The default of 1 searches every non-blank line; a larger
            value avoids matches on fragments such as single characters.

    Returns:
        A list of matching stored texts, each listed once, in the order first
        matched; or ``None`` if a database error occurred (the error is
        printed).
    """
    if not extracted_text:
        return []

    # Distinct search terms in first-seen order, so a repeated line costs one
    # query instead of many.
    shortest = max(1, min_line_length)
    search_lines = list(dict.fromkeys(
        stripped
        for stripped in (raw.strip() for raw in extracted_text.splitlines())
        if len(stripped) >= shortest
    ))
    if not search_lines:
        return []

    def escape_like(term):
        # Make LIKE wildcards literal; '|' is declared as the escape character
        # in the query below.
        return term.replace('|', '||').replace('%', '|%').replace('_', '|_')

    query = "SELECT extracted_text FROM ExtractedText WHERE extracted_text LIKE %s ESCAPE '|'"
    matching_texts = []
    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**db_config)
        cursor = conn.cursor()

        for term in search_lines:
            cursor.execute(query, ("%" + escape_like(term) + "%",))
            for row in cursor.fetchall():
                # A stored text usually matches many lines; report it once.
                if row[0] not in matching_texts:
                    matching_texts.append(row[0])

        return matching_texts

    except mysql.connector.Error as err:
        print(f"Error: {err}")
        return None

    finally:
        # Release the cursor and connection even when a query failed.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()

# Example usage
pdf_path = r"C:\Users\Janhavi\OneDrive\Desktop\Fergusson\Project Sem2\sample\Joseph Murphy - The Power of Your Subconcious Mind (1988).pdf"  # Ensure this is a PDF file
extracted_text = extract_text_from_pdf(pdf_path)

# With no text (the PDF could not be read or has no text layer) there is
# nothing to look up, so skip the database round trip.
matching_texts = find_matching_texts(extracted_text) if extracted_text.strip() else []

if matching_texts is None:
    # find_matching_texts returns None after printing a database error; do not
    # present that as "nothing matched".
    print("Search could not be completed because of a database error")
elif matching_texts:
    print("Matching references:")
    for text in matching_texts:
        print(text)
else:
    print("No matching texts found")


Matching references:
4.
Practice Tough Love

ey

The gotden thread of a highly successful and Meaningful
life ts self-discipline. Discipline allows you to do all those
things you know In your heart you should do but never
feel like doing. Without self discipline. you will not set
clear goals, manage your time effectively. treat people
well. persist through the tough times, care for your
health or think positive thoughts.

I call the habit of self-discipline “Tough Love” because
getting tough with yourself is actually a very loving
gesture. By being stricter with yourself, you will begin to
live life more deliberately, on your own terms rather than
simply reacting to life the way a leaf floating in a stream
drifts according to the flow of the current on a particular
day. As I teach in one of my seminars, the tougher you are
on yourself, the easier life will be on you. The quality of
your life ultimately is shaped by the quality of your
choices and decisions, ones that Tange from the car