In [1]:
import pandas as pd
from PyPDF2 import PdfReader
import os

def search_in_csv(filename, roll_numbers_to_find):
    """
    Reads a CSV file using pandas and searches for roll numbers.
    Assumes the CSV has a column named 'roll_no'.
    """
    print(f"--- Searching in CSV file: {filename} ---")
    try:
        df = pd.read_csv(filename)
        if 'roll_no' not in df.columns:
            print(f"Error: The CSV file must have a column named 'roll_no'.")
            return
        existing_roll_nos = set(df['roll_no'])
        found_numbers = [r for r in roll_numbers_to_find if r in existing_roll_nos]
        not_found_numbers = [r for r in roll_numbers_to_find if r not in existing_roll_nos]
        
        if found_numbers:
            print("\n[SUCCESS] The following roll numbers were FOUND:", found_numbers)
            print("Details of found students:")
            print(df[df['roll_no'].isin(found_numbers)])
        if not_found_numbers:
            print("\n[INFO] The following roll numbers were NOT FOUND:", not_found_numbers)
    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found. Please check the name and location.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")

def search_in_pdf(filename, roll_numbers_to_find):
    """
    Reads a PDF file, extracts all text, and performs a simple text search.
    """
    print(f"--- Searching in PDF file: {filename} ---")
    try:
        reader = PdfReader(filename)
        full_text = ""
        for page in reader.pages:
            full_text += page.extract_text() or ""
        
        found_numbers = [r for r in roll_numbers_to_find if str(r) in full_text]
        not_found_numbers = [r for r in roll_numbers_to_find if str(r) not in full_text]
        
        if found_numbers:
            print("\n[SUCCESS] The following roll numbers were FOUND in the document's text:", found_numbers)
        if not_found_numbers:
            print("\n[INFO] The following roll numbers were NOT FOUND in the document's text:", not_found_numbers)
    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found. Please check the name and location.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")

# --- Main program starts here ---
if __name__ == "__main__":
    # The file path is now hardcoded. No need to ask the user.
    # The 'r' before the string is important for Windows paths.
    filename = r"D:\Roll_No.pdf"

    print(f"Attempting to process the file: {filename}")

    # Check if the file exists before asking for roll numbers
    if not os.path.exists(filename):
        print(f"\n[ERROR] File '{filename}' not found. Exiting program.")
    else:
        roll_no_input = input("Enter roll numbers to find, separated by spaces (e.g., 101 108 150): ")
        
        try:
            numbers_to_find = [int(num) for num in roll_no_input.split()]

            if filename.lower().endswith('.csv'):
                search_in_csv(filename, numbers_to_find)
            elif filename.lower().endswith('.pdf'):
                search_in_pdf(filename, numbers_to_find)
            else:
                print("\n[ERROR] Unsupported file type. Please provide a .csv or .pdf file.")
        
        except ValueError:
            print("\n[ERROR] Invalid input. Please enter only numbers separated by spaces.")

Attempting to process the file: D:\Roll_No.pdf


Enter roll numbers to find, separated by spaces (e.g., 101 108 150):  0808146


--- Searching in PDF file: D:\Roll_No.pdf ---

[SUCCESS] The following roll numbers were FOUND in the document's text: [808146]


In [2]:
import pandas as pd
from PyPDF2 import PdfReader
import os

def search_in_csv(filename, roll_numbers_to_find):
    """
    Reads a CSV and finds roll numbers along with their row number (S.No.).
    """
    print(f"--- Searching in CSV file: {filename} ---")
    try:
        df = pd.read_csv(filename)
        if 'roll_no' not in df.columns:
            print(f"Error: The CSV file must have a column named 'roll_no'.")
            return

        # A list to store detailed results for found numbers
        found_details = []
        not_found_numbers = []

        for roll_no in roll_numbers_to_find:
            # Search for the roll number in the 'roll_no' column
            result = df[df['roll_no'] == roll_no]
            
            if not result.empty:
                # Get the 0-based index of the first match
                index = result.index[0]
                # S.No. is the index + 2 (1 for 0-based index, 1 for header row)
                s_no = index + 2 
                found_details.append({'roll_no': roll_no, 's_no': s_no})
            else:
                not_found_numbers.append(roll_no)
        
        if found_details:
            print("\n[SUCCESS] The following roll numbers were FOUND:")
            for item in found_details:
                print(f"  - Roll Number: {item['roll_no']} was found at S.No. (row) {item['s_no']}")
        
        if not_found_numbers:
            print("\n[INFO] The following roll numbers were NOT FOUND:", not_found_numbers)

    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")


def search_in_pdf(filename, roll_numbers_to_find):
    """
    Reads a PDF and finds roll numbers, reporting the page and approximate line number.
    """
    print(f"--- Searching in PDF file: {filename} ---")
    try:
        reader = PdfReader(filename)
        
        found_details = []
        # Keep track of numbers that have been found to avoid duplicate reporting
        already_found = set()

        # Process page by page to get page numbers
        for page_num, page in enumerate(reader.pages, 1): # Start page count from 1
            text = page.extract_text() or ""
            lines = text.split('\n')
            
            # Process line by line to get approximate line numbers
            for line_num, line in enumerate(lines, 1): # Start line count from 1
                for roll_no in roll_numbers_to_find:
                    # Check if the roll number is in the line and not already found
                    if str(roll_no) in line and roll_no not in already_found:
                        found_details.append({'roll_no': roll_no, 'page': page_num, 'line': line_num})
                        already_found.add(roll_no)
        
        # Determine which numbers were not found at all
        not_found_numbers = [r for r in roll_numbers_to_find if r not in already_found]

        if found_details:
            print("\n[SUCCESS] The following roll numbers were FOUND:")
            for item in found_details:
                print(f"  - Roll Number: {item['roll_no']} was found on Page {item['page']}, Line {item['line']}")
        
        if not_found_numbers:
            print("\n[INFO] The following roll numbers were NOT FOUND:", not_found_numbers)

    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")

# --- Main program starts here ---
if __name__ == "__main__":
    # The file path is now hardcoded.
    filename = r"D:\Roll_No.pdf"
    print(f"Attempting to process the file: {filename}")

    if not os.path.exists(filename):
        print(f"\n[ERROR] File '{filename}' not found. Exiting program.")
    else:
        roll_no_input = input("Enter roll numbers to find, separated by spaces (e.g., 101 108 150): ")
        
        try:
            numbers_to_find = [int(num) for num in roll_no_input.split()]

            if filename.lower().endswith('.csv'):
                search_in_csv(filename, numbers_to_find)
            elif filename.lower().endswith('.pdf'):
                search_in_pdf(filename, numbers_to_find)
            else:
                print("\n[ERROR] Unsupported file type. Please provide a .csv or .pdf file.")
        
        except ValueError:
            print("\n[ERROR] Invalid input. Please enter only numbers separated by spaces.")

Attempting to process the file: D:\Roll_No.pdf


Enter roll numbers to find, separated by spaces (e.g., 101 108 150):  0818331


--- Searching in PDF file: D:\Roll_No.pdf ---

[SUCCESS] The following roll numbers were FOUND:
  - Roll Number: 818331 was found on Page 1, Line 33


In [3]:
import pandas as pd
from PyPDF2 import PdfReader
import os

def search_in_csv(filename, roll_numbers_to_find):
    """
    Reads a CSV and finds ALL occurrences of roll numbers along with their row number (S.No.).
    """
    print(f"--- Searching in CSV file: {filename} ---")
    try:
        df = pd.read_csv(filename)
        if 'roll_no' not in df.columns:
            print(f"Error: The CSV file must have a column named 'roll_no'.")
            return

        found_details = []
        not_found_numbers = []

        for roll_no in roll_numbers_to_find:
            # result is now a DataFrame containing ALL matching rows
            result = df[df['roll_no'] == roll_no]
            
            if not result.empty:
                # Loop through each found occurrence
                for index in result.index:
                    # S.No. is the index + 2 (1 for 0-based, 1 for header row)
                    s_no = index + 2 
                    found_details.append({'roll_no': roll_no, 's_no': s_no})
            else:
                not_found_numbers.append(roll_no)
        
        if found_details:
            print("\n[SUCCESS] The following roll numbers were FOUND:")
            for item in found_details:
                print(f"  - Roll Number: {item['roll_no']} was found at S.No. (row) {item['s_no']}")
        
        if not_found_numbers:
            print("\n[INFO] The following roll numbers were NOT FOUND:", not_found_numbers)

    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")


def search_in_pdf(filename, roll_numbers_to_find):
    """
    Reads a PDF and finds ALL occurrences of roll numbers, reporting the page and line number.
    """
    print(f"--- Searching in PDF file: {filename} ---")
    try:
        reader = PdfReader(filename)
        found_details = []

        # Process page by page to get page numbers
        for page_num, page in enumerate(reader.pages, 1): # Start page count from 1
            text = page.extract_text() or ""
            lines = text.split('\n')
            
            # Process line by line to get approximate line numbers
            for line_num, line in enumerate(lines, 1): # Start line count from 1
                for roll_no in roll_numbers_to_find:
                    # We removed the 'already_found' check to find all occurrences
                    if str(roll_no) in line:
                        found_details.append({'roll_no': roll_no, 'page': page_num, 'line': line_num})
        
        # After checking everything, determine which numbers were never found
        found_roll_nos_set = {item['roll_no'] for item in found_details}
        not_found_numbers = [r for r in roll_numbers_to_find if r not in found_roll_nos_set]

        if found_details:
            print("\n[SUCCESS] The following roll numbers were FOUND:")
            for item in found_details:
                print(f"  - Roll Number: {item['roll_no']} was found on Page {item['page']}, Line {item['line']}")
        
        if not_found_numbers:
            print("\n[INFO] The following roll numbers were NOT FOUND:", not_found_numbers)

    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")

# --- Main program starts here ---
if __name__ == "__main__":
    filename = r"D:\archive\Coffe_sales.csv" # Hardcoded file path
    print(f"Attempting to process the file: {filename}")

    if not os.path.exists(filename):
        print(f"\n[ERROR] File '{filename}' not found. Exiting program.")
    else:
        roll_no_input = input("Enter roll numbers to find, separated by spaces (e.g., 101 108 150): ")
        
        try:
            numbers_to_find = [int(num) for num in roll_no_input.split()]

            if filename.lower().endswith('.csv'):
                search_in_csv(filename, numbers_to_find)
            elif filename.lower().endswith('.pdf'):
                search_in_pdf(filename, numbers_to_find)
            else:
                print("\n[ERROR] Unsupported file type. Please provide a .csv or .pdf file.")
        
        except ValueError:
            print("\n[ERROR] Invalid input. Please enter only numbers separated by spaces.")

Attempting to process the file: D:\archive\Coffe_sales.csv


Enter roll numbers to find, separated by spaces (e.g., 101 108 150):  Latte



[ERROR] Invalid input. Please enter only numbers separated by spaces.


In [5]:
import pandas as pd
from PyPDF2 import PdfReader
import os

def search_and_count_in_csv(filename, search_term):
    """
    Reads a CSV and searches for a term in ALL columns. It counts all occurrences
    and lists the location (row, column) of each.
    """
    print(f"--- Searching for '{search_term}' in CSV file: {filename} ---")
    try:
        df = pd.read_csv(filename)
        # Convert the search term to a string for consistent searching
        search_term_str = str(search_term).lower()
        
        found_locations = []
        total_count = 0

        # Convert the entire dataframe to strings for searching
        df_str = df.astype(str)

        # Iterate over each cell in the dataframe
        for row_idx in range(len(df_str)):
            for col_name in df_str.columns:
                cell_value = df_str.at[row_idx, col_name].lower()
                if search_term_str in cell_value:
                    # S.No. is the row index + 2 (1 for 0-based, 1 for header)
                    s_no = row_idx + 2
                    found_locations.append({
                        's_no': s_no,
                        'column': col_name,
                        'value': df.at[row_idx, col_name] # Original value for context
                    })
                    total_count += 1
        
        print(f"\n[RESULT] Found '{search_term}' a total of {total_count} time(s).")

        if found_locations:
            print("\nLocations where the term was found:")
            for item in found_locations:
                print(f"  - S.No. (row): {item['s_no']}, Column: '{item['column']}', Value: '{item['value']}'")

    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")


def search_and_count_in_pdf(filename, search_term):
    """
    Reads a PDF, counts all occurrences of a search term, and lists their locations.
    """
    print(f"--- Searching for '{search_term}' in PDF file: {filename} ---")
    try:
        reader = PdfReader(filename)
        found_locations = []
        total_count = 0
        search_term_str = str(search_term).lower()
        
        full_document_text = ""
        # First, build the full text to get a total count
        for page in reader.pages:
            full_document_text += page.extract_text() or ""
        
        # Get the total count using the simple .count() method (case-insensitive)
        total_count = full_document_text.lower().count(search_term_str)

        # Now, find the specific locations for reporting
        for page_num, page in enumerate(reader.pages, 1):
            text = page.extract_text() or ""
            lines = text.split('\n')
            for line_num, line in enumerate(lines, 1):
                if search_term_str in line.lower():
                    found_locations.append({'page': page_num, 'line': line_num})
        
        print(f"\n[RESULT] Found '{search_term}' a total of {total_count} time(s) in the document.")

        if found_locations:
            print("\nLocations where the term was found (based on line):")
            for item in found_locations:
                print(f"  - Page: {item['page']}, Approx. Line: {item['line']}")

    except FileNotFoundError:
        print(f"\n[ERROR] The file '{filename}' was not found.")
    except Exception as e:
        print(f"\n[ERROR] An unexpected error occurred: {e}")

# --- Main program starts here ---
if __name__ == "__main__":
    filename = r"D:\EPFO\Epfo_date1.pdf" # Hardcoded file path
    print(f"Attempting to process the file: {filename}")

    if not os.path.exists(filename):
        print(f"\n[ERROR] File '{filename}' not found. Exiting program.")
    else:
        # Ask for a general search term now
        search_term_input = input("Enter the text or number you want to find: ")
        
        if not search_term_input:
            print("\n[ERROR] Search term cannot be empty.")
        else:
            if filename.lower().endswith('.csv'):
                search_and_count_in_csv(filename, search_term_input)
            elif filename.lower().endswith('.pdf'):
                search_and_count_in_pdf(filename, search_term_input)
            else:
                print("\n[ERROR] Unsupported file type. Please provide a .csv or .pdf file.")

Attempting to process the file: D:\EPFO\Epfo_date1.pdf


Enter the text or number you want to find:  Date


--- Searching for 'Date' in PDF file: D:\EPFO\Epfo_date1.pdf ---

[RESULT] Found 'Date' a total of 209 time(s) in the document.

Locations where the term was found (based on line):
  - Page: 1, Approx. Line: 17
  - Page: 1, Approx. Line: 19
  - Page: 1, Approx. Line: 20
  - Page: 1, Approx. Line: 23
  - Page: 1, Approx. Line: 25
  - Page: 1, Approx. Line: 30
  - Page: 2, Approx. Line: 17
  - Page: 3, Approx. Line: 14
  - Page: 3, Approx. Line: 16
  - Page: 3, Approx. Line: 17
  - Page: 3, Approx. Line: 19
  - Page: 3, Approx. Line: 29
  - Page: 4, Approx. Line: 2
  - Page: 4, Approx. Line: 30
  - Page: 6, Approx. Line: 6
  - Page: 6, Approx. Line: 11
  - Page: 6, Approx. Line: 14
  - Page: 6, Approx. Line: 16
  - Page: 6, Approx. Line: 17
  - Page: 6, Approx. Line: 23
  - Page: 6, Approx. Line: 25
  - Page: 6, Approx. Line: 26
  - Page: 6, Approx. Line: 30
  - Page: 7, Approx. Line: 7
  - Page: 7, Approx. Line: 10
  - Page: 7, Approx. Line: 26
  - Page: 8, Approx. Line: 8
  - Page: 9, 