# Import Libraries

In [2]:
import numpy as np
import cv2

In [3]:
import os
import time

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [5]:
import pytesseract
from PIL import Image

# Paths

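- The r prefix before the string makes it a raw string, which prevents Python from interpreting backslashes (\\) as escape characters.
- This is important here because the Windows paths below contain backslashes; without the r prefix, a sequence such as `\t` in `Tesseract-OCR\tesseract.exe` would be read as a tab character.
- The same applies to the Tesseract whitelist string used later, which also includes \ as a literal character.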

In [8]:
# Locations of the external tools this notebook drives.
# Each one can be overridden with an environment variable of the same name so the
# notebook also runs on machines with a different install layout; the defaults are
# the original Windows install paths.
# Raw strings keep the backslashes literal (otherwise "\t" in "\tesseract.exe" would be a tab).
TESSERACT_PATH    = os.environ.get("TESSERACT_PATH",    r"C:\Program Files\Tesseract-OCR\tesseract.exe")
CHROMEDRIVER_PATH = os.environ.get("CHROMEDRIVER_PATH", r"C:\Program Files\Google\chromedriver-win64\chromedriver.exe")
CHROME_PATH       = os.environ.get("CHROME_PATH",       r"C:\Program Files\Google\Chrome\Application\chrome.exe")

In [9]:
# Landing URL for the IRCTC meal-booking redirect.
# The PNR is taken from the IRCTC_PNR environment variable when it is set, so a
# booking reference does not have to be edited into (and shared with) the notebook.
# The default keeps the original value, so IRCTC_URL is unchanged when the variable is unset.
IRCTC_PNR = os.environ.get("IRCTC_PNR", "2108873329")
IRCTC_URL = f"https://www.irctc.co.in/nget/redirect?pnr={IRCTC_PNR}&service=PRS_MEAL_BOOKING"

In [10]:
# Tesseract setup: point pytesseract at the Tesseract executable given by TESSERACT_PATH.
# This must run before any pytesseract OCR call made later in the notebook.
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

# Code Functionality

<h3><center> preprocess_captcha() </center></h3>

* This function is designed to enhance a CAPTCHA image to improve the accuracy of Optical Character Recognition (OCR) when using a tool like Tesseract.
* CAPTCHAs often contain noise, distortions, or low contrast, which can make text extraction difficult.
* This function applies a series of image processing techniques using OpenCV (cv2) and NumPy (np) to clean and prepare the image.

In [13]:
def preprocess_captcha(img_path, output_path="processed_captcha.png"):
    """
    Clean up a CAPTCHA image so that OCR has an easier time reading it.

    Pipeline: grayscale load -> 2x upscale -> 3x3 Gaussian blur ->
    inverted Otsu threshold -> 2x2 morphological opening -> write to disk.

    Args:
        img_path: Path of the CAPTCHA image to read.
        output_path: Where the processed image is written. Defaults to
            "processed_captcha.png" (relative to the current working directory).

    Returns:
        The path the processed image was written to (``output_path``).

    Raises:
        FileNotFoundError: ``img_path`` does not point to an existing file.
        ValueError: ``img_path`` exists but OpenCV could not decode it as an image.
        OSError: OpenCV reported that the processed image could not be written.
    """
    # Load as a single-channel grayscale image; colour is not needed for text
    # recognition and a single channel keeps the later steps simple.
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread does not raise on failure, it returns None. Fail here with a clear
    # message instead of letting cv2.resize blow up with an opaque assertion error.
    if img is None:
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"CAPTCHA image not found: {img_path!r}")
        raise ValueError(f"CAPTCHA image could not be decoded: {img_path!r}")

    # Double the width and height (fx=2, fy=2) with bilinear interpolation so small
    # or faint glyphs cover more pixels for the OCR engine to work with.
    img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)

    # Light 3x3 Gaussian blur; sigma=0 lets OpenCV derive it from the kernel size.
    # This smooths out small speckles before thresholding.
    blur = cv2.GaussianBlur(img, (3, 3), 0)

    # Otsu picks the threshold from the image histogram (the explicit 0 is ignored);
    # THRESH_BINARY_INV then inverts the result, so dark glyphs on a light background
    # become white (255) glyphs on black (0). The first return value is the threshold
    # Otsu chose, which is not needed here.
    # NOTE(review): Tesseract's own guidance for 4.x+ prefers dark text on a light
    # background - confirm the inversion actually helps for this CAPTCHA style.
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological opening (erode, then dilate) with a 2x2 kernel removes isolated
    # white specks left after thresholding while keeping the larger glyph shapes.
    kernel = np.ones((2, 2), np.uint8)
    morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

    # Persist the result so it can be inspected when debugging and handed to OCR.
    # cv2.imwrite signals failure through its boolean return value.
    if not cv2.imwrite(output_path, morph):
        raise OSError(f"Could not write processed CAPTCHA image: {output_path!r}")
    return output_path

<h3><center> extract_text_from_captcha() </center></h3>
* Extract alphanumeric and symbol text from a processed CAPTCHA image.

In [15]:
def extract_text_from_captcha(image_path):
    """
    Run Tesseract OCR on a processed CAPTCHA image and return the recognised text.

    Args:
        image_path: Path of the (already preprocessed) CAPTCHA image.

    Returns:
        The recognised characters as a string with all whitespace removed.
        May be empty, or contain misread characters, depending on image quality.
    """
    # --psm 7 tells Tesseract to treat the image as a single line of text.
    # The whitelist restricts recognition to letters, digits and the listed symbols.
    # It contains no space character, so whitespace is never part of a valid answer.
    config = r'--psm 7 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;:\'",.<>?/\\'

    # Open the image in a context manager so the underlying file handle is always
    # released, even if OCR raises.
    with Image.open(image_path) as img:
        raw_text = pytesseract.image_to_string(img, config=config)

    # Tesseract output can carry trailing newlines and occasional spaces between
    # glyphs. Drop every whitespace character, not only leading/trailing ones, so
    # stray spaces are not typed into the CAPTCHA field.
    return "".join(raw_text.split())

<h3><center> solve_and_fill_captcha() </center></h3>

* Detect CAPTCHA image, extract and fill its value.
* This function takes a driver object (a Selenium WebDriver instance) as an input and attempts to solve and fill a CAPTCHA on the webpage the driver is interacting with.

`XPath :`
* Locates the CAPTCHA image element on the IRCTC webpage. 
* This XPath targets an \<img\> element within a form structure.

In [18]:
def solve_and_fill_captcha(driver, min_length=4):
    """
    Detect the CAPTCHA image, OCR it, and type the result into the CAPTCHA field.

    Steps: screenshot the CAPTCHA <img> element to "captcha.png", preprocess it,
    run OCR on the processed image, and - if enough characters were read - clear
    the CAPTCHA input and type the text into it.

    Args:
        driver: Selenium WebDriver instance; main() calls this after clicking the
            login button.
        min_length: Minimum number of characters the OCR result must contain to be
            typed into the field (default 4, the original threshold).

    Returns:
        True if the extracted text had at least ``min_length`` characters and was
        sent to the input field. False if the text was too short, or if any step
        raised an exception (the exception is printed, not re-raised).
    """
    # Absolute XPath of the CAPTCHA <img> inside the login form, and the XPath of
    # the text box the answer is typed into.
    captcha_xpath       = '//*[@id="login_header_disable"]/div/div/div[2]/div[2]/div/div[2]/div/div[2]/form/div[5]/div/app-captcha/div/div/div[2]/span[1]/img'
    captcha_input_xpath = '//*[@id="captcha"]'

    try:
        # Capture the CAPTCHA image
        print("\n Capturing CAPTCHA image...")

        # Wait until the element matching captcha_xpath is actually visible, not
        # merely present in the DOM: an element screenshot of something that has
        # not been rendered yet fails or yields an empty image.
        captcha_img = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, captcha_xpath))
        )
        captcha_img.screenshot("captcha.png")

        # Preprocess the CAPTCHA image
        print(" Preprocessing CAPTCHA image...")
        processed_img = preprocess_captcha("captcha.png")

        # Solve the CAPTCHA using OCR
        print(" Solving CAPTCHA using OCR...")
        captcha_text = extract_text_from_captcha(processed_img)
        print(f" Extracted CAPTCHA text: {captcha_text}")

        # Guard clause: very short results are treated as a failed read.
        if len(captcha_text) < min_length:
            print(" CAPTCHA too short or unreadable.")
            return False

        # Wait until the element matching captcha_input_xpath is visible and
        # enabled, since clear()/send_keys() need an interactable element.
        captcha_input = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, captcha_input_xpath))
        )
        captcha_input.clear()
        captcha_input.send_keys(captcha_text)
        return True

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller decide.
        print(f" CAPTCHA solving failed: {e}")
        return False

<h3><center> main() </center></h3>

In [20]:
def main():
    """
    Drive Chrome through the IRCTC flow end to end.

    Starts a Chrome session, opens IRCTC_URL, tries to dismiss the alert popup,
    waits for the overlay to go away, clicks the login button, attempts the
    CAPTCHA via solve_and_fill_captcha(), keeps the browser open for 30 seconds,
    and always quits the driver at the end.
    """
    # ---- WebDriver initialisation ----
    # Service wraps the ChromeDriver executable that controls Chrome.
    chromedriver_service = Service(CHROMEDRIVER_PATH)

    # Browser options: use the Chrome binary at CHROME_PATH and start maximized.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = CHROME_PATH
    chrome_options.add_argument("--start-maximized")

    browser = webdriver.Chrome(service=chromedriver_service, options=chrome_options)

    # Locators used below, collected in one place.
    ok_button_locator = (By.XPATH, '//button[text()="OK"]')
    overlay_locator = (
        By.XPATH,
        '//div[contains(@class, "ui-dialog-mask") and contains(@class, "ui-widget-overlay")]',
    )
    login_locator = (
        By.XPATH,
        '/html/body/app-root/app-home/div[1]/app-header/div[2]/div[2]/div[1]/a[1]',
    )

    try:
        # ---- Navigate to the IRCTC website ----
        print("Opening IRCTC...")
        browser.get(IRCTC_URL)
        time.sleep(2)

        # ---- Alert popup ----
        # Best-effort: if the "OK" button cannot be clicked, log it, keep a
        # screenshot for debugging, and carry on.
        print("Attempting to close the alert popup...")
        try:
            ok_button = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable(ok_button_locator)
            )
            ok_button.click()
            print("Alert popup closed.")
        except Exception as popup_error:
            print(f"No 'OK' button found or unable to close popup: {popup_error}")
            browser.save_screenshot("popup_error_screenshot.png")
            print("Screenshot saved as popup_error_screenshot.png")

        # The dialog overlay must be gone before the login link can be clicked.
        print("Waiting for overlay to disappear...")
        WebDriverWait(browser, 10).until(
            EC.invisibility_of_element_located(overlay_locator)
        )

        # ---- Login button ----
        print("Clicking login button...")
        login_link = WebDriverWait(browser, 10).until(
            EC.element_to_be_clickable(login_locator)
        )
        login_link.click()
        time.sleep(2)

        # ---- CAPTCHA ----
        captcha_filled = solve_and_fill_captcha(browser)
        print("CAPTCHA filled successfully!" if captcha_filled else "CAPTCHA solving failed.")

        # Keep the window open for a while so the result can be inspected.
        print("Holding browser for 30 seconds...")
        time.sleep(30)

    finally:
        # Always shut the browser down, even if a step above raised.
        browser.quit()

In [21]:
# Entry point: start the browser automation when this cell/module is executed
# directly (the guard keeps main() from running if the code is imported instead).
if __name__ == "__main__":
    main()

Opening IRCTC...
Attempting to close the alert popup...
Alert popup closed.
Waiting for overlay to disappear...
Clicking login button...

 Capturing CAPTCHA image...
 Preprocessing CAPTCHA image...
 Solving CAPTCHA using OCR...
 Extracted CAPTCHA text: ReHt9
CAPTCHA filled successfully!
Holding browser for 30 seconds...
