In [1]:
import urllib.request
import pyautogui
import time
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from selenium import webdriver
from PIL import Image

In [2]:
# Launch Chrome through the chromedriver binary at ./driver/chromedriver.exe,
# open the reCAPTCHA demo page, and switch the window to full-screen mode.
# NOTE(review): full screen presumably keeps page coordinates aligned with the
# screen coordinates that pyautogui clicks on later — confirm.
driver = webdriver.Chrome('./driver/chromedriver.exe')
driver.get("https://patrickhlauke.github.io/recaptcha/")
driver.fullscreen_window()

In [3]:
# Return to the top-level document and click the first <iframe> on the page.
# NOTE(review): the first iframe is presumably the reCAPTCHA checkbox, and the
# short sleep presumably gives the challenge popup time to appear — confirm.
driver.switch_to.default_content()
driver.find_element_by_tag_name("iframe").click()
time.sleep(0.33)

In [4]:
# Find the right iframe in case of multiple redundant tags.
# An iframe qualifies when it contains at least one <button>. There is no
# early exit, so when several iframes qualify the last one is kept.
iframes = driver.find_elements_by_tag_name("iframe")
correctIFrame = None
for frame in iframes:
    driver.switch_to.frame(frame)
    # Query the buttons once and reuse the result instead of hitting the DOM twice.
    buttons = driver.find_elements_by_tag_name("button")
    if buttons:
        print("correct iframe found")
        correctIFrame = frame
    # Step back out so the next switch_to.frame starts from the top-level document.
    driver.switch_to.default_content()

correct iframe found


In [5]:
# Find captcha box coordinates: the page position of the challenge iframe is
# the origin that element positions inside the frame are offset from.
boxX = correctIFrame.location["x"]
# NOTE(review): the constant 45 is presumably a vertical offset between page
# and screen coordinates (e.g. browser UI height) — confirm.
boxY = 45 + correctIFrame.location["y"]
# Enter the challenge iframe so the element lookups that follow resolve inside it.
driver.switch_to.frame(correctIFrame)

def find_location_of_element(element):
    """Return the (x, y) centre of ``element`` offset by the captcha box origin.

    The element's own location is added to the module-level ``boxX``/``boxY``
    origin, plus half of its width/height so the point lands in the middle
    of the element.
    """
    half_width = element.size["width"] / 2
    center_x = boxX + half_width + element.location["x"]
    half_height = element.size["height"] / 2
    center_y = boxY + half_height + element.location["y"]
    return center_x, center_y

# Find verify button coordinates (centre of the element with id
# "recaptcha-verify-button", offset by the captcha box origin).
verifyX, verifyY = find_location_of_element(driver.find_element_by_id("recaptcha-verify-button"))

# Find reload button coordinates (centre of the element with id
# "recaptcha-reload-button", offset by the captcha box origin).
reloadX, reloadY = find_location_of_element(driver.find_element_by_id("recaptcha-reload-button"))

In [6]:
def clickIndex(row, column):
    """Click the table cell at zero-based (row, column) through Selenium.

    Rows are the <tr> elements found by the driver; columns are the <td>
    elements inside the selected row.
    """
    target_row = driver.find_elements_by_tag_name("tr")[row]
    target_cell = target_row.find_elements_by_tag_name("td")[column]
    target_cell.click()

def click(x, y):
    """Move the real mouse pointer to (x, y) over half a second, then click."""
    pyautogui.moveTo(x=x, y=y, duration=0.5)
    pyautogui.click()

def submit():
    """Click the verify button at its precomputed coordinates."""
    click(x=verifyX, y=verifyY)

def reload():
    """Click the reload button at its precomputed coordinates."""
    click(x=reloadX, y=reloadY)

def check_table():
    """Return every <td> element of every <tr>, flattened in row-major order."""
    return [
        cell
        for table_row in driver.find_elements_by_tag_name("tr")
        for cell in table_row.find_elements_by_tag_name("td")
    ]

def show_image(img):
    """Display ``img`` with matplotlib, with both axes' tick marks removed."""
    for clear_ticks in (plt.xticks, plt.yticks):
        clear_ticks([])
    pixels = np.array(img)
    plt.imshow(pixels)
    plt.show()

def show_image_label(img, label):
    """Display ``img`` with ``label`` written as the x-axis caption."""
    plt.xlabel(xlabel=label)
    show_image(img)

In [7]:
def img_element_with_id(id):
    """Return the first <img> whose ``class`` attribute equals ``id``.

    Despite the parameter name, the value is matched against the element's
    class attribute (exact string equality), not its HTML id.

    Args:
        id: class-attribute value to look for, e.g. "rc-image-tile-33".

    Returns:
        The first matching element, or None when no <img> matches.
    """
    for image in driver.find_elements_by_tag_name("img"):
        # Compare against the argument rather than a hard-coded class name.
        if image.get_attribute("class") == id:
            return image
    return None

# Get the correct captcha size and type (3x3)
def find_3x3_captcha():
    """Reload the challenge until a 3x3 image is shown, then return its cells.

    A challenge counts as 3x3 when an <img> with class "rc-image-tile-33" is
    present. After each reload the function waits one second before checking
    again. The return value is the flat list of table cells from
    ``check_table``.
    """
    while img_element_with_id("rc-image-tile-33") is None:
        reload()
        time.sleep(1)

    return check_table()

In [8]:
# Module-level flag, initialised to False.
# NOTE(review): nothing in the visible cells reads or writes it afterwards —
# presumably meant to track dynamic tile refreshes; confirm or remove.
tile_updated = False

def get_table_coords():
    """Refresh the global tile elements and their click coordinates.

    Makes sure a 3x3 challenge is displayed (reloading as needed), then stores
    the table cells in the global ``table_elements`` and their centre
    coordinates in the global ``table_coords``.

    Returns:
        The freshly computed ``table_coords`` list of (x, y) tuples, so that
        ``table_coords = get_table_coords()`` keeps the global valid instead
        of overwriting it with None.
    """
    global table_elements
    global table_coords
    table_elements = find_3x3_captcha()
    table_coords = [find_location_of_element(cell) for cell in table_elements]
    return table_coords

def choose_captcha_by_index(index):
    """Click the tile at flat position ``index`` in the global ``table_coords``."""
    tile_x, tile_y = table_coords[index][0], table_coords[index][1]
    click(tile_x, tile_y)

# ROW AND COLUMN START FROM 1 INSTEAD OF 0
def choose_captcha(row, column):
    """Click the tile at 1-based (row, column) of the 3-column grid."""
    zero_based_row = row - 1
    zero_based_column = column - 1
    # Row-major flattening over a grid that is 3 tiles wide.
    choose_captcha_by_index(zero_based_row * 3 + zero_based_column)

In [9]:
# Process image to the right resolution
def process_image(img):
    """Convert a PIL image to a 100x100 RGB numpy array.

    Any mode other than "RGB" (RGBA, grayscale, palette, CMYK, ...) is
    converted first, so the result always has three channels and can be
    stacked into the (N, 100, 100, 3) batch built by the caller.

    Args:
        img: image object exposing ``mode``, ``convert`` and ``resize``
            (a PIL ``Image``).

    Returns:
        numpy array of the resized RGB image.
    """
    if img.mode != "RGB":
        img = img.convert("RGB")
    img = img.resize((100, 100))
    return np.array(img)

# CROP IMAGE FUNCTION
def crop_image_and_convert(image):
    """Split ``image`` into a 3x3 grid and return the tiles as one array.

    Tiles are cut in row-major order (left to right, top to bottom), each one
    is passed through ``process_image``, and the results are stacked along a
    new leading axis onto an empty (0, 100, 100, 3) uint8 batch.
    """
    full_width, full_height = image.size
    tile_width = full_width // 3
    tile_height = full_height // 3

    tiles = [
        process_image(
            image.crop((
                col * tile_width,
                row * tile_height,
                (col + 1) * tile_width,
                (row + 1) * tile_height,
            ))
        )
        for row in range(3)
        for col in range(3)
    ]

    empty_batch = np.zeros((0, 100, 100, 3), dtype=np.uint8)
    return np.concatenate((empty_batch, tiles))

In [10]:
# Load model and categories.
# The Keras model is read from data/model_test.h5, relative to the notebook's
# working directory.
loaded_model = tf.keras.models.load_model("data/model_test.h5")

# Class names, one per position. decode_to_string indexes this list with the
# argmax of a prediction vector.
label_names = [
    'Bicycle',        # 0
    'Bridge',         # 1
    'Bus',            # 2
    'Car',            # 3
    'Chimney',        # 4
    'Crosswalk',      # 5
    'Hydrant',        # 6
    'Motorcycle',     # 7
    'Other',          # 8
    'Palm',           # 9
    'Stair',          # 10
    'Traffic Light',  # 11
]

# Captcha prompt phrases accepted for each class. Position k here corresponds
# to label_names[k], so the two lists must stay the same length and order.
captcha_label = [
    ["bicycles", "bicycle", "bike"],      # Bicycle
    ["bridges", "bridge"],                # Bridge
    ["buses", "bus"],                     # Bus
    ["cars", "car"],                      # Car
    ["chimney"],                          # Chimney
    ["crosswalks", "crosswalk"],          # Crosswalk
    ["a fire hydrant", "fire hydrants"],  # Hydrant
    ["motorcycles"],                      # Motorcycle
    ["UNDEFINED?"],                       # Other (placeholder entry)
    ["tree", "palm tree"],                # Palm
    ["stairs"],                           # Stair
    ["traffic lights"],                   # Traffic Light
]

def decode(label):
    """Return the index of the largest entry in ``label`` (flattened argmax)."""
    best_index = np.argmax(label)
    return best_index

def decode_to_string(label):
    """Return the class name from ``label_names`` for a prediction vector."""
    class_index = decode(label)
    return label_names[class_index]

In [11]:
# Download image and convert to numpy
def predict_current():
    """Fetch the current 3x3 challenge image and store its tiles globally.

    The image source of the element with class "rc-image-tile-33" is saved to
    data/captcha.jpeg, then split into tiles by ``crop_image_and_convert``.
    The resulting array is stored in the global ``to_predict``; nothing is
    returned.
    """
    global to_predict
    captcha_path = "data/captcha.jpeg"
    captcha_img = driver.find_element_by_class_name("rc-image-tile-33")
    urllib.request.urlretrieve(captcha_img.get_attribute("src"), captcha_path)
    to_predict = crop_image_and_convert(Image.open(captcha_path))


In [23]:
# Prepare and check label to predict
def get_label_index():
    """Map the challenge prompt to a class index.

    Reads the text of the first <strong> element and looks for it among the
    phrases in ``captcha_label``. On a match the class name is printed and
    its index returned; when no phrase matches, None is returned.
    """
    prompt = driver.find_element_by_tag_name("strong").text
    for position, phrases in enumerate(captcha_label):
        if prompt not in phrases:
            continue
        print("Find the", label_names[position])
        return position
    return None
        
def check_warnings():
    """Report which captcha warning elements are currently displayed.

    Returns a list of four booleans, in this order: incorrect response,
    select more, dynamic more, select something.
    """
    warning_classes = (
        "rc-imageselect-incorrect-response",
        "rc-imageselect-error-select-more",
        "rc-imageselect-error-dynamic-more",
        "rc-imageselect-error-select-something",
    )
    visibility = []
    for warning_class in warning_classes:
        warning = driver.find_element_by_class_name(warning_class)
        visibility.append(warning.is_displayed())
    return visibility

def check_tile_update(index):
    """Refresh one entry of ``to_predict`` if its tile was replaced.

    The table is re-read, and the <img> inside the cell at ``index`` is
    inspected. When its class is "rc-image-tile-11" (a single replacement
    tile) the image is downloaded to data/captcha-tile-<index>.jpeg and
    stored in ``to_predict[index]``.

    Args:
        index: flat position of the tile in the table.

    Returns:
        True when the tile was replaced and ``to_predict`` updated,
        False otherwise.
    """
    global to_predict
    get_table_coords()
    img_element = table_elements[index].find_element_by_tag_name("img")
    if img_element.get_attribute("class") != "rc-image-tile-11":
        return False

    file_path = "data/captcha-tile-%d.jpeg" % (index)
    urllib.request.urlretrieve(img_element.get_attribute("src"), file_path)
    # Apply the same RGB conversion and 100x100 resize as every other tile,
    # so the assignment always matches the shape of the to_predict slot.
    with Image.open(file_path) as tile:
        to_predict[index] = process_image(tile)
    return True

In [13]:
def print_grid():
    """Plot the first nine tiles of ``to_predict`` as a 3x3 grid without axes."""
    for cell_number in range(1, 10):
        # Subplot numbering is 1-based; the tile array is 0-based.
        plt.subplot(3, 3, cell_number)
        for clear_ticks in (plt.xticks, plt.yticks):
            clear_ticks([])
        plt.grid(False)
        plt.imshow(to_predict[cell_number - 1])
    plt.show()

In [16]:
# Code to print predicted images:
# for i in range(len(to_predict)):
#     show_image_label(to_predict[i], decode(predicted[i]))

In [17]:
# CAPTCHA INTERACTION DEMO
# WARNING!! THIS CODE WILL TAKE CONTROL OF YOUR MOUSE
# PRESS ALT+F4 ON THE CHROME DRIVER WINDOW TO FORCE-INTERRUPT THE KERNEL
# for k in range(100):
#     table_coords = get_table_coords()
#     for i in range(3):
#         for j in range(3):
#             choose_captcha(i + 1, j + 1)
#     submit()
#     print("submit successful, sleeping for 3 seconds.")
#     time.sleep(3)