In [1]:
import urllib.request
import pyautogui
import time
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from selenium import webdriver
from PIL import Image

In [2]:
# Start a Chrome session using the chromedriver binary stored under ./driver
# (path is relative to the notebook's working directory).
driver = webdriver.Chrome('./driver/chromedriver.exe')
# Alternative: uncomment to drive Edge instead of Chrome.
# driver = webdriver.Edge("./driver/msedgedriver.exe")
# Open the reCAPTCHA demo page the remaining cells operate on.
driver.get("https://patrickhlauke.github.io/recaptcha/")
# Later cells click through pyautogui at coordinates derived from element
# locations, so the window is put into fullscreen first.
# NOTE(review): presumably this keeps page and screen coordinates aligned — confirm.
driver.fullscreen_window()

In [3]:
# Go back to the top-level document and click the first <iframe> on the page.
# NOTE(review): presumably this is the reCAPTCHA checkbox widget — confirm
# against the page markup.
driver.switch_to.default_content()
driver.find_element_by_tag_name("iframe").click()
# Brief pause before the next cell searches for the challenge iframe.
time.sleep(0.33)

In [4]:
# Find the right iframe in case of multiple redundant tags.
# A candidate counts as the real challenge frame when it contains at least one
# <button>; if several candidates qualify, the last one is kept.
iframes = driver.find_elements_by_css_selector('[title="recaptcha challenge expires in two minutes"]')
correctIFrame = None
iFrameIndex = None
for candidate_index, candidate in enumerate(iframes):
    driver.switch_to.frame(candidate)
    # Query the frame once and reuse the result instead of hitting the DOM twice.
    buttons = driver.find_elements_by_tag_name("button")
    if buttons:
        print("correct iframe found")
        correctIFrame = candidate
        iFrameIndex = candidate_index
    # Step back out so the next switch starts from the top-level document.
    driver.switch_to.default_content()

correct iframe found


In [5]:
# Find captcha box coordinates
# The iframe's location is used as the origin that find_location_of_element()
# adds to every element position inside the frame.
boxX = correctIFrame.location["x"]
# NOTE(review): the fixed 45 px offset is not explained by the code —
# presumably it compensates for UI above the page content; confirm on the
# target machine.
boxY = 45 + correctIFrame.location["y"]
# Enter the challenge frame so the element lookups in the following cells
# resolve inside it.
driver.switch_to.frame(correctIFrame)

def find_location_of_element(element):
    """Return the (x, y) centre of ``element`` shifted by the captcha box origin.

    ``element`` must expose ``location`` (mapping with "x" and "y") and
    ``size`` (mapping with "width" and "height"). The module-level ``boxX`` and
    ``boxY`` offsets are added to the element's own centre point.
    """
    position, dimensions = element.location, element.size
    center_x = boxX + dimensions["width"] / 2 + position["x"]
    center_y = boxY + dimensions["height"] / 2 + position["y"]
    return center_x, center_y

# Find verify button coordinates
# (centre of the element, offset by the captcha box origin).
verifyX, verifyY = find_location_of_element(driver.find_element_by_id("recaptcha-verify-button"))

# Find reload button coordinates
reloadX, reloadY = find_location_of_element(driver.find_element_by_id("recaptcha-reload-button"))

# Increase Y to compensate for warning message
# NOTE(review): the 10 px shift is a fixed value applied to both buttons —
# confirm it still matches the rendered height of the warning text.
reloadY += 10
verifyY += 10

In [6]:
# DRIVER INTERACTION FUNCTIONS
def clickIndex(row, column):
    """Click the <td> at zero-based (row, column) through the WebDriver.

    Rows are the <tr> elements found in the current frame; columns are the
    <td> elements inside the selected row.
    """
    target_row = driver.find_elements_by_tag_name("tr")[row]
    target_cell = target_row.find_elements_by_tag_name("td")[column]
    target_cell.click()

def click(x, y):
    """Move the mouse pointer to (x, y) over half a second, then click.

    Both steps go through pyautogui, so the real pointer is moved.
    """
    travel_seconds = 0.5
    pyautogui.moveTo(x, y, duration=travel_seconds)
    pyautogui.click()

def click_verify():
    """Click at the stored verify-button coordinates (``verifyX``, ``verifyY``)."""
    click(x=verifyX, y=verifyY)

def reload():
    """Click at the stored reload-button coordinates (``reloadX``, ``reloadY``)."""
    click(x=reloadX, y=reloadY)

def check_table():
    """Return every <td> element of every <tr> in the current frame.

    Cells are listed row by row, in the order the driver returns them.
    """
    return [
        cell
        for row in driver.find_elements_by_tag_name("tr")
        for cell in row.find_elements_by_tag_name("td")
    ]

def img_element_with_id(id):
    """Return the first <img> whose ``class`` attribute equals ``id``.

    Despite the parameter name, the value is compared against the element's
    ``class`` attribute (e.g. "rc-image-tile-33"), not its DOM id.

    Returns:
        The matching element, or None when no <img> in the current frame
        carries that class.
    """
    for image in driver.find_elements_by_tag_name("img"):
        # Compare against the requested class instead of a hard-coded one.
        if image.get_attribute("class") == id:
            return image
    return None

# Keep reloading until the challenge is the 3x3 type, then return its cells
def find_3x3_captcha():
    """Reload the challenge until an "rc-image-tile-33" image is present.

    After each reload click the function waits one second before looking
    again. Once the image is found, the table cells from ``check_table()``
    are returned.
    """
    while img_element_with_id("rc-image-tile-33") is None:
        reload()
        time.sleep(1)
    return check_table()

In [7]:
# IMAGE PROCESSING FUNCTIONS
def show_image(img):
    """Display ``img`` with matplotlib, with the ticks on both axes removed."""
    plt.xticks([])
    plt.yticks([])
    pixels = np.array(img)
    plt.imshow(pixels)
    plt.show()

def show_image_label(img, label):
    """Display ``img`` via ``show_image`` with ``label`` as the x-axis caption."""
    plt.xlabel(label)
    show_image(img=img)

def process_image(img):
    """Convert an image to a 100x100 numpy array.

    RGBA images are converted to RGB first; every other mode is resized
    as it is. The resized image is then handed to ``np.array``.
    """
    rgb = img.convert("RGB") if img.mode == "RGBA" else img
    return np.array(rgb.resize((100, 100)))

def crop_image_and_convert(image):
    """Cut ``image`` into a 3x3 grid and return the tiles as one array.

    Tile width and height are the image size integer-divided by three. Tiles
    are taken row by row (left to right, top to bottom), each one passed
    through ``process_image`` and stacked along a new leading axis, starting
    from an empty (0, 100, 100, 3) uint8 array.
    """
    width, height = image.size
    tile_w, tile_h = width // 3, height // 3
    # Seed with the empty uint8 batch so shape and dtype rules match a stepwise build.
    pieces = [np.zeros((0, 100, 100, 3), dtype=np.uint8)]
    for row, col in ((r, c) for r in range(3) for c in range(3)):
        box = (col * tile_w, row * tile_h, (col + 1) * tile_w, (row + 1) * tile_h)
        pieces.append([process_image(image.crop(box))])
    return np.concatenate(pieces)

In [8]:
# NOTE(review): this flag is not read or written by any other visible cell —
# confirm whether it is still needed.
tile_updated = False

def get_table_coords():
    """Refresh the module-level ``table_elements`` and ``table_coords``.

    ``table_elements`` becomes the cell list returned by ``find_3x3_captcha()``
    and ``table_coords`` the matching list of ``find_location_of_element``
    results, in the same order.
    """
    global table_elements, table_coords
    table_elements = find_3x3_captcha()
    table_coords = list(map(find_location_of_element, table_elements))

def choose_captcha_by_index(index):
    """Click the tile whose coordinates are stored at ``table_coords[index]``."""
    target = table_coords[index]
    click(target[0], target[1])

# ROW AND COLUMN START FROM 1 INSTEAD OF 0
def choose_captcha(row, column):
    """Click the tile at one-based (row, column) of the 3x3 grid."""
    zero_based_row, zero_based_col = row - 1, column - 1
    choose_captcha_by_index(zero_based_row * 3 + zero_based_col)

In [9]:
# Load model and categories
# The model file path is relative to the notebook's working directory.
loaded_model = tf.keras.models.load_model("data/solver/model_test2.h5")

# Human-readable class names. decode_to_string() indexes this list with the
# positions returned by decode().
# NOTE(review): the order presumably mirrors the model's output units —
# confirm against the training code.
label_names = [
    'Bicycle', 'Bridge', 'Bus', 'Car', 'Chimney',
    'Crosswalk', 'Hydrant', 'Motorcycle', 'Other', 'Palm', 'Stair',
    'Traffic Light'
]

# Challenge prompt texts accepted for each class: captcha_label[i] holds the
# strings that map to label_names[i], so both lists must stay index-aligned.
# The prompt is matched by exact membership, so unlisted variants match nothing.
captcha_label = [
    ["bicycles", "bicycle", "bike"],
    ["bridges", "bridge"],
    ["buses", "bus"],
    ["cars", "car"],
    ["chimney"],
    ["crosswalks", "crosswalk"],
    ["a fire hydrant", "fire hydrants"],
    ["motorcycles"],
    ["UNDEFINED?"],
    ["tree", "palm tree"],
    ["stairs"],
    ["traffic lights"]
]

def decode(label, threshold=0.46):
    """Return the positions in ``label`` whose score exceeds ``threshold``.

    Args:
        label: iterable of per-class scores for one image.
        threshold: strict lower bound a score must exceed to count as a hit.
            Defaults to 0.46.

    Returns:
        List of integer positions in ascending order. Positions come from
        enumeration rather than a value lookup, so two classes with the same
        score are both reported correctly.
    """
    return [position for position, score in enumerate(label) if score > threshold]

def decode_to_string(label):
    """Format every class selected by ``decode(label)`` as "Name(score)".

    The name comes from ``label_names`` and the score is ``label`` at the
    same position, formatted with six decimals.
    """
    return ["%s(%lf)" % (label_names[hit], label[hit]) for hit in decode(label)]

In [10]:
# Download the full challenge image and split it into numpy tiles
def get_original_image():
    """Fetch the 3x3 challenge image and return its nine processed tiles.

    Refreshes the table coordinates, downloads the ``src`` of the
    "rc-image-tile-33" element to data/solver/captcha.jpeg, then returns the
    result of ``crop_image_and_convert`` on the saved file.
    """
    get_table_coords()
    download_path = "data/solver/captcha.jpeg"
    grid_img = driver.find_element_by_class_name("rc-image-tile-33")
    urllib.request.urlretrieve(grid_img.get_attribute("src"), download_path)
    return crop_image_and_convert(Image.open(download_path))

def check_tile_update(index, images):
    """Replace ``images[index]`` when that tile has been swapped for a new image.

    A cell counts as refreshed when its <img> carries the class
    "rc-image-tile-11". In that case the image is downloaded to
    data/solver/captcha-tile-<index>.jpeg, normalised with ``process_image``
    (same treatment as the tiles cut from the full grid) and written into
    ``images`` in place.

    Args:
        index: zero-based tile position in ``table_elements``.
        images: array of tile images, modified in place.

    Returns:
        True if the tile was refreshed and replaced, False otherwise.
    """
    get_table_coords()
    img_element = table_elements[index].find_element_by_tag_name("img")
    if img_element.get_attribute("class") != "rc-image-tile-11":
        return False
    file_path = "data/solver/captcha-tile-%d.jpeg" % (index)
    urllib.request.urlretrieve(img_element.get_attribute("src"), file_path)
    # Normalise mode and size before storing so an RGBA or non-100x100 tile
    # cannot break the assignment; the context manager releases the file.
    with Image.open(file_path) as tile:
        images[index] = process_image(tile)
    return True

def latest_image():
    """Return the current tiles, with any refreshed tiles swapped in.

    Starts from ``get_original_image()``, runs ``check_tile_update`` for each
    of the nine positions in order, prints how many tiles were replaced and
    returns the image array.
    """
    images = get_original_image()
    refreshed = [slot for slot in range(9) if check_tile_update(slot, images)]
    print("%d images updated" % len(refreshed))
    return images

In [11]:
# Prepare and check label to predict
def get_label_index():
    """Look up the class index for the current challenge prompt.

    Reads the text of the <strong> element, finds the first ``captcha_label``
    entry that contains it, prints the matching ``label_names`` entry and
    returns its index. Returns None (and prints nothing) when no entry matches.
    """
    label = driver.find_element_by_tag_name("strong").text
    for index, aliases in enumerate(captcha_label):
        if label in aliases:
            print("Find the", label_names[index])
            return index
        
def check_warnings():
    """Report which challenge warning messages are currently displayed.

    Returns a list of four booleans, one per warning class, in this order:
    incorrect-response, error-select-more, error-dynamic-more,
    error-select-something.
    """
    warning_classes = (
        "rc-imageselect-incorrect-response",
        "rc-imageselect-error-select-more",
        "rc-imageselect-error-dynamic-more",
        "rc-imageselect-error-select-something",
    )
    visible = []
    for css_class in warning_classes:
        visible.append(driver.find_element_by_class_name(css_class).is_displayed())
    return visible

In [12]:
def print_grid(images, predictions):
    """Plot the nine tiles in a 3x3 grid, each captioned with its predictions.

    ``predictions[k]`` is a list of strings; they are joined with spaces and
    used as the x-axis label of tile ``k``. Ticks and grid lines are hidden.
    """
    for position in range(1, 10):
        tile = position - 1
        plt.subplot(3, 3, position)
        caption = " ".join(predictions[tile])
        plt.xlabel(caption)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[tile])
    plt.show()

In [13]:
def switch_to_iframe1():
    """Switch the driver into the '[title="reCAPTCHA"]' frame at ``iFrameIndex``.

    The lookup starts from the top-level document. ``iFrameIndex`` was
    computed among the challenge iframes, so this relies on both iframe lists
    sharing the same ordering.
    """
    driver.switch_to.default_content()
    checkbox_frames = driver.find_elements_by_css_selector('[title="reCAPTCHA"]')
    target_frame = checkbox_frames[iFrameIndex]
    driver.switch_to.frame(target_frame)

def switch_to_iframe2():
    """Switch the driver into the challenge frame at ``iFrameIndex``.

    The lookup starts from the top-level document and selects among the
    iframes titled "recaptcha challenge expires in two minutes".
    """
    driver.switch_to.default_content()
    challenge_frames = driver.find_elements_by_css_selector('[title="recaptcha challenge expires in two minutes"]')
    target_frame = challenge_frames[iFrameIndex]
    driver.switch_to.frame(target_frame)

def check_success():
    """Return True when the checkbox frame contains an aria-checked="true" element.

    On success the driver is left on the top-level document; otherwise it is
    switched back into the challenge frame so solving can continue.
    """
    switch_to_iframe1()
    solved = len(driver.find_elements_by_css_selector('[aria-checked="true"]')) != 0
    if solved:
        driver.switch_to.default_content()
    else:
        switch_to_iframe2()
    return solved

In [14]:
def predict_and_click(images, pred_label):
    """Classify all tiles and click those predicted to contain ``pred_label``.

    Args:
        images: batch of tile images passed to ``loaded_model.predict``.
        pred_label: class index to look for in each tile's decoded prediction.

    Returns:
        A pair ``(index_clicked, prediction_strings)``: the tile positions that
        were clicked, and ``decode_to_string`` output for every tile.

    After the clicks the function sleeps for five seconds before returning.
    """
    prediction = loaded_model.predict(images)
    index_clicked = []
    for tile_index, scores in enumerate(prediction):
        if pred_label in decode(scores):
            choose_captcha_by_index(tile_index)
            index_clicked.append(tile_index)
    time.sleep(5)
    return index_clicked, [decode_to_string(scores) for scores in prediction]

def get_captcha_label():
    """Return the class index matching the current challenge prompt.

    The prompt is the text of the <strong> element; the index is that of the
    first ``captcha_label`` entry containing it. Returns None when no entry
    matches.
    """
    label_text = driver.find_element_by_tag_name("strong").text
    for position, aliases in enumerate(captcha_label):
        if label_text in aliases:
            return position
        
def submit():
    """Click verify, then continue solving if the checkbox is not yet ticked.

    Waits four seconds after the click and refreshes the table coordinates.
    When ``check_success()`` reports failure, waits another 1.5 seconds and
    restarts ``solve_captcha`` on the latest tile images.
    """
    click_verify()
    time.sleep(4)
    get_table_coords()
    if check_success():
        return
    time.sleep(1.5)
    solve_captcha(latest_image())

def solve_captcha(images, index_clicked=None):
    """Click matching tiles round after round, then submit the challenge.

    Each round refreshes the table coordinates, reads the prompt, re-downloads
    the tiles clicked in the previous round (they may have been replaced),
    classifies all tiles, clicks the matches and plots the grid. Rounds repeat
    until one produces no clicks; then ``submit()`` is called.

    Args:
        images: array of the nine tile images; refreshed tiles are written
            into it in place by ``check_tile_update``.
        index_clicked: tile positions clicked in a previous round, or None
            (the default) when starting fresh.
    """
    # None instead of a shared mutable default list.
    if index_clicked is None:
        index_clicked = []
    # Loop rather than recurse so long click sequences do not grow the stack.
    while True:
        get_table_coords()
        prediction_label = get_captcha_label()
        for i in index_clicked:
            check_tile_update(i, images)
        index_clicked, predictions = predict_and_click(images, prediction_label)
        print_grid(images, predictions)
        print("INDEX CLICKED:", index_clicked)
        if len(index_clicked) == 0:
            break
    get_table_coords()
    submit()

In [15]:
# Entry point: download and slice the current 3x3 challenge image, then run
# the classify / click / submit cycle. submit() restarts the cycle until
# check_success() reports the checkbox as ticked.
solve_captcha(get_original_image())

In [None]:
# Code to print predicted images:
# for i in range(len(to_predict)):
#     show_image_label(to_predict[i], decode(predicted[i]))