# In [None]:
import requests
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pytesseract
import re
import json

def process_pixel(img: np.ndarray, *, debug: bool = True) -> bool:
    """Report whether Tesseract finds any text in a single-channel image.

    The image is prepared for OCR by painting a 12-pixel frame white,
    denoising it, and upscaling it by a factor of 10 with bicubic
    interpolation.

    Args:
        img: 2-D (single-channel) image array. It is NOT modified; all work
            happens on a copy.
        debug: When true (the default), write the prepared image to
            ``resized_crop.png`` and draw it with ``show_img``.

    Returns:
        True if the OCR output contains at least one non-whitespace
        character, otherwise False.

    Raises:
        ValueError: If ``img`` is not two-dimensional.
    """
    if img.ndim != 2:
        raise ValueError(
            f"expected a single-channel 2-D image, got shape {img.shape}"
        )

    border_margin = 12

    # Callers typically pass a slice (view) of a larger image; copying keeps
    # the border whitening below from leaking back into the caller's data.
    cell = img.copy()

    # Set the border area to white (255)
    cell[:border_margin, :] = 255
    cell[-border_margin:, :] = 255
    cell[:, :border_margin] = 255
    cell[:, -border_margin:] = 255

    # Positional arguments: dst=None, h=30, templateWindowSize=7,
    # searchWindowSize=21.
    cell = cv2.fastNlMeansDenoising(cell, None, 30, 7, 21)

    cell = cv2.resize(
        cell,
        (cell.shape[1] * 10, cell.shape[0] * 10),
        interpolation=cv2.INTER_CUBIC,
    )

    if debug:
        cv2.imwrite("resized_crop.png", cell)
        show_img(cell, "re.png")

    custom_config = '-l eng --oem 3 --psm 6'
    extracted_text = pytesseract.image_to_string(cell, config=custom_config)

    # OCR output for an empty cell may still contain whitespace or a page
    # separator, so test for real content rather than a non-empty string.
    return bool(extracted_text.strip())

# Currently works for image E 
def show_img(img, title):
    """Draw ``img`` with a gray colormap in a new 10x5 inch figure.

    The image goes into the first cell of a 1-row, 2-column subplot layout,
    is labelled with ``title``, and has its axes hidden. The figure is
    created through pyplot and is not explicitly shown or closed here.
    """
    plt.figure(figsize=(10, 5))
    axes = plt.subplot(1, 2, 1)
    axes.set_title(title)
    plt.imshow(img, cmap='gray')
    axes.axis('off')

# URL of the server that receives the uploaded image.
url = "http://127.0.0.1:5000/"  # Make sure this matches your Flask app's URL

# Path to the image file on your local disk
image_path = "crosswordE.jpg"

# Load the original image using OpenCV; the processing functions below read
# this module-level array.
orig = cv2.imread(image_path)
if orig is None:
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated-looking OpenCV error.
    raise OSError(
        f"cv2.imread could not load {image_path!r} "
        "(missing file or unsupported format)"
    )

#Display the original image
# plt.figure(figsize=(10, 5))
# plt.subplot(1, 2, 1)
# plt.title("Original Image")
# plt.imshow(orig)
# plt.axis('off')  # Hide axes

def extract_boxes(image):
    """Cut four fixed, proportionally-placed rectangles out of ``image``.

    All bounds are integer fractions of the image height/width (plus one
    fixed 7-pixel trim on the right edge of the last region), so the same
    layout is applied whatever the resolution. The results are plain NumPy
    slices, i.e. views into ``image`` rather than copies.

    Returns:
        A tuple ``(across1, across2, down, cross)``:

        * ``across1`` - upper-left band, rows 12%..50% of the height.
        * ``across2`` - band to the right of ``across1``, same rows.
        * ``down``    - right-hand column, rows 12%..88% of the height.
        * ``cross``   - lower-left area below the two ``across`` bands
          (the caller passes this one to ``crossword_extract``).
    """
    page_h, page_w = image.shape[:2]

    # Vertical bounds shared by several regions.
    top_margin = page_h * 12 // 100
    middle = page_h // 2
    across_rows = slice(top_margin, middle)

    # Horizontal bound shared by ``across2`` and ``cross``.
    three_fifths = page_w * 3 // 5

    across1 = image[across_rows, page_w // 18:page_w * 8 // 25]
    across2 = image[across_rows, page_w * 9 // 30:three_fifths]

    down_rows = slice(top_margin, page_h - top_margin)
    down_cols = slice((page_w * 40) // 65, page_w - page_w // 19)
    down = image[down_rows, down_cols]

    cross_rows = slice(middle + page_h // 30, page_h - page_h // 25)
    cross_cols = slice(page_w // 22, three_fifths - 7)
    cross = image[cross_rows, cross_cols]

    return across1, across2, down, cross


def process(image, name):
    """Convert a BGR image to a high-contrast grayscale image and save it.

    Steps: grayscale conversion, 5x5 Gaussian blur, 3x3 sharpening filter,
    then two clamps - pixels in 0..180 become 0 and pixels in 190..255
    become 255 (values 181..189 are left as they are). The result is written
    to the path ``name`` and also returned.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    sharpen = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    result = cv2.filter2D(blurred, -1, sharpen)

    # Push everything at or below 180 to pure black...
    dark = (result >= 0) & (result <= 180)
    result[dark] = 0

    # ...and everything at or above 190 to pure white.
    light = (result >= 190) & (result <= 255)
    result[light] = 255

    cv2.imwrite(name, result)
    return result

def text_extract(image, name):
    """OCR an image of numbered clues and return them keyed by clue number.

    Args:
        image: Image passed directly to ``pytesseract.image_to_string``.
        name: Unused; kept so existing callers keep working.

    Returns:
        A dict mapping each clue number (int) to its text. A line of the
        form ``"<integer>) text"`` starts a new clue; any other non-blank
        line is appended to the most recent clue. Text that appears before
        the first numbered line is dropped.
    """
    custom_config = r'--oem 1 --psm 6'
    # Use pytesseract to extract text from the image
    extracted_text = pytesseract.image_to_string(image, config=custom_config)

    # Map characters the OCR tends to produce in place of digits
    # ("$" -> "5", "|" -> "1") in a single pass.
    extracted_text = extracted_text.translate(str.maketrans("$|", "51"))

    return _parse_clue_lines(extracted_text.split("\n"))


def _parse_clue_lines(lines):
    """Group lines of the form ``"<integer>) text"`` into ``{integer: text}``."""
    clues = {}
    current_index = None  # Clue that continuation lines get appended to

    for line in lines:
        text = line.strip()
        if not text:
            # Blank lines (and whitespace-only separators) carry no clue
            # text; appending them would only pile up stray spaces.
            continue

        head, paren, tail = text.partition(")")
        if paren:
            try:
                index = int(head)
            except ValueError:
                # Not "<integer>)": fall through and treat the whole line as
                # a continuation of the previous clue.
                pass
            else:
                clues[index] = tail.strip()
                current_index = index
                continue

        if current_index is not None:
            # strip() avoids a leading space when the clue started empty.
            clues[current_index] = (clues[current_index] + " " + text).strip()

    return clues


def remove_grid_lines(image):
    """Return ``image`` after a morphological closing with a 15x15 kernel.

    The all-ones kernel is meant to be larger than the grid lines so that
    the closing removes them.
    """
    square_kernel = np.full((15, 15), 1, dtype=np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, square_kernel)

def find_contours(edges, min_area=5000, epsilon_ratio=0.05):
    """Return the four corners of the largest quadrilateral contour.

    Args:
        edges: Binary edge image handed to ``cv2.findContours`` (external
            contours only).
        min_area: Contours whose area is not strictly greater than this are
            ignored (they are likely individual grid squares).
        epsilon_ratio: Polygon-approximation tolerance as a fraction of the
            contour's perimeter.

    Returns:
        A ``(4, 2)`` array with the corner points of the largest contour
        that approximates to exactly four vertices, in the order produced by
        ``cv2.approxPolyDP``, or ``None`` if there is no such contour.
    """
    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list in
    # both layouts.
    contours = cv2.findContours(
        edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    quads = []
    for contour in sorted(contours, key=cv2.contourArea, reverse=True):
        if cv2.contourArea(contour) <= min_area:
            # Sorted by descending area, so everything after this is too
            # small as well.
            break
        epsilon = epsilon_ratio * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            quads.append(approx)

    if not quads:
        return None  # No valid contours found

    # The simplified polygon's area can differ from the raw contour's, so
    # pick the largest quadrilateral by its own area.
    return max(quads, key=cv2.contourArea).reshape(4, 2)

def preprocess(image=None):
    """Locate the document in a photo and warp it to a flat, upright page.

    Steps:

    1. Convert the image to grayscale.
    2. Apply a 5x5 Gaussian blur to reduce noise.
    3. Apply bilateral filtering to smooth further while preserving edges.
    4. Binarize with a fixed threshold of 128.
    5. Remove grid lines with ``remove_grid_lines``.
    6. Run Canny edge detection.
    7. Find the document's four corners with ``find_contours``.
    8. Perspective-warp the original image onto a 2480-pixel-wide page.

    Args:
        image: BGR image to process. Defaults to the module-level ``orig``.

    Returns:
        The warped image, 2480 pixels wide and 3500 pixels high.

    Raises:
        ValueError: If no four-cornered document outline is found.
    """
    source = orig if image is None else image

    grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    gauss = cv2.GaussianBlur(grayscale, (5, 5), 0)
    bilat = cv2.bilateralFilter(gauss, 9, 75, 75)
    _, thresh = cv2.threshold(bilat, 128, 255, cv2.THRESH_BINARY)
    clean = remove_grid_lines(thresh)
    edges = cv2.Canny(clean, 100, 150)

    corners = find_contours(edges)
    if corners is None:
        # This is a plain script, not a Flask view: raise instead of
        # returning an HTTP-style (response, status) tuple the caller
        # cannot use.
        raise ValueError("Document edges not detected")

    # Contour points start at an arbitrary corner, so sort them into a known
    # order before pairing them with the destination rectangle.
    src_points = _order_corners(corners)

    # top left, top right, bottom right, bottom left
    dst_points = np.float32([[0, 0], [2480, 0], [2480, 3508], [0, 3508]])
    matrix = cv2.getPerspectiveTransform(src_points, dst_points)

    # NOTE(review): the destination rectangle is 3508 high but the output
    # canvas is 3500, so the bottom 8 rows are cut off. Kept as-is because
    # the crop fractions downstream may have been tuned against this size.
    return cv2.warpPerspective(source, matrix, (2480, 3500))


def _order_corners(corners):
    """Return four points as float32, ordered TL, TR, BR, BL."""
    pts = np.asarray(corners, dtype=np.float32).reshape(4, 2)
    sums = pts.sum(axis=1)          # x + y: smallest at TL, largest at BR
    diffs = pts[:, 1] - pts[:, 0]   # y - x: smallest at TR, largest at BL
    return np.float32([
        pts[np.argmin(sums)],
        pts[np.argmin(diffs)],
        pts[np.argmax(sums)],
        pts[np.argmax(diffs)],
    ])


def crossword_extract(image, grid_size=15):
    """Classify every cell of a square crossword grid image.

    The image is cleaned with ``process`` (which also saves it as
    ``cross.png``), divided into ``grid_size`` x ``grid_size`` equal cells,
    and each cell is labelled:

    * ``"black"`` - mean pixel value of the cell is 127 or less;
    * an ``int``  - a light cell in which ``process_pixel`` found text; the
      value is a running counter starting at 1, in row-major order (it is
      not the digit that was read);
    * ``"white"`` - a light cell without text.

    Args:
        image: BGR image of the grid.
        grid_size: Number of rows and columns in the grid.

    Returns:
        A list of ``grid_size`` rows, each a list of ``grid_size`` labels.

    Raises:
        ValueError: If ``grid_size`` is not positive or the image is too
            small to give every cell at least one pixel.
    """
    if grid_size < 1:
        raise ValueError(f"grid_size must be positive, got {grid_size}")

    # process() already writes its result to the given path.
    image = process(image, "cross.png")

    cell_h = image.shape[0] // grid_size  # height of each cropped section
    cell_w = image.shape[1] // grid_size  # width of each cropped section
    if cell_h == 0 or cell_w == 0:
        raise ValueError(
            f"image of shape {image.shape[:2]} is too small for a "
            f"{grid_size}x{grid_size} grid"
        )

    grid = []
    count = 1
    for i in range(grid_size):
        top = i * cell_h
        row = []
        for j in range(grid_size):
            left = j * cell_w
            cropped_image = image[top:top + cell_h, left:left + cell_w]
            cv2.imwrite("crop.png", cropped_image)

            # Measure brightness on the untouched crop, before any OCR
            # preprocessing gets a chance to alter it.
            if np.mean(cropped_image) <= 127:
                # Dark cells are never OCR'd: the result would be ignored.
                row.append("black")
                continue

            # Pass a copy so OCR preprocessing cannot write into ``image``.
            if process_pixel(cropped_image.copy()):
                row.append(count)
                count += 1
            else:
                row.append("white")  # Append "white" if no text found

        # Add the row to the grid
        grid.append(row)

    return grid

# Run the pipeline on the module-level image: flatten the page, then cut it
# into the four regions returned by extract_boxes.
img = preprocess()
across1, across2, down, cross = extract_boxes(img)

# Clue OCR is currently disabled; only the grid region is processed below.
# across2_array = text_extract(across2, "across2.png")
# across1_array = text_extract(across1, "across1.png")
# down_array = text_extract(down, "down.png")

# Classify every grid cell and print the result one row per line.
new_cross = crossword_extract(cross)
for row in new_cross:
    print(row)

# Disabled: pretty-print the extracted clues as JSON.
# pretty_json = json.dumps(across2_array, indent=4)




# print(pretty_json)


# Disabled leftover: fill gaps in one clue array from another, then print
# the non-empty entries. (arr1 / arr2 are not defined anywhere in this file.)
# for i in range(len(arr1)):
#     if not arr1[i] and arr2[i]:
#         arr1[i] = arr2[i]

# for i in range(len(arr1)):
#     if arr1[i]:
#         print(i, arr1[i])


# Open the image file in binary mode and send it as a multipart upload.
with open(image_path, "rb") as image_file:
    files = {"image": image_file}
    # Without a timeout requests waits forever on an unresponsive server.
    # (connect, read) in seconds; the read limit is generous because the
    # server may take a while to process the image.
    res = requests.post(url, files=files, timeout=(5, 300))

# Check the response
print(res.status_code)
print(res.text)  # Assuming the response is in JSON format