In [1]:
# Project 1 v3
# Bringing it all together

In [2]:
# imports
# Required libraries to be installed: Pillow, OpenCV
import cv2 as cv
import numpy as np
import glob
import os
import pickle
import matplotlib.pyplot as plt
from IPython.display import clear_output, Image, display
import PIL.Image
import io

In [3]:
# helper functions
def show_image(a, fmt='jpeg'):
    a = np.uint8(np.clip(a, 0, 255))
    f = io.BytesIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))
    
def read_image(index, source, resize):
    image = cv.imread(source[index])
    return cv.resize(image, (0, 0), fx=resize, fy=resize)

In [4]:
# I tried with multiple images in the beginning
template_images = glob.glob(os.path.join('./', "template_*.jpg")) 
char_to_index = {'A': 0, 'B': 1, 'C': 2, 'D': 3}


In [5]:
# helper variables
template_image_full = read_image(0, template_images, 1)
template_image = cv.resize(template_image_full, (0, 0), fx=0.6, fy=0.6)
h, w, _ = template_image.shape
template_image = template_image[int(0.36 * h):h, :]
# show_image(template_image)

In [6]:
def get_first_grid(image):
    image_h, image_w, channels = image.shape
    return image[int(0.32 * image_h):int(image_h * 0.78), int(image_w * 0.222): int(image_w * 0.367)]

def get_second_grid(image):
    image_h, image_w, channels = image.shape
    return image[int(0.32 * image_h):int(image_h * 0.79), int(image_w * 0.75): int(image_w * 0.90)]

In [7]:
# I initially played with it, taking the first best features etc
# still, the best result is with the lab code, just with nfeatures = 10k instead of 5k
def warp_image(template, query):
    img1 = template
    img2 = query
    # create ORB object
    orb = cv.ORB_create(nfeatures=10000)
    # get the keypoints and the corresponding descriptors
    kp1, des1 = orb.detectAndCompute(img1, None)
    kp2, des2 = orb.detectAndCompute(img2, None) 
    # create BFMatcher object
    # matcher takes normType, which is set to cv2.NORM_L2 for SIFT and SURF, cv2.NORM_HAMMING for ORB, FAST and BRIEF
    bf = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)

    # Match descriptors.
    matches = bf.match(des2, des1) # query_image, train_image
    # Sort them in the order of their distance.
    matches = sorted(matches, key = lambda x:x.distance)

    # points template from img1, the template image
    points_template = np.zeros((len(matches), 2), dtype=np.float32)
    # points_query from img2, the query image
    points_query = np.zeros((len(matches), 2), dtype=np.float32)

    for i,m in enumerate(matches):
        points_template[i,:] = kp1[m.trainIdx].pt
        points_query[i,:] = kp2[m.queryIdx].pt

    H,mask = cv.findHomography(points_query, points_template, cv.RANSAC)

    # use homography to get the aligned image 
    height, width, _ = template.shape # the shape with respect to the template image
    aligned_image2 = cv.warpPerspective(query, H, (width, height), flags=cv.INTER_NEAREST)

    return aligned_image2

In [8]:
# Given any of the 2 tables, it finds it
# The positions are hard coded. I initially detected the lines and columns, but before I could perfect
# it this yielded good results so I left it like this. It's a bit dirty but it works
def find_x_from_table(table):
    table_gray = cv.cvtColor(table, cv.COLOR_BGR2GRAY)
    image = np.dstack((table_gray, table_gray, table_gray))
    x_color = (0, 255, 0)  # plot a patch containing an X with green color
    blank_color = (0, 0, 255)  # plot a patch containing a blank with red color 
    x_positions = [0] * 15
    w, h, _ = image.shape
    cell_height = int(h / 5)
    cell_width = int(w / 4)
    
    y_mins = [5, 24, 40, 57, 75, 90, 108, 125, 142, 159, 176, 192, 210, 227, 245]
    
    for i in range(0, 15):
        colors = [blank_color] * 4
            
        y_min = y_mins[i]
        y_max = y_min + 10
            
        x1_min = 4
        x1_max = 20
        
        x2_min = 27
        x2_max = 42
        
        x3_min = 48
        x3_max = 64
        
        x4_min = 71
        x4_max = 84
        
        patch1 = image[y_min:y_max,x1_min:x1_max].copy().mean()
        patch2 = image[y_min:y_max,x2_min:x2_max].copy().mean()
        patch3 = image[y_min:y_max,x3_min:x3_max].copy().mean()
        patch4 = image[y_min:y_max,x4_min:x4_max].copy().mean()
        patches = [patch1, patch2, patch3, patch4]
        
        # Get the indices of maximum element in numpy array
        min_value = min(patches)
        index = patches.index(min_value)
        colors[index] = x_color
        x_positions[i] = index
        cv.rectangle(image, (x1_min, y_min), (x1_max, y_max), color=colors[0], thickness=1)
        cv.rectangle(image, (x2_min, y_min), (x2_max, y_max), color=colors[1], thickness=1)
        cv.rectangle(image, (x3_min, y_min), (x3_max, y_max), color=colors[2], thickness=1)
        cv.rectangle(image, (x4_min, y_min), (x4_max, y_max), color=colors[3], thickness=1)
# I return both the image and the position so that I can show the image if the positions do not match
    return (x_positions, image)

In [9]:
# reads and returns an dictionary of arrays with the ground truth answers
def get_ground_truth_answers(path):
    file_names = glob.glob(os.path.join(path, '*.txt')) 
    phisycs = []
    informatics = np.empty((4,), dtype=object)
    physics = np.empty((4,), dtype=object)
    ground_truth = {}
    for file in file_names:
        data = np.loadtxt(file, dtype=str)
        option = data[0][0]
        variant = int(data[0][1])
        if (option == 'I'):
            informatics[variant - 1] = data[1:-1]
        if (option == 'F'):
            physics[variant - 1] = data[1:-1]
    ground_truth['F'] = physics
    ground_truth['I'] = informatics
    return ground_truth

In [10]:
# Concatenates both table readings and returns them as an array
def get_answers_from_image(image):
    table1 = get_first_grid(image)
    table2 = get_second_grid(image)
    
    table_x1 = find_x_from_table(table1)
    table_x2 = find_x_from_table(table2)
#     show_image(table_x1[1])
#     show_image(table_x2[1])
    results = table_x1[0] + table_x2[0]   
    return (results, table_x1[1], table_x2[1])


In [11]:
# answers = computed answers from the image
# options = I / F
# variant = 1, 2, 3 or 4
# ground_truth = returned from get_ground_truth_answers(path), a dictionary of arrays
def calculate_grade(answers, ground_truth, option, variant):
    grade = 0
    for i in range (0, 30):
        # variant - 1 so that we map 1..4 to 0..3
        ground_truth_answer = ground_truth[option][variant - 1][i][1] 
        if (answers[i] == char_to_index[ground_truth_answer]):
            grade += 0.3
    return round(grade + 1, 2)

In [16]:
# Scenario 1
# In each scenario I read the variables from scratch, to be sure

# ground truth, the 8 possible grading scenarios
ground_truth_answers_path = '../Files/ground-truth-correct-answers'
ground_truth = get_ground_truth_answers(ground_truth_answers_path)

# images folder
images = glob.glob('./test_data/1.scanned/*.jpg') 
# sort them by number
try: 
    images.sort(key = lambda x: int(x.split('/')[-1].split('_')[0]))
    total = len(images)
except:
    print("An exception occurred")
    
print("Starting scenario 1 on", total, "files")
# answers file
answers_file = open('dumitriu_andrei_task1.txt', 'w+')

for i in range(0, total):
    try:
        image_name = images[i].split('/')[-1].split('.')[0]
        option = image_name[-2]
        variant = int(image_name[-1])
        image_number = int(images[i].split('/')[-1].split('.')[0].split('_')[0])

        image = read_image(i, images, 1)
        # warping the image two times, to help improve accuracy
        warped1 = warp_image(template_image, image)
        warped = warp_image(template_image, warped1)

        # I had a different structure in the code before and resized twice
        # I tried replacing it with one resize of 0.25 but it yields worse results
        resized = cv.resize(warped, (0,0), fx=0.5, fy=0.5)
        image = cv.resize(resized, (0,0), fx=0.5, fy=0.5)

        answers = get_answers_from_image(image)[0]
        grade = calculate_grade(answers, ground_truth, option, variant)

        written_string = str(image_name + ".jpg" + "    " + str(grade))
        answers_file.write(written_string)
        answers_file.write('\n')
        print(written_string)
    except:
        print("An exception occurred") # careful, this also blocks kernel intrerrupt

answers_file.close()
print("Done.")

Starting scenario 1 on 55 files
01_scanned_F2.jpg    7.9
02_scanned_F4.jpg    7.9
03_scanned_F1.jpg    4.0
04_scanned_F1.jpg    6.7
05_scanned_F3.jpg    7.3
06_scanned_F4.jpg    9.1
07_scanned_F3.jpg    8.2
08_scanned_F4.jpg    5.5
09_scanned_F1.jpg    7.3
10_scanned_F4.jpg    7.6
11_scanned_I2.jpg    3.4
12_scanned_F1.jpg    5.2
13_scanned_I4.jpg    5.8
14_scanned_F4.jpg    4.0
15_scanned_F3.jpg    6.1
16_scanned_F3.jpg    7.0
17_scanned_I4.jpg    5.8
18_scanned_F1.jpg    7.0
19_scanned_F2.jpg    7.9
20_scanned_F4.jpg    5.2
21_scanned_F4.jpg    8.5
22_scanned_F4.jpg    6.7
23_scanned_I4.jpg    6.7
24_scanned_F1.jpg    8.5
25_scanned_F3.jpg    7.3
26_scanned_I4.jpg    6.1
27_scanned_F1.jpg    3.1
28_scanned_F1.jpg    7.0
29_scanned_F4.jpg    8.8
30_scanned_F4.jpg    6.7
31_scanned_I1.jpg    5.8
32_scanned_F2.jpg    8.2
33_scanned_F1.jpg    6.1
34_scanned_I1.jpg    6.7
35_scanned_F3.jpg    9.1
36_scanned_I4.jpg    6.7
37_scanned_F3.jpg    6.7
38_scanned_F1.jpg    8.8
39_scanned_F4.jpg 

In [17]:
# Scenario 2
# In each scenario I read the variables from scratch, to be sure

# ground truth, the 8 possible grading scenarios
ground_truth_answers_path = '../Files/ground-truth-correct-answers'
ground_truth = get_ground_truth_answers(ground_truth_answers_path)

# images folder
images = glob.glob("./test_data/2.rotated+perspective/*.jpg") 

# sort them by number
try: 
    images.sort(key = lambda x: int(x.split('/')[-1].split('_')[0]))
    total = len(images)
except:
    print("An exception occurred")
# answers file
answers_file = open('dumitriu_andrei_task2.txt', 'w+')

print("Starting scenario 2 on", total, "files")
for i in range(0, total):
    try:
        image_name = images[i].split('/')[-1].split('.')[0]
        option = image_name[-2]
        variant = int(image_name[-1])
        image_number = int(images[i].split('/')[-1].split('.')[0].split('_')[0])

        image = read_image(i, images, 1)
        # warping the image two times, to help improve accuracy
        warped1 = warp_image(template_image, image)
        warped = warp_image(template_image, warped1)

        # I had a different structure in the code before and resized twice
        # I tried replacing it with one resize of 0.25 but it yields worse results
        resized = cv.resize(warped, (0,0), fx=0.5, fy=0.5)
        image = cv.resize(resized, (0,0), fx=0.5, fy=0.5)

        answers = get_answers_from_image(image)[0]

        table1 = get_answers_from_image(image)[1]
        table2 = get_answers_from_image(image)[2]

        grade = calculate_grade(answers, ground_truth, option, variant)

        written_string = str(image_name + ".jpg" + "    " + str(grade))
        answers_file.write(written_string)
        answers_file.write('\n')
        print(written_string)
    except:
        print("An exception occurred") # careful, this also blocks kernel intrerrupt
    
answers_file.close()
print("Done.")

Starting scenario 2 on 100 files
001_rotated_F2.jpg    7.9
002_rotated_F4.jpg    7.9
003_rotated_F1.jpg    4.0
004_rotated_F1.jpg    6.7
005_rotated_F3.jpg    7.3
006_rotated_F4.jpg    9.1
007_rotated_F3.jpg    8.2
008_rotated_F4.jpg    5.5
009_rotated_F1.jpg    7.3
010_rotated_F4.jpg    7.6
011_rotated_I2.jpg    3.4
012_rotated_F1.jpg    5.2
013_rotated_I4.jpg    5.8
014_rotated_F4.jpg    4.0
015_rotated_F3.jpg    6.1
016_rotated_F3.jpg    7.0
017_rotated_I4.jpg    5.8
018_rotated_F1.jpg    7.0
019_rotated_F2.jpg    7.9
020_rotated_F4.jpg    5.2
021_rotated_F4.jpg    8.5
022_rotated_F4.jpg    6.7
023_rotated_I4.jpg    6.7
024_rotated_F1.jpg    8.5
025_rotated_F3.jpg    7.3
026_rotated_I4.jpg    6.1
027_rotated_F1.jpg    3.1
028_rotated_F1.jpg    7.0
029_rotated_F4.jpg    8.8
030_rotated_F4.jpg    6.7
031_rotated_I1.jpg    5.8
032_rotated_F2.jpg    8.2
033_rotated_F1.jpg    6.1
034_rotated_I1.jpg    6.7
035_rotated_F3.jpg    9.1
036_rotated_I4.jpg    6.7
037_rotated_F3.jpg    6.7
038_r

In [18]:
# Scenario 3
# In each scenario I read the variables from scratch, to be sure

# ground truth, the 8 possible grading scenarios
ground_truth_answers_path = '../Files/ground-truth-correct-answers'
ground_truth = get_ground_truth_answers(ground_truth_answers_path)

# images folder
images = glob.glob("./test_data/3.no_annotation/*.jpg") 

# sort them by number
try:
    images.sort(key = lambda x: int(x.split('/')[-1].split('_')[0].split('.')[0]))
    total = len(images)
except:
    print("An exception occurred")
    
# answers file
answers_file = open('dumitriu_andrei_task3.txt', 'w+')

count = 0 # the number of correctly read answers
print("Starting scenario 3 on", total, "files")
for i in range(0, total):
    try:
        # for real images
        image_name = images[i].split('/')[-1].split('.')[0]   
        image = read_image(i, images, 1)
        # warping the image two times, to help improve accuracy
        warped1 = warp_image(template_image, image)
        warped = warp_image(template_image, warped1)
        # I had a different structure in the code before and resized twice
        # I tried replacing it with one resize of 0.25 but it yields worse results
        resized = cv.resize(warped, (0,0), fx=0.5, fy=0.5)
        image = cv.resize(resized, (0,0), fx=0.5, fy=0.5)    

        answers = get_answers_from_image(image)[0]    
        table1 = get_answers_from_image(image)[1]
        table2 = get_answers_from_image(image)[2]

        guessed_grade = 0
        guessed_option = 'F'
        guessed_variant = 1
        max_grade = 0.0
        for j in range (1, 5):
            f_grade = calculate_grade(answers, ground_truth, 'F', j)
            i_grade = calculate_grade(answers, ground_truth, 'I', j)
            if (max_grade < f_grade):
                guessed_option = 'F'
                guessed_variant = j
                max_grade = f_grade

            if (max_grade < i_grade):
                guessed_option = 'I'
                guessed_variant = j
                max_grade = i_grade

        guessed_grade = float(calculate_grade(answers, ground_truth, guessed_option, guessed_variant))

        written_string = str(image_name + ".jpg" + "    " + str(guessed_grade))
        answers_file.write(written_string)
        answers_file.write('\n')
        print(written_string)
    except:
        print("An exception occurred") # careful, this also blocks kernel intrerrupt
answers_file.close()
print("Done.")

Starting scenario 3 on 75 files
01.jpg    7.9
02.jpg    7.9
03.jpg    5.8
04.jpg    6.7
05.jpg    7.3
06.jpg    9.1
07.jpg    8.2
08.jpg    5.5
09.jpg    7.3
10.jpg    7.6
11.jpg    7.9
12.jpg    7.9
13.jpg    5.8
14.jpg    6.7
15.jpg    7.3
16.jpg    9.1
17.jpg    8.2
18.jpg    5.5
19.jpg    7.3
20.jpg    7.6
21.jpg    3.7
22.jpg    5.2
23.jpg    5.8
24.jpg    4.3
25.jpg    6.1
26.jpg    7.0
27.jpg    5.8
28.jpg    7.0
29.jpg    7.9
30.jpg    5.2
31.jpg    8.5
32.jpg    6.7
33.jpg    6.7
34.jpg    8.5
35.jpg    7.3
36.jpg    7.9
37.jpg    7.9
38.jpg    5.8
39.jpg    6.7
40.jpg    7.3
41.jpg    9.1
42.jpg    8.2
43.jpg    5.5
44.jpg    7.3
45.jpg    7.6
46.jpg    3.7
47.jpg    5.2
48.jpg    5.8
49.jpg    4.3
50.jpg    6.1
51.jpg    7.0
52.jpg    5.8
53.jpg    7.0
54.jpg    7.9
55.jpg    5.2
56.jpg    8.5
57.jpg    6.7
58.jpg    6.7
59.jpg    8.5
60.jpg    7.3
61.jpg    6.1
62.jpg    3.7
63.jpg    7.0
64.jpg    8.8
65.jpg    6.7
66.jpg    5.8
67.jpg    8.2
68.jpg    6.1
69.jpg    6.7
70

In [13]:
# for scenario 4
# frequency of grades. Extracted from the test images
grades = {3.7: 3, 4.0: 5, 4.3: 8, 4.6: 8, 4.9: 12, 5.2: 8, 5.5: 16, 
          5.8: 8, 6.1: 7, 6.4: 17, 6.7: 10, 7.0: 7, 7.3: 10, 7.6: 8, 
          7.9: 4, 8.2: 2, 8.5: 3, 8.8: 8, 9.1: 2, 9.4: 2, 9.7: 2}

def compute_closest_grade(grade):
    closest_grade_1 = 9.7
    closest_grade_2 = 9.7
    for key in grades:
        if (abs(grade - key) < abs(closest_grade_1 - key)):
            closest_grade_2 = closest_grade_1 
            closest_grade_1 = key
    if (grade == closest_grade_1):
        return closest_grade_2
    if (grade == closest_grade_2):
        return closest_grade_1
    else:
        if (grades[closest_grade_1] > grades[closest_grade_2]):
            return closest_grade_1
        else:
            return closest_grade_2
    return 6.4 # most common grade

In [20]:
# Scenario 4
# In each scenario I read the variables from scratch, to be sure
# ground truth, the 8 possible grading scenarios
ground_truth_answers_path = '../Files/ground-truth-correct-answers'
ground_truth = get_ground_truth_answers(ground_truth_answers_path)

# images folder
images = glob.glob("./test_data/4.handwritten/*.jpg") 

# sort them by number
try:
    images.sort(key = lambda x: int(x.split('/')[-1].split('_')[0]))
    total = len(images)
except:
    print("An exception occurred")

# print(images)
# answers file
answers_file = open('dumitriu_andrei_task4.txt', 'w+')

count = 0 # the number of correctly read answers
print("Starting scenario 4 on", total, "files")
for i in range(0, total):
    try: 
        image = read_image(i, images, 1)
        image_name = images[i].split('/')[-1].split('.')[0]

         # warping the image two times, to help improve accuracy
        warped1 = warp_image(template_image, image)
        warped = warp_image(template_image, warped1)
        # I had a different structure in the code before and resized twice
        # I tried replacing it with one resize of 0.25 but it yields worse results
        resized = cv.resize(warped, (0,0), fx=0.5, fy=0.5)
        image = cv.resize(resized, (0,0), fx=0.5, fy=0.5)    

        answers = get_answers_from_image(image)[0]    
        table1 = get_answers_from_image(image)[1]
        table2 = get_answers_from_image(image)[2]

        guessed_grade = 0
        guessed_option = 'F'
        guessed_variant = 1
        max_grade = 0.0
        for j in range (1, 5):
            f_grade = calculate_grade(answers, ground_truth, 'F', j)
            i_grade = calculate_grade(answers, ground_truth, 'I', j)
            if (max_grade < f_grade):
                guessed_option = 'F'
                guessed_variant = j
                max_grade = f_grade

            if (max_grade < i_grade):
                guessed_option = 'I'
                guessed_variant = j
                max_grade = i_grade

        guessed_grade = float(calculate_grade(answers, ground_truth, guessed_option, guessed_variant))
        even_more_guessed_grade = compute_closest_grade(guessed_grade)

        written_string = str(image_name + ".jpg" + "    " + str(even_more_guessed_grade))
        answers_file.write(written_string)
        answers_file.write('\n')
        print(written_string)
    except:
        print("An exception occurred") # careful, this also blocks kernel intrerrupt
    
print('Done.')
    

Starting scenario 4 on 25 files
01_hw.jpg    4.9
02_hw.jpg    6.4
03_hw.jpg    5.5
04_hw.jpg    4.9
05_hw.jpg    7.3
06_hw.jpg    6.1
07_hw.jpg    5.8
08_hw.jpg    6.7
09_hw.jpg    6.1
10_hw.jpg    6.4
11_hw.jpg    3.7
12_hw.jpg    5.8
13_hw.jpg    4.9
14_hw.jpg    3.7
15_hw.jpg    4.9
16_hw.jpg    4.9
17_hw.jpg    6.1
18_hw.jpg    3.7
19_hw.jpg    6.1
20_hw.jpg    4.3
21_hw.jpg    3.7
22_hw.jpg    3.7
23_hw.jpg    3.7
24_hw.jpg    5.5
25_hw.jpg    4.9
Done.


In [None]:
0 