### Process the answer folder: read each key sheet and store the correct answers for each exam code in a dictionary {code: [keys]}

In [1]:
import os
import cv2
from skimage.io import imread, imshow
import numpy as np
import pandas as pd

In [2]:
# File names of the student sheets ('./data') and of the answer-key sheets
# ('./answer').  os.listdir returns entries in arbitrary order and includes
# hidden entries (e.g. '.ipynb_checkpoints', '.DS_Store'); the listings are
# sorted so the output is reproducible, and dot-entries are skipped because
# they are not sheets.
fn_asm = sorted(name for name in os.listdir('./data') if not name.startswith('.'))
fn_ans = sorted(name for name in os.listdir('./answer') if not name.startswith('.'))

# {exam code: list of correct answers read from that code's key sheet}
keys_dict = {}

In [3]:
def get_5choices(box5, all_answers):
    """Detect the marked bubbles in one answer box and append their letters.

    The crop is converted to grayscale and thresholded so that only pixels
    with value 0 are kept.  Every contour whose area exceeds 100 is treated
    as a mark; marks are visited from top to bottom (ascending y of their
    centre) and the x coordinate of the centre selects the letter:
    (200, 300) -> A, [300, 400) -> B, [400, 500) -> C, [500, 600) -> D,
    [600, 700) -> E.

    Args:
        box5: colour image crop of one answer box (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).  Presumably it holds
            five questions -- confirm against the sheet layout.
        all_answers: list the detected letters are appended to.  Mutated in
            place.

    Returns:
        None.

    Note:
        Marks whose centre falls outside 200 < x < 700 are ignored, and
        nothing is appended for a question without a detected mark, so the
        number of appended letters is not guaranteed.
    """
    gray = cv2.cvtColor(box5, cv2.COLOR_BGR2GRAY)

    # Threshold 0 with BINARY_INV: pixels equal to 0 become 255, all others 0.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV)

    # OpenCV 4.x returns (contours, hierarchy) while 3.x returns
    # (image, contours, hierarchy); contours are second-to-last in both.
    cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    choices = []
    for cnt in cnts:
        if cv2.contourArea(cnt) > 100:
            # Centre = mean of the contour's points, rounded to integer (x, y).
            center = np.round(np.mean(cnt, axis=0)[0]).astype(int)
            choices.append(center)

    choices.sort(key=lambda center: center[1])

    letters = 'ABCDE'
    for choice in choices:
        x = int(choice[0])
        # Each letter owns a 100-px wide band starting at x = 200.  Integer
        # division keeps centres that land exactly on an inner band edge
        # (300, 400, 500, 600) instead of silently dropping them.
        if 200 < x < 700:
            all_answers.append(letters[(x - 200) // 100])

In [4]:
def get_full_choices(image):
    """Collect the answers marked on a whole sheet.

    The region ``image[690:2800, 430:2100]`` is divided into a grid of
    2 columns x 6 rows of boxes.  Each box is 740 wide and 300 high; boxes
    are 140 apart horizontally and 40 apart vertically.  Boxes are visited
    column by column, top to bottom, and each one is passed to
    ``get_5choices`` together with the shared result list.

    Args:
        image: sheet image, indexable as ``image[rows, cols]``.

    Returns:
        The list of letters appended by ``get_5choices`` over all boxes.
    """
    answer_area = image[690:2800, 430:2100]
    box_h, box_w = 300, 740
    gap_v, gap_h = 40, 140

    collected = []
    for col_idx in range(2):
        left = col_idx * (box_w + gap_h)
        for row_idx in range(6):
            top = row_idx * (box_h + gap_v)
            box = answer_area[top:top + box_h, left:left + box_w]
            get_5choices(box, collected)

    return collected

In [5]:

# Fill keys_dict with one entry per answer-key image; the exam code is the
# file name without its extension.
for fn in fn_ans:
    # splitext strips the extension whatever its length ('.png', '.jpeg', ...),
    # unlike a fixed 4-character slice.
    code = os.path.splitext(fn)[0]
    path = os.path.join('./answer', fn)
    key_img = imread(path)
    correct_ans = get_full_choices(key_img)

    keys_dict[code] = correct_ans

### Loop through all students' sheets, save to dictionary of dataframes with key = code
#### {code: df_of_code}
#### df_of_code: columns = [student id, fullname, answers from 1 - 60]

In [6]:
# Question labels '1'..'60'; they are also the names of the answer columns.
ques_list = [str(number) for number in range(1, 61)]

# Column layout shared by every per-code dataframe.
col_list = ['ID', 'Fullname'] + ques_list

# {exam code: dataframe of the students who sat that code}
dfs_dict = {}

# {exam code: [(question label, share of correct answers), ...]}
ques_correct = {}

In [7]:
# Read every student sheet and group the resulting rows by exam code.
# Sheet file names have the form '<student id>_<full name>_<code>.<ext>'.
rows_by_code = {}
for asm in fn_asm:
    # Split on the first two underscores only; a name with fewer than two
    # underscores raises ValueError.
    std_id, fullname, tail = asm.split('_', 2)
    code = os.path.splitext(tail)[0]

    path = os.path.join('./data', asm)
    sheet = imread(path)
    full_choices = get_full_choices(sheet)

    # Every row must supply exactly one answer per question column; fail
    # with the offending file name instead of a bare column-count error.
    if len(full_choices) != len(ques_list):
        raise ValueError(
            f'{asm}: detected {len(full_choices)} answers, '
            f'expected {len(ques_list)}'
        )

    rows_by_code.setdefault(code, []).append([std_id, fullname, *full_choices])

# Build each dataframe once from its collected rows.  DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the frame each time.
for code, rows in rows_by_code.items():
    new_rows = pd.DataFrame(rows, columns=col_list)
    if code in dfs_dict:
        # Keep rows that are already present for this code.
        dfs_dict[code] = pd.concat([dfs_dict[code], new_rows], ignore_index=True)
    else:
        dfs_dict[code] = new_rows

### Add 2 rows at the end of df
#### Row "KEY": the correct answer for each question
#### Row "CORRECT": fraction (0–1) of students who answered each question correctly

In [8]:
# For every exam code: compute the share of students who answered each
# question correctly, record it in ques_correct, and add the 'KEY' and
# 'CORRECT' summary rows to the bottom of that code's dataframe.
for code in dfs_dict:
    anss = keys_dict[code]
    students = dfs_dict[code]

    rates = []
    for q in ques_list:
        given = students[q].tolist()
        key = anss[int(q) - 1]  # question labels are 1-based
        rates.append(given.count(key) / len(given))

    ques_correct.setdefault(code, []).extend(zip(ques_list, rates))

    # The first two cells (ID, Fullname) stay blank in both summary rows.
    row_key = ['', '', *anss]
    row_correct = ['', '', *rates]

    df_add = pd.DataFrame([row_key, row_correct], columns=col_list, index=['KEY', 'CORRECT'])
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    dfs_dict[code] = pd.concat([students, df_add])

In [10]:
# For each exam code, print the three questions with the lowest share of
# correct answers.  The stored list is sorted in place, ascending by share.
for code, per_question in ques_correct.items():
    per_question.sort(key=lambda entry: entry[1])
    hardest = per_question[:3]
    print(f'[CODE {code}] 3 most-difficult questions with percentage of correct answers: ', hardest)

[CODE 3B] 3 most-difficult questions with percentage of correct answers:  [('26', 0.2), ('27', 0.2), ('28', 0.2)]
[CODE 3A] 3 most-difficult questions with percentage of correct answers:  [('27', 0.0), ('28', 0.0), ('31', 0.0)]


In [14]:
# Write one CSV file per exam code into the current working directory.
for code, frame in dfs_dict.items():
    out_name = f'Stats_question6_Code{code}.csv'
    frame.to_csv(out_name)