### Import all necessary packages

In [1]:
import os
import cv2
from skimage.io import imread, imshow
import numpy as np
import pandas as pd

### Get all filenames from the data folder (students' answer sheets) and the answer folder (answer keys for each exam code)

In [2]:
# os.listdir returns entries in arbitrary order and also lists hidden files
# (names starting with '.'). Sorting keeps the student numbering and the CSV
# row order reproducible between runs; hidden entries are skipped because they
# are not answer sheets / answer keys.
fn_asm = sorted(f for f in os.listdir('./data') if not f.startswith('.')) #filename assignments - students' answer sheet
fn_ans = sorted(f for f in os.listdir('./answer') if not f.startswith('.')) #filename answers - correct answers of all codes

### Define all necessary functions

#### Split a filename into student id, fullname, code

In [3]:
def split_filename(asm):
    """Split an answer-sheet filename into student id, full name and code.

    The filename is expected to look like ``<id>_<fullname>_<code>.<ext>``.

    Parameters:
        asm: the filename string to split.

    Returns:
        A tuple ``(std_id, fullname, code)`` of strings.

    Raises:
        ValueError: if the name has no '_' , no second '_' after the id,
            or no '.' after the id.
    """
    first_sep = asm.index('_')
    std_id, remainder = asm[:first_sep], asm[first_sep + 1:]

    second_sep = remainder.index('_')
    fullname = remainder[:second_sep]

    # The code runs from just after the second '_' up to the first '.' found
    # anywhere in the remainder (not only after the second '_').
    code = remainder[second_sep + 1:remainder.index('.')]

    return std_id, fullname, code

#### Split a fullname into first name and surname

In [4]:
def split_name(fullname: str):
    """Split a CamelCase full name into surname and first name.

    Word boundaries are the positions of uppercase letters. The surname is
    everything before the second uppercase letter and the first name starts
    at the last uppercase letter, so any middle names in between are dropped
    (e.g. 'NguyenVanAn' -> ('Nguyen', 'An')).

    Names with fewer than two uppercase letters cannot be split that way;
    instead of raising IndexError they are split at the only uppercase letter
    (or at position 0 when there is none), which yields an empty surname for
    a single capitalised word.

    Parameters:
        fullname: the name without separators, e.g. 'TranDan'.

    Returns:
        A tuple ``(sname, fname)``.
    """
    capitals = [i for i, ch in enumerate(fullname) if ch.isupper()]

    if len(capitals) < 2:
        # Single-word, all-lowercase or empty name: no second capital exists.
        split_at = capitals[0] if capitals else 0
        return fullname[:split_at], fullname[split_at:]

    sname = fullname[:capitals[1]]
    fname = fullname[capitals[-1]:]

    return sname, fname

#### Get the choices marked in one block of 5 lines (questions)

In [5]:
def get_5choices(box5, all_answers):
    """Find the marked bubbles in one image box and append their letters.

    The box is converted to grayscale, thresholded and searched for contours.
    Every contour with an area above 100 is reduced to the mean of its points;
    those centres are ordered by their second coordinate (index 1) and each
    one is mapped to a letter from its first coordinate (index 0).

    Parameters:
        box5: colour image region passed to ``cv2.cvtColor``.
        all_answers: list that receives the detected letters (mutated in place).

    Returns:
        None. Centres whose first coordinate is outside (200, 700) or exactly
        on a multiple of 100 match no letter and are silently skipped.
    """
    # NOTE(review): the notebook loads images with skimage's imread, which
    # presumably yields RGB order - confirm that BGR2GRAY is intended.
    gray = cv2.cvtColor(box5, cv2.COLOR_BGR2GRAY)

    # Inverse binary threshold at 0: pixels above 0 become 0, the rest 255.
    _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    centers = [
        np.round(np.mean(contour, axis=0)[0]).astype(int)
        for contour in contours
        if cv2.contourArea(contour) > 100
    ]
    centers.sort(key=lambda point: point[1])

    for center in centers:
        x = center[0]
        # Each letter owns an open 100-wide band: A=(200,300) ... E=(600,700).
        for letter, left in zip('ABCDE', range(200, 700, 100)):
            if left < x < left + 100:
                all_answers.append(letter)
                break

#### Get full choices of an answer sheet

In [6]:
def get_full_choices(image):
    """Collect the answers detected in every box of an answer sheet.

    The sheet is cropped to a fixed region and walked as 2 columns of 6 boxes
    (column by column, top to bottom). Each box is 300 high and 740 wide, with
    gaps of 40 between rows and 140 between columns, and is handed to
    ``get_5choices`` together with the shared result list.

    Parameters:
        image: the full sheet as an array indexable by ``[rows, cols]``.

    Returns:
        The list of answer letters accumulated over all 12 boxes.
    """
    crop = image[690:2800, 430:2100]
    height, width = 300, 740
    step_ver, step_hor = 40, 140

    all_answers = []

    for col in range(2):
        left = col * (width + step_hor)
        for row in range(6):
            top = row * (height + step_ver)
            get_5choices(crop[top:top + height, left:left + width], all_answers)

    return all_answers

#### Grade a student

In [7]:
def grading(choices, code):
    """Grade one student's answers against the key of their exam code.

    Answers are compared position by position with ``keys_dict[code]`` (a
    module-level dict filled in by a later cell of this notebook). The score
    is the share of key entries answered correctly, scaled to 0-10 and rounded
    to one decimal place.

    The denominator is the number of questions in the key, not the number of
    detected answers, so missing answers count as wrong instead of shrinking
    the total. An empty ``choices`` list therefore grades as 0.0 rather than
    raising ZeroDivisionError.

    Parameters:
        choices: sequence of the student's answer letters, in question order.
        code: exam code used to look up the key in ``keys_dict``.

    Returns:
        The grade as a float between 0.0 and 10.0.

    Raises:
        KeyError: if ``code`` has no entry in ``keys_dict``.
    """
    keys = keys_dict[code]

    total = len(keys)
    if total == 0:
        # No key entries to compare against: nothing can be correct.
        return 0.0

    correct_answers = sum(1 for choice, key in zip(choices, keys) if choice == key)

    # Scale before rounding so the result has no float artefacts such as
    # 5.699999999999999 (which round(x, 2) * 10 can produce).
    return round(10 * correct_answers / total, 1)

### Question 2: Generate student.csv from the data folder

In [8]:
# Parse every answer-sheet filename into one record per student.
StudentID, Surname, Firstname, Code = [], [], [], []

for asm in fn_asm:
    std_id, fullname, code = split_filename(asm)
    sname, fname = split_name(fullname)

    StudentID.append(std_id)
    Code.append(code)
    Surname.append(sname)
    Firstname.append(fname)

# The 1-based row index is sized from the data instead of a fixed 20 rows,
# so the cell also works when the data folder holds a different number of sheets.
stds_df = pd.DataFrame(
    {
        'StudentID': StudentID,
        'Surname': Surname,
        'Firstname': Firstname,
        'Code': Code,
    },
    index=range(1, len(fn_asm) + 1),
)

# index_label=False writes the index column without a header field.
stds_df.to_csv('student.csv', index_label = False)

#### Choose 1 student to do Question 3 and Question 4

In [9]:
# List every answer sheet with a 1-based number (the last 4 characters of the
# filename, i.e. the extension, are stripped for display).
print('Students list:')
for number, name in enumerate(fn_asm, start=1):
    print(f'Number: {number} - {name[:-4]}')

choice = int(input('\nChoose 1 student to view all answer, input 1 number: '))

# Reject out-of-range numbers explicitly: without this check, 0 or a negative
# number would silently pick a sheet from the end of the list.
if not 1 <= choice <= len(fn_asm):
    raise ValueError(f'Student number must be between 1 and {len(fn_asm)}, got {choice}')

std_index = choice - 1

print(f'\nStudent number {std_index + 1} record: {fn_asm[std_index][:-4]}')

# `fn` and `image` are used by the Question 3 and Question 4 cells.
fn = fn_asm[std_index]

path = './data/' + fn
image = imread(path)

Students list:
Number: 1 - 2000111_NguyenVanAn_3B
Number: 2 - 2000112_LePhanBinh_3B
Number: 3 - 2000113_TranDan_3B
Number: 4 - 2000114_PhanNhanHy_3B
Number: 5 - 2000115_VoMinh_3B
Number: 6 - 2000116_TrinhNgoMai_3B
Number: 7 - 2000117_DinhThiHao_3B
Number: 8 - 2000118_TranHao_3B
Number: 9 - 2000119_LeDinhKhai_3B
Number: 10 - 2000120_NgoVanLinh_3B
Number: 11 - 2000121_NguyenNhan_3A
Number: 12 - 2000122_TranQuynh_3A
Number: 13 - 2000123_VinhHao_3A
Number: 14 - 2000124_DiepTan_3A
Number: 15 - 2000125_HoKhoa_3A
Number: 16 - 2000126_HoTrung_3A
Number: 17 - 2000127_PhamHuy_3A
Number: 18 - 2000128_PhuocNguyen_3A
Number: 19 - 2000129_NguyenNgoc_3A
Number: 20 - 2000130_MinhNhan_3A

Choose 1 student to view all answer, input 1 number: 14

Student number 14 record: 2000124_DiepTan_3A


### Question 3: Generating the first 5 answers of one student

In [10]:
# Crop the selected sheet to the answer grid, then take its top-left box,
# which holds the first block of questions.
crop = image[690:2800, 430:2100]

# Use the same 300 x 740 box size as get_full_choices so that these answers
# always agree with the first entries of the full answer list.
first5_box = crop[0:300, 0:740]
first5_ans = []

get_5choices(first5_box, first5_ans)

print(f'First 5 answers of {fn[:-4]}: {first5_ans}')

First 5 answers of 2000124_DiepTan_3A: ['C', 'D', 'E', 'E', 'E']


### Question 4: Generating all answers of one student

In [11]:
all_ans = get_full_choices(image)

print(f'All answers of {fn[:-4]}: ')
pd.set_option('display.max_rows', None)

# Number the rows from 1 based on how many answers were actually detected;
# a fixed 60-row index would raise whenever the detected count differs.
std_anss = pd.DataFrame({'Choice': all_ans}, index=range(1, len(all_ans) + 1))

std_anss

All answers of 2000124_DiepTan_3A: 


Unnamed: 0,Choice
1,C
2,D
3,E
4,E
5,E
6,A
7,B
8,B
9,E
10,E


### Question 5: Generate grading.csv

#### Create a dictionary of correct answers for each code

In [12]:
# Map each exam code (the answer-key filename without its extension) to the
# list of answers read from that key image.
keys_dict = {}

# The loop names are deliberately distinct from `fn`, `path` and `code`, which
# the student-selection cell sets: reusing them here would make a re-run of
# the Question 3 / Question 4 cells print an answer-key filename instead of
# the chosen student.
for ans_fn in fn_ans:
    # splitext handles extensions of any length (a fixed [:-4] does not).
    ans_code = os.path.splitext(ans_fn)[0]
    key_img = imread('./answer/' + ans_fn)

    keys_dict[ans_code] = get_full_choices(key_img)

In [13]:
# Grade every answer sheet and store one (student id, grade) row per sheet.
all_id, grades = [], []

for asm in fn_asm:
    std_id, fullname, code = split_filename(asm)

    path = './data/' + asm
    sheet = imread(path)
    full_choices = get_full_choices(sheet)

    all_id.append(std_id)
    grades.append(grading(full_choices, code))

# Rows are numbered from 1 in the order of fn_asm.
df_grades = pd.DataFrame(
    {'Student ID': all_id, 'Grades': grades},
    index=range(1, len(fn_asm) + 1),
)

df_grades.to_csv('grading.csv')

### Question 6: Summarize which 3 questions are the most difficult

#### Loop through all students' sheets, save to dictionary of dataframes with key = code
#### {code: df_of_code}
#### df_of_code: columns = [student id, fullname, answers from 1 - 60]

In [14]:
dfs_dict = {}

# Column layout of every per-code frame: ID, Fullname, then questions '1'..'60'.
col_list = ['ID', 'Fullname']
ques_list = [str(x) for x in range(1, 61)]
col_list.extend(ques_list)

# Filled in by the next cell: {code: [(question, share of correct answers), ...]}
ques_correct= {}

# Collect one single-row frame per student, grouped by exam code, and
# concatenate once per code. DataFrame.append was removed in pandas 2.0 and
# re-copied the whole frame on every call.
frames_by_code = {}

for asm in fn_asm:
    std_id, fullname, code = split_filename(asm)

    path = './data/' + asm
    sheet = imread(path)
    full_choices = get_full_choices(sheet)

    add_list = [std_id, fullname]
    add_list.extend(full_choices)

    # Building the row frame here keeps the per-sheet check that the number of
    # detected answers matches the number of question columns.
    df_add = pd.DataFrame([add_list], columns=col_list)
    frames_by_code.setdefault(code, []).append(df_add)

for code, frames in frames_by_code.items():
    # ignore_index=True gives the same 0..n-1 index as reset_index(drop=True).
    dfs_dict[code] = pd.concat(frames, ignore_index=True)

#### Add 2 rows at the end of df
#### Row "key": keys for all questions
#### Row "correct": percentage of correct answers in the question

In [15]:
for code in dfs_dict:
    anss = keys_dict[code]

    # Use only the student rows. If this cell is run again, the KEY/CORRECT
    # rows added by the previous run are dropped first, so they are neither
    # counted as answers nor duplicated.
    frame = dfs_dict[code]
    students = frame[~frame.index.isin(['KEY', 'CORRECT'])]

    row_key = ['', '']
    row_key.extend(anss)

    row_correct = ['', '']
    # Start from a fresh list so a re-run does not accumulate duplicates.
    ques_correct[code] = []

    for q in ques_list:
        lst = list(students[q])
        key = anss[int(q) - 1]
        # Share of students (0.0-1.0) whose answer equals the key.
        correct = lst.count(key) / len(lst)
        row_correct.append(correct)

        ques_correct[code].append((q, correct))

    df_add = pd.DataFrame([row_key, row_correct], columns=col_list, index=['KEY', 'CORRECT'])
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    dfs_dict[code] = pd.concat([students, df_add])

for code in ques_correct:
    # Lowest share of correct answers first; ties keep question order.
    ques_correct[code].sort(key=lambda x:x[1])
    print(f'[CODE {code}] 3 most-difficult questions with percentage of correct answers: ', ques_correct[code][0:3])

[CODE 3B] 3 most-difficult questions with percentage of correct answers:  [('26', 0.2), ('27', 0.2), ('28', 0.2)]
[CODE 3A] 3 most-difficult questions with percentage of correct answers:  [('27', 0.0), ('28', 0.0), ('31', 0.0)]


#### Save stats of each code into csv files

In [16]:
# Write one statistics file per exam code.
for code, stats_df in dfs_dict.items():
    stats_df.to_csv(f'Stats_question6_Code{code}.csv')

### Question 7: Generate final results (pass/fail) of the class

In [17]:
# Grade every sheet again and label each student pass (grade >= 6.5) or fail.
all_id, grades, pass_fail = [], [], []

for asm in fn_asm:
    std_id, fullname, code = split_filename(asm)

    path = './data/' + asm
    sheet = imread(path)
    full_choices = get_full_choices(sheet)
    grade = grading(full_choices, code)

    all_id.append(std_id)
    grades.append(grade)
    pass_fail.append('pass' if grade >= 6.5 else 'fail')

# Rows are numbered from 1 in the order of fn_asm.
df_result = pd.DataFrame(
    {'Student ID': all_id, 'Grades': grades, 'Pass/Fail': pass_fail},
    index=range(1, len(fn_asm) + 1),
)

df_result.to_csv('FinalResult.csv')