# exam_analysis

This notebook analyzes student exam responses for their use of drawings when
solving organic chemistry problems.  

Author: Sarah Brown (sarahbrown6449@gmail.com)  
Last Updated: April 26th, 2023  

In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.exam_data import Question, Response
from src.utils import *

In [None]:
# Question configuration: read the CSV at q_file_path, using its first column
# as the DataFrame index.
q_file_path = "data/question_config.csv"
question_config = pd.read_csv(filepath_or_buffer=q_file_path, index_col=0)

In [None]:
# Response configuration: read the CSV at r_file_path, using its first column
# as the DataFrame index.
r_file_path = "data/response_config.csv"
response_config = pd.read_csv(filepath_or_buffer=r_file_path, index_col=0)

## Define output table for overall statistics

In [None]:
# Accumulator for summary statistics: each key names one row of the table and
# maps to five values, one per statistic titled below.
summary_stats_dict = {"test": [1, 2, 3, 4, 5]}

# Titles of the five statistics, in the order the values are stored
summary_stat_titles = (
    "Number of drawing techs above 5 %",
    "Number of redrawing techs above 1 %",
    "Average of top 5 drawing techs",
    "Average of top 3 redrawing techs",
    "Average of top 5 drawing techs (w/o text)"
)

# Transpose so that every dictionary key becomes a row, then title the columns
summary_stats_df = pd.DataFrame(summary_stats_dict).T
summary_stats_df.columns = summary_stat_titles
summary_stats_df

## Fraction of responses with drawings or redrawings for each type of question

In [None]:
# Paths to the question and response configuration files
q_file_path = "data/question_config.csv"
r_file_path = "data/response_config.csv"

# Question configuration table, indexed by the first CSV column
question_config = pd.read_csv(filepath_or_buffer=q_file_path, index_col=0)

In [None]:
q_type_CBM = "Choose best molecule"

# ids (index labels) of every question whose "type" is choose best molecule
is_CBM = question_config["type"] == q_type_CBM
q_id_list_CBM = question_config[is_CBM].index

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_CBM]

# per-question values of the drawing and redrawing fractions
fraction_drawing_CBM = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_CBM = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_CBM = np.average(fraction_drawing_CBM)
avg_fract_redrawing_CBM = np.average(fraction_redrawing_CBM)

In [None]:
q_type_CBA = "Choose best atom"

# ids (index labels) of every question whose "type" is choose best atom
is_CBA = question_config["type"] == q_type_CBA
q_id_list_CBA = question_config[is_CBA].index

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_CBA]

# per-question values of the drawing and redrawing fractions
fraction_drawing_CBA = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_CBA = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_CBA = np.average(fraction_drawing_CBA)
avg_fract_redrawing_CBA = np.average(fraction_redrawing_CBA)

In [None]:
q_type_CBO = "Choose best option"

# ids (index labels) of every question whose "type" is choose best option
is_CBO = question_config["type"] == q_type_CBO
q_id_list_CBO = question_config[is_CBO].index

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_CBO]

# per-question values of the drawing and redrawing fractions
fraction_drawing_CBO = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_CBO = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_CBO = np.average(fraction_drawing_CBO)
avg_fract_redrawing_CBO = np.average(fraction_redrawing_CBO)

In [None]:
q_type_rank = "Ranking"

# ids (index labels) of every question whose "type" is ranking
is_rank = question_config["type"] == q_type_rank
q_id_list_rank = question_config[is_rank].index

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_rank]

# per-question values of the drawing and redrawing fractions
fraction_drawing_rank = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_rank = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_rank = np.average(fraction_drawing_rank)
avg_fract_redrawing_rank = np.average(fraction_redrawing_rank)

In [None]:
q_type_graph = "Graphing"

# ids (index labels) of every question whose "type" is graphing
is_graph = question_config["type"] == q_type_graph
q_id_list_graph = question_config[is_graph].index

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_graph]

# per-question values of the drawing and redrawing fractions
fraction_drawing_graph = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_graph = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_graph = np.average(fraction_drawing_graph)
avg_fract_redrawing_graph = np.average(fraction_redrawing_graph)

In [None]:
q_type_calc = "Calculation"

# ids (index labels) of every question whose "type" is calculation
is_calc = question_config["type"] == q_type_calc
q_id_list_calc = question_config[is_calc].index

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_calc]

# per-question values of the drawing and redrawing fractions
fraction_drawing_calc = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_calc = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_calc = np.average(fraction_drawing_calc)
avg_fract_redrawing_calc = np.average(fraction_redrawing_calc)

In [None]:
 # plot
# Bar chart of the average percent of responses containing a drawing, with one
# bar per question type. Ranking questions are excluded.
drawing_bar_labels = [
    "Choose best \n molecule",
    "Choose best \n atom",
    "Choose best \n option",
    "Graphing",
    "Calculation",
]
# averages are stored as fractions; scale to percent for display
drawing_bar_percents = [
    avg_fract_drawing_CBM*100,
    avg_fract_drawing_CBA*100,
    avg_fract_drawing_CBO*100,
    avg_fract_drawing_graph*100,
    avg_fract_drawing_calc*100,
]
plt.bar(drawing_bar_labels, drawing_bar_percents)
plt.title("Average percent of responses with a drawing \n by type of question",fontsize = 18)
plt.ylabel("Percent of responses with drawing", fontsize = 14)
plt.xticks(rotation = 0, fontsize = 10)
# the plotted values are percentages, so the y-axis spans 0-100
plt.ylim(0,100)
plt.savefig("plots/average percent of responses with a drawing by type of question")

#Ranking excluded

In [None]:
# Bar chart of the average percent of responses containing a redrawing, with
# one bar per question type (averages are fractions, scaled to percent here).
redrawing_percent_by_type = {
    "Choose best molecule": avg_fract_redrawing_CBM*100,
    "Choose best atom": avg_fract_redrawing_CBA*100,
    "Choose best option": avg_fract_redrawing_CBO*100,
    "Graphing": avg_fract_redrawing_graph*100,
    "Calculation": avg_fract_redrawing_calc*100,
}
plt.bar(list(redrawing_percent_by_type.keys()), list(redrawing_percent_by_type.values()))
plt.title("Average percent of responses with a redrawing \n by type of question", fontsize = 18)
plt.ylabel("Percent of responses with redrawing")
plt.xticks(rotation = 270)
plt.ylim(0,100)

#Ranking excluded

## Fraction of responses with drawing if there is/isn't a drawing in the question

In [None]:
# Paths to the question and response configuration files
q_file_path = "data/question_config.csv"
r_file_path = "data/response_config.csv"

# Question configuration table, indexed by the first CSV column
question_config = pd.read_csv(filepath_or_buffer=q_file_path, index_col=0)

### Choose best option

In [None]:
q_type_CBO = "Choose best option"

# Boolean mask selecting choose-best-option questions that contain a drawing.
# The two conditions must be combined element-wise with `&`: joining two
# non-empty Python lists with `and` simply evaluates to the second list, which
# would drop the question-type condition entirely.
q_id_filter_CBO_qd = (question_config["type"] == q_type_CBO) & (question_config["drawing"] == True)
q_id_list_CBO_qd = list(question_config[q_id_filter_CBO_qd].index)

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_CBO_qd]

# per-question values of the drawing and redrawing fractions
fraction_drawing_CBO_qd = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_CBO_qd = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_CBO_qd = np.average(fraction_drawing_CBO_qd)
avg_fract_redrawing_CBO_qd = np.average(fraction_redrawing_CBO_qd)

In [None]:
q_type_CBO = "Choose best option"

# Boolean mask selecting choose-best-option questions without a drawing.
# The two conditions must be combined element-wise with `&`: joining two
# non-empty Python lists with `and` simply evaluates to the second list, which
# would drop the question-type condition entirely.
q_id_filter_CBO_qnd = (question_config["type"] == q_type_CBO) & (question_config["drawing"] == False)
q_id_list_CBO_qnd = list(question_config[q_id_filter_CBO_qnd].index)

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_CBO_qnd]

# per-question values of the drawing and redrawing fractions
fraction_drawing_CBO_qnd = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_CBO_qnd = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_CBO_qnd = np.average(fraction_drawing_CBO_qnd)
avg_fract_redrawing_CBO_qnd = np.average(fraction_redrawing_CBO_qnd)

In [None]:
# Bar chart comparing the average percent of choose-best-option responses that
# contain a drawing, for questions with and without a drawing in the question.
# Both bars use the *drawing* averages so they measure the same quantity as the
# title and y-axis label.
plt.bar(
    ["Drawing in question", "No drawing in question"],
    [avg_fract_drawing_CBO_qd*100, avg_fract_drawing_CBO_qnd*100],
)
plt.title("Average percent of responses to CBO \n questions with a drawing", fontsize = 18)
plt.ylabel("Percent of responses with drawing")
plt.xticks(rotation = 270)
plt.ylim(0,100)

### Calculation

In [None]:
q_type_calc = "Calculation"

# Boolean mask selecting calculation questions that contain a drawing.
# The two conditions must be combined element-wise with `&`: joining two
# non-empty Python lists with `and` simply evaluates to the second list, which
# would drop the question-type condition entirely.
q_id_filter_calc_qd = (question_config["type"] == q_type_calc) & (question_config["drawing"] == True)
q_id_list_calc_qd = list(question_config[q_id_filter_calc_qd].index)

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_calc_qd]

# per-question values of the drawing and redrawing fractions
fraction_drawing_calc_qd = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_calc_qd = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_calc_qd = np.average(fraction_drawing_calc_qd)
avg_fract_redrawing_calc_qd = np.average(fraction_redrawing_calc_qd)

In [None]:
q_type_calc = "Calculation"

# Boolean mask selecting calculation questions without a drawing.
# The two conditions must be combined element-wise with `&`: joining two
# non-empty Python lists with `and` simply evaluates to the second list, which
# would drop the question-type condition entirely.
q_id_filter_calc_qnd = (question_config["type"] == q_type_calc) & (question_config["drawing"] == False)
q_id_list_calc_qnd = list(question_config[q_id_filter_calc_qnd].index)

# one Question object per selected id
target_qs = [Question(q_id, q_file_path, r_file_path) for q_id in q_id_list_calc_qnd]

# per-question values of the drawing and redrawing fractions
fraction_drawing_calc_qnd = [q_obj.fraction_with_drawing for q_obj in target_qs]
fraction_redrawing_calc_qnd = [q_obj.fraction_with_redrawing for q_obj in target_qs]

# average each fraction over the selected questions
avg_fract_drawing_calc_qnd = np.average(fraction_drawing_calc_qnd)
avg_fract_redrawing_calc_qnd = np.average(fraction_redrawing_calc_qnd)

In [None]:
# Bar chart comparing the average percent of calculation responses that
# contain a drawing, for questions with and without a drawing in the question.
# Both bars use the *drawing* averages so they measure the same quantity as the
# title and y-axis label.
plt.bar(
    ["Drawing in question", "No drawing in question"],
    [avg_fract_drawing_calc_qd*100, avg_fract_drawing_calc_qnd*100],
)
plt.title("Average percent of responses to calculation \n questions with a drawing", fontsize = 18)
plt.ylabel("Percent of responses with drawing")
plt.xticks(rotation = 270)
plt.ylim(0,100)

### Plots with all question types
Note: choose best molecule (CBM), choose best atom (CBA), and graphing questions all contain a drawing in the question by nature.

In [None]:
# Bar chart of the average percent of responses with a drawing, restricted to
# questions that contain a drawing (averages are fractions, scaled to percent).
drawing_percent_qd = {
    "Choose best molecule": avg_fract_drawing_CBM*100,
    "Choose best option d": avg_fract_drawing_CBO_qd*100,
    "Choose best atom": avg_fract_drawing_CBA*100,
    "Graphing": avg_fract_drawing_graph*100,
    "Calculation": avg_fract_drawing_calc_qd*100,
}
plt.bar(list(drawing_percent_qd.keys()), list(drawing_percent_qd.values()))
plt.title("Average percent of responses with a drawing \n when there is a drawing in the question", fontsize = 18)
plt.ylabel("Percent of responses with drawing")
plt.xticks(rotation = 270)
plt.ylim(0,100)

#All choose best molecule/atom, and graphing questions have a drawing
#Ranking excluded

In [None]:
# Bar chart of the average percent of responses with a drawing, restricted to
# questions that do not contain a drawing. The plotted values are scaled by
# 100 and shown on a 0-100 axis, so the title and axis label say "percent".
plt.bar(
    ["Choose best option nd", "Calculation"],
    [avg_fract_drawing_CBO_qnd*100, avg_fract_drawing_calc_qnd*100],
)
plt.title("Average percent of responses with a drawing \n when there is no drawing in the question", fontsize = 18)
plt.ylabel("Percent of responses with drawing")
plt.xticks(rotation = 270)
plt.ylim(0,100)

In [None]:
# Bar chart of the average percent of responses with a redrawing, restricted
# to questions that contain a drawing (averages are fractions, scaled to percent).
redrawing_percent_qd = {
    "Choose best molecule": avg_fract_redrawing_CBM*100,
    "Choose best option d": avg_fract_redrawing_CBO_qd*100,
    "Choose best atom": avg_fract_redrawing_CBA*100,
    "Graphing": avg_fract_redrawing_graph*100,
}
plt.bar(list(redrawing_percent_qd.keys()), list(redrawing_percent_qd.values()))
plt.title("Average percent of responses with a redrawing \n when there is a drawing in the question", fontsize = 18)
plt.ylabel("Percent of responses with redrawing")
plt.xticks(rotation = 270)
plt.ylim(0,100)

#All choose best molecule/atom, and graphing questions have a drawing
#Ranking excluded

## Generate drawing, redrawing, and combined distributions for questions containing each type of drawing

In [None]:
# response columns whose name contains "redrawing_"
redrawing_techs = [col for col in response_config.columns if "redrawing_" in col]

# question-config columns whose name contains "drawing_"; one plot is made per column
pic_types = [col for col in question_config.columns if "drawing_" in col]

for pic_type in pic_types:

    # ids of the questions flagged True for this column, and their Question objects
    q_id_list = question_config[question_config[pic_type] == True].index
    target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

    # For each redrawing technique: percent of responses using it in every
    # selected question, then averaged over those questions.
    redraw_summary = []
    for redraw_type in redrawing_techs:
        redraw_fract_per_draw_type = [
            sum(1 for r in q.responses_list if getattr(r, redraw_type)) / q.number_responses * 100
            for q in target_qs
        ]
        redraw_summary.append(np.average(redraw_fract_per_draw_type))

    # bar plot of redrawing technique usage for this column's questions
    bar_positions = range(len(redrawing_techs))
    bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(redrawing_techs)))
    plt.bar(bar_positions, redraw_summary, color=bar_colors, label = redrawing_techs)
    plt.title("Percent of responses with a given redrawing \n for " + pic_type + " questions", fontsize = 18)
    plt.ylabel("Percent of responses with given redrawing")
    ax = plt.gca()
    ax.set_xticks(bar_positions, redrawing_techs, rotation=315, ha="left")
    plt.ylim(0,100)
    plt.show()
    plt.close()

In [None]:
# response columns whose name contains "drawing_" but not "redrawing_"
drawing_techs = [
    col for col in response_config.columns
    if "drawing_" in col and "redrawing_" not in col
]

# question-config columns whose name contains "drawing_"; one plot is made per column
pic_types = [col for col in question_config.columns if "drawing_" in col]

for pic_type in pic_types:

    # ids of the questions flagged True for this column, and their Question objects
    q_id_list = question_config[question_config[pic_type] == True].index
    target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

    # For each drawing technique: percent of responses using it in every
    # selected question, then averaged over those questions.
    draw_summary = []
    for draw_type in drawing_techs:
        draw_fract_per_draw_type = [
            sum(1 for r in q.responses_list if getattr(r, draw_type)) / q.number_responses * 100
            for q in target_qs
        ]
        draw_summary.append(np.average(draw_fract_per_draw_type))

    # bar plot of drawing technique usage for this column's questions
    bar_positions = range(len(drawing_techs))
    bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(drawing_techs)))
    plt.bar(bar_positions, draw_summary, color=bar_colors, label = drawing_techs)
    plt.title("Percent of responses with a given drawing \n for " + pic_type + " questions", fontsize = 18)
    plt.ylabel("Percent of responses with given drawing")
    ax = plt.gca()
    ax.set_xticks(bar_positions, drawing_techs, rotation=270)
    plt.ylim(0,100)
    plt.show()
    plt.close()

In [None]:
# response columns whose name contains "drawing_" (this also matches "redrawing_")
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

# display names used as x-axis tick labels
# NOTE(review): assumes one label per entry of alldrawing_techs, in column order — confirm
labels = (
    'Arrows', 'Numbering', 'Steps', 'Mapping', 'Text', 'Product', 'Molecule',
    'Colour Coding', 'Charges', 'Graph', 'Resonance', 'Line', 'Wedge/Dash',
    'Newman', 'Chair', 'Structural', 'Condensed', 'Orbitals', 'Copying',
)

# question-config columns whose name contains "drawing_"; one plot is made per column
pic_types = [col for col in question_config.columns if "drawing_" in col]

for pic_type in pic_types:

    # ids of the questions flagged True for this column, and their Question objects
    q_id_list = question_config[question_config[pic_type] == True].index
    target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

    # For each technique: percent of responses using it in every selected
    # question, then averaged over those questions.
    alldraw_summary = []
    for alldraw_type in alldrawing_techs:
        alldraw_fract_per_draw_type = [
            sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses * 100
            for q in target_qs
        ]
        alldraw_summary.append(np.average(alldraw_fract_per_draw_type))

    # bar plot of technique usage for this column's questions, saved and shown
    bar_positions = range(len(alldrawing_techs))
    bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(alldrawing_techs)))
    plt.bar(bar_positions, alldraw_summary, color=bar_colors, label = labels)
    plt.title("Percent of responses with a given drawing \n for " + pic_type + " questions", fontsize = 18)
    plt.ylabel("Percent of responses with given drawing")
    ax = plt.gca()
    ax.set_xticks(bar_positions, labels, rotation=270)
    plt.ylim(0,100)
    plt.savefig("plots/Percent of responses with a given drawing for " + pic_type + " questions", bbox_inches = "tight")
    plt.show()
    plt.close()

    # Scan the technique names in order: remember the position of the latest
    # name containing "text", and stop at the first name containing
    # "redrawing" (idx_redraw keeps that position).
    for idx_redraw, tech_name in enumerate(alldrawing_techs):
        if "text" in tech_name:
            idx_text = idx_redraw

        if "redrawing" in tech_name:
            break

    # Split the averages into drawing, redrawing, and drawing-without-text groups
    draw_stats = np.array(alldraw_summary[:idx_redraw])
    redraw_stats = np.array(alldraw_summary[idx_redraw:])
    before_text = alldraw_summary[:idx_text]
    after_text = alldraw_summary[idx_text+1:idx_redraw]
    draw_stats_no_text = np.array(before_text + after_text)

    # Summarizing metrics: counts above a threshold and means of the largest values
    num_draw_5 = sum(draw_stats > 5)
    num_redraw_1 = sum(redraw_stats > 1)
    top5_draw = np.argpartition(draw_stats, -5)[-5:]
    avg_draw_5 = np.mean(draw_stats[top5_draw])
    top3_redraw = np.argpartition(redraw_stats, -3)[-3:]
    avg_redraw_3 = np.mean(redraw_stats[top3_redraw])
    top5_draw_no_text = np.argpartition(draw_stats_no_text, -5)[-5:]
    avg_draw_5_no_text = np.mean(draw_stats_no_text[top5_draw_no_text])

    # Store this column's row in the combined summary dictionary
    summary_stats_dict["pic_type" + str(pic_type)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text,
    ]

In [None]:
# response columns whose name contains "drawing_" (this also matches "redrawing_")
drawing_techs = [col for col in response_config.columns if "drawing_" in col]

# display names used as x-axis tick labels
# NOTE(review): assumes one label per entry of drawing_techs, in column order — confirm
labels = (
    'Arrows', 'Numbering', 'Steps', 'Mapping', 'Text', 'Product', 'Molecule',
    'Colour Coding', 'Charges', 'Graph', 'Resonance', 'Line', 'Wedge/Dash',
    'Newman', 'Chair', 'Structural', 'Condensed', 'Orbitals', 'Copying',
)

# every question id, and the matching Question objects
q_id_list = question_config["q_number"].index
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

# For each question: percent of its responses using each technique
question_summary = []
for q in target_qs:
    draw_type_fraction = [
        sum(1 for r in q.responses_list if getattr(r, draw_type)) / q.number_responses * 100
        for draw_type in drawing_techs
    ]
    question_summary.append(draw_type_fraction)

bar_positions = range(len(drawing_techs))

# i is the 1-based position of the question, used in the plot title; the saved
# file name and the summary key use the question's q_number instead.
for i, (q, data) in enumerate(zip(target_qs, question_summary), start=1):

    bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(drawing_techs)))
    plt.bar(bar_positions, data, color=bar_colors, label = drawing_techs)
    plt.title("Percent of responses with a given drawing \n for question " + str(i), fontsize = 18)
    plt.ylabel("Percent of responses with given drawing")
    ax = plt.gca()
    ax.set_xticks(bar_positions, labels, rotation=270)
    plt.ylim(0,100)
    plt.savefig("plots/Percent of responses with a given drawing for " + str(q.q_number) + " questions", bbox_inches = "tight")
    plt.show()
    plt.close()

    # Scan the technique names in order: remember the position of the latest
    # name containing "text", and stop at the first name containing
    # "redrawing" (idx_redraw keeps that position).
    for idx_redraw, tech_name in enumerate(drawing_techs):
        if "text" in tech_name:
            idx_text = idx_redraw

        if "redrawing" in tech_name:
            break

    # Split the percentages into drawing, redrawing, and drawing-without-text groups
    draw_stats = np.array(data[:idx_redraw])
    redraw_stats = np.array(data[idx_redraw:])
    before_text = data[:idx_text]
    after_text = data[idx_text+1:idx_redraw]
    draw_stats_no_text = np.array(before_text + after_text)

    # Summarizing metrics: counts above a threshold and means of the largest values
    num_draw_5 = sum(draw_stats > 5)
    num_redraw_1 = sum(redraw_stats > 1)
    top5_draw = np.argpartition(draw_stats, -5)[-5:]
    avg_draw_5 = np.mean(draw_stats[top5_draw])
    top3_redraw = np.argpartition(redraw_stats, -3)[-3:]
    avg_redraw_3 = np.mean(redraw_stats[top3_redraw])
    top5_draw_no_text = np.argpartition(draw_stats_no_text, -5)[-5:]
    avg_draw_5_no_text = np.mean(draw_stats_no_text[top5_draw_no_text])

    # Store this question's row in the combined summary dictionary
    summary_stats_dict["q_number" + str(q.q_number)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text,
    ]

# create bar plot of redrawing technique usage for the given drawing type in the question

## Overall fraction of students using each drawing technique

In [None]:
# response columns whose name contains "drawing_" (this also matches "redrawing_")
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

# every question id, and the matching Question objects
q_id_list = question_config.index
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

# display names used as x-axis tick labels
# NOTE(review): assumes one label per entry of alldrawing_techs, in column order — confirm
labels = (
    'Arrows', 'Numbering', 'Steps', 'Mapping', 'Text', 'Product', 'Molecule',
    'Colour Coding', 'Charges', 'Graph', 'Resonance', 'Line', 'Wedge/Dash',
    'Newman', 'Chair', 'Structural', 'Condensed', 'Orbitals', 'Copying',
)

# For each technique: fraction of responses using it in every question,
# averaged over the questions and expressed as a percent.
alldraw_summary = []
for alldraw_type in alldrawing_techs:
    alldraw_fract_per_draw_type = [
        sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses
        for q in target_qs
    ]
    n_questions = len(alldraw_fract_per_draw_type)
    alldraw_summary.append(np.sum(alldraw_fract_per_draw_type)/n_questions *100)

# bar plot of technique usage over all questions, saved and shown
bar_positions = range(len(alldrawing_techs))
bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(alldrawing_techs)))
plt.bar(bar_positions, alldraw_summary, color=bar_colors, label = alldrawing_techs)
plt.title("Percent of responses with a given drawing \n for all questions", fontsize = 18)
plt.ylabel("Percent of responses with given drawing")
ax = plt.gca()
ax.set_xticks(bar_positions, labels, rotation=270)
plt.ylim(0,100)
plt.savefig("plots/Percent of responses with a given drawing for all questions", bbox_inches = "tight")
plt.show()
plt.close()

# Scan the technique names in order: remember the position of the latest name
# containing "text", and stop at the first name containing "redrawing"
# (idx_redraw keeps that position).
for idx_redraw, tech_name in enumerate(alldrawing_techs):
    if "text" in tech_name:
        idx_text = idx_redraw

    if "redrawing" in tech_name:
        break

# Split the averages into drawing, redrawing, and drawing-without-text groups
draw_stats = np.array(alldraw_summary[:idx_redraw])
redraw_stats = np.array(alldraw_summary[idx_redraw:])
before_text = alldraw_summary[:idx_text]
after_text = alldraw_summary[idx_text+1:idx_redraw]
draw_stats_no_text = np.array(before_text + after_text)

# Summarizing metrics: counts above a threshold and means of the largest values
num_draw_5 = sum(draw_stats > 5)
num_redraw_1 = sum(redraw_stats > 1)
top5_draw = np.argpartition(draw_stats, -5)[-5:]
avg_draw_5 = np.mean(draw_stats[top5_draw])
top3_redraw = np.argpartition(redraw_stats, -3)[-3:]
avg_redraw_3 = np.mean(redraw_stats[top3_redraw])
top5_draw_no_text = np.argpartition(draw_stats_no_text, -5)[-5:]
avg_draw_5_no_text = np.mean(draw_stats_no_text[top5_draw_no_text])

# Store the all-questions row in the combined summary dictionary
summary_stats_dict["All"] = [
    num_draw_5,
    num_redraw_1,
    avg_draw_5,
    avg_redraw_3,
    avg_draw_5_no_text,
]

## Drawing usage when question does/doesn't contain a drawing

In [None]:
# response columns whose name contains "drawing_" (this also matches "redrawing_")
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

# distinct values found in the question-config "drawing" column
all_drawing = question_config["drawing"].unique()

# every question id, and the matching Question objects
q_id_list = question_config.index
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

# display names used as x-axis tick labels
# NOTE(review): assumes one label per entry of alldrawing_techs, in column order — confirm
labels = (
    'Arrows', 'Numbering', 'Steps', 'Mapping', 'Text', 'Product', 'Molecule',
    'Colour Coding', 'Charges', 'Graph', 'Resonance', 'Line', 'Wedge/Dash',
    'Newman', 'Chair', 'Structural', 'Condensed', 'Orbitals', 'Copying',
)

bar_positions = range(len(alldrawing_techs))

# one pass per distinct "drawing" value
for drawing in all_drawing:

    # keep only the questions whose drawing attribute equals this value
    drawing_qs = [q for q in target_qs if q.drawing == drawing]

    # For each technique: fraction of responses using it in every kept
    # question, averaged over those questions and expressed as a percent.
    alldraw_summary = []
    for alldraw_type in alldrawing_techs:
        alldraw_fract_per_draw_type = [
            sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses
            for q in drawing_qs
        ]
        n_questions = len(alldraw_fract_per_draw_type)
        alldraw_summary.append(np.sum(alldraw_fract_per_draw_type)/n_questions *100)

    # bar plot of technique usage for this group of questions, saved and shown
    # NOTE(review): the saved file name contains ':' — confirm the target file system accepts it
    bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(alldrawing_techs)))
    plt.bar(bar_positions, alldraw_summary, color=bar_colors, label = alldrawing_techs)
    plt.title(f"Percent of responses with a given drawing \n for drawing: {drawing} questions", fontsize = 18)
    plt.ylabel("Percent of responses with given drawing")
    ax = plt.gca()
    ax.set_xticks(bar_positions, labels, rotation=270)
    plt.ylim(0,100)
    plt.savefig("plots/Percent of responses with a given drawing for drawing: " + str(drawing) + " questions", bbox_inches = "tight")
    plt.show()
    plt.close()

    # Scan the technique names in order: remember the position of the latest
    # name containing "text", and stop at the first name containing
    # "redrawing" (idx_redraw keeps that position).
    for idx_redraw, tech_name in enumerate(alldrawing_techs):
        if "text" in tech_name:
            idx_text = idx_redraw

        if "redrawing" in tech_name:
            break

    # Split the averages into drawing, redrawing, and drawing-without-text groups
    draw_stats = np.array(alldraw_summary[:idx_redraw])
    redraw_stats = np.array(alldraw_summary[idx_redraw:])
    before_text = alldraw_summary[:idx_text]
    after_text = alldraw_summary[idx_text+1:idx_redraw]
    draw_stats_no_text = np.array(before_text + after_text)

    # Summarizing metrics: counts above a threshold and means of the largest values
    num_draw_5 = sum(draw_stats > 5)
    num_redraw_1 = sum(redraw_stats > 1)
    top5_draw = np.argpartition(draw_stats, -5)[-5:]
    avg_draw_5 = np.mean(draw_stats[top5_draw])
    top3_redraw = np.argpartition(redraw_stats, -3)[-3:]
    avg_redraw_3 = np.mean(redraw_stats[top3_redraw])
    top5_draw_no_text = np.argpartition(draw_stats_no_text, -5)[-5:]
    avg_draw_5_no_text = np.mean(draw_stats_no_text[top5_draw_no_text])

    # Store this group's row in the combined summary dictionary
    summary_stats_dict["drawing" + str(drawing)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text,
    ]

## Drawing usage when question does/doesn't require a mental rotation

In [None]:
# response columns whose name contains "drawing_" (this also matches "redrawing_")
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

# distinct values found in the question-config "rotation" column
all_rotation = question_config["rotation"].unique()

# every question id, and the matching Question objects
q_id_list = question_config.index
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

# display names used as x-axis tick labels
# NOTE(review): assumes one label per entry of alldrawing_techs, in column order — confirm
labels = (
    'Arrows', 'Numbering', 'Steps', 'Mapping', 'Text', 'Product', 'Molecule',
    'Colour Coding', 'Charges', 'Graph', 'Resonance', 'Line', 'Wedge/Dash',
    'Newman', 'Chair', 'Structural', 'Condensed', 'Orbitals', 'Copying',
)

bar_positions = range(len(alldrawing_techs))

# one pass per distinct "rotation" value
for rotation in all_rotation:

    # keep only the questions whose rotation attribute equals this value
    rotation_qs = [q for q in target_qs if q.rotation == rotation]

    # For each technique: fraction of responses using it in every kept
    # question, averaged over those questions and expressed as a percent.
    alldraw_summary = []
    for alldraw_type in alldrawing_techs:
        alldraw_fract_per_draw_type = [
            sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses
            for q in rotation_qs
        ]
        n_questions = len(alldraw_fract_per_draw_type)
        alldraw_summary.append(np.sum(alldraw_fract_per_draw_type)/n_questions *100)

    # bar plot of technique usage for this group of questions, saved and shown
    # NOTE(review): the saved file name contains ':' — confirm the target file system accepts it
    bar_colors = plt.get_cmap("tab20")(np.linspace(0,1,len(alldrawing_techs)))
    plt.bar(bar_positions, alldraw_summary, color=bar_colors, label = alldrawing_techs)
    plt.title(f"Percent of responses with a given drawing \n for mental rotation: {rotation} questions", fontsize = 18)
    plt.ylabel("Percent of responses with given drawing")
    ax = plt.gca()
    ax.set_xticks(bar_positions, labels, rotation=270)
    plt.ylim(0,100)
    plt.savefig("plots/Percent of responses with a given drawing for mental rotation: " + str(rotation) + " questions", bbox_inches = "tight")
    plt.show()
    plt.close()

    # Scan the technique names in order: remember the position of the latest
    # name containing "text", and stop at the first name containing
    # "redrawing" (idx_redraw keeps that position).
    for idx_redraw, tech_name in enumerate(alldrawing_techs):
        if "text" in tech_name:
            idx_text = idx_redraw

        if "redrawing" in tech_name:
            break

    # Split the averages into drawing, redrawing, and drawing-without-text groups
    draw_stats = np.array(alldraw_summary[:idx_redraw])
    redraw_stats = np.array(alldraw_summary[idx_redraw:])
    before_text = alldraw_summary[:idx_text]
    after_text = alldraw_summary[idx_text+1:idx_redraw]
    draw_stats_no_text = np.array(before_text + after_text)

    # Summarizing metrics: counts above a threshold and means of the largest values
    num_draw_5 = sum(draw_stats > 5)
    num_redraw_1 = sum(redraw_stats > 1)
    top5_draw = np.argpartition(draw_stats, -5)[-5:]
    avg_draw_5 = np.mean(draw_stats[top5_draw])
    top3_redraw = np.argpartition(redraw_stats, -3)[-3:]
    avg_redraw_3 = np.mean(redraw_stats[top3_redraw])
    top5_draw_no_text = np.argpartition(draw_stats_no_text, -5)[-5:]
    avg_draw_5_no_text = np.mean(draw_stats_no_text[top5_draw_no_text])

    # Store this group's row in the combined summary dictionary
    summary_stats_dict["rotation" + str(rotation)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text,
    ]

## Drawing usage for each type of reaction scheme

In [None]:
# Drawing usage per reaction scheme: for each value of the "reaction" column,
# plot the mean percentage of responses that use each drawing technique and
# log five summary statistics into summary_stats_dict.

# Every response-config column whose name contains "drawing_". That substring
# also occurs in "redrawing_", so any redrawing columns are picked up as well.
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

all_reaction = question_config["reaction"].unique()

# find desired question ids
q_id_list = question_config.index

# look at all questions
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

labels = ('Arrows','Numbering','Steps','Mapping','Text','Product','Molecule','Colour Coding','Charges','Graph','Resonance','Line','Wedge/Dash','Newman','Chair','Structural','Condensed','Orbitals','Copying')

# The draw / redraw split depends only on the column names, so it is computed
# once here rather than once per group. Everything before the first column
# containing "redrawing" counts as a drawing technique; when no such column
# exists, every technique is a drawing technique (and the redraw set is empty).
idx_redraw = next(
    (pos for pos, tech in enumerate(alldrawing_techs) if "redrawing" in tech),
    len(alldrawing_techs),
)
# Mask over the drawing techniques that keeps all non-"text" entries. If no
# column contains "text", nothing is dropped (no reliance on a stale index).
not_text_mask = np.array(
    ["text" not in tech for tech in alldrawing_techs[:idx_redraw]], dtype=bool
)

# Bar positions and colours are identical for every plot
bar_positions = range(len(alldrawing_techs))
bar_colors = plt.get_cmap("tab20")(np.linspace(0, 1, len(alldrawing_techs)))

# loop over types of reaction schemes - [reactants, None, full, equation]
for reaction in all_reaction:
    # filter questions. unique() reports missing values as NaN and NaN never
    # compares equal to itself, so a missing group value is matched explicitly.
    # NOTE(review): assumes a missing value on the Question object is stored
    # as NaN/None - confirm against src.exam_data.
    reaction_is_missing = pd.isna(reaction)
    reaction_qs = [
        q for q in target_qs
        if q.reaction == reaction or (reaction_is_missing and pd.isna(q.reaction))
    ]

    # A question without responses has no defined usage fraction; leaving it
    # out avoids a division by zero. A group with nothing left is skipped
    # instead of producing a blank plot and NaN statistics.
    scored_qs = [q for q in reaction_qs if q.number_responses]
    if not scored_qs:
        print(f"Skipping reaction scheme {reaction!r}: no questions with responses")
        continue

    # For each technique: fraction of responses using it per question,
    # averaged over the questions of the group and expressed in percent
    alldraw_summary = []
    for alldraw_type in alldrawing_techs:
        fractions = [
            sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses
            for q in scored_qs
        ]
        alldraw_summary.append(np.sum(fractions) / len(fractions) * 100)

    # create bar plot of drawing technique usage for this reaction scheme
    fig, ax = plt.subplots()
    ax.bar(bar_positions, alldraw_summary, color=bar_colors, label=alldrawing_techs)
    ax.set_title(f"Percent of responses with a given drawing \n for {reaction} questions", fontsize = 18)
    ax.set_ylabel("Percent of responses with given drawing")
    ax.set_xticks(bar_positions, labels, rotation=270)
    ax.set_ylim(0, 100)
    fig.savefig("plots/Percent of responses with a given drawing for " + str(reaction) + " questions", bbox_inches = "tight")
    plt.show()
    plt.close(fig)

    # Log summary statistics
    # Separate data into categories - draw, redraw, and draw w/o text
    summary_arr = np.asarray(alldraw_summary, dtype=float)
    draw_stats = summary_arr[:idx_redraw]
    redraw_stats = summary_arr[idx_redraw:]
    draw_stats_no_text = draw_stats[not_text_mask]

    # Calculate summarizing metrics
    num_draw_5 = int((draw_stats > 5).sum())
    num_redraw_1 = int((redraw_stats > 1).sum())
    # Mean of the k largest values. Slicing a sorted copy tolerates categories
    # with fewer than k techniques (np.argpartition raises in that case); an
    # empty category yields NaN.
    avg_draw_5, avg_redraw_3, avg_draw_5_no_text = [
        float(np.sort(values)[-top_k:].mean()) if values.size else float("nan")
        for values, top_k in ((draw_stats, 5), (redraw_stats, 3), (draw_stats_no_text, 5))
    ]

    # Add summary statistics to combined dictionary for later export to
    # summary_statistics.csv
    summary_stats_dict["reaction" + str(reaction)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text
    ]

## Drawing usage for each of the course units

In [None]:
# Drawing usage per course unit: for each value of the "unit" column, plot the
# mean percentage of responses that use each drawing technique and log five
# summary statistics into summary_stats_dict.

# Every response-config column whose name contains "drawing_". That substring
# also occurs in "redrawing_", so any redrawing columns are picked up as well.
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

all_unit = question_config["unit"].unique()

# find desired question ids
q_id_list = question_config.index

# look at all questions
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

labels = ('Arrows','Numbering','Steps','Mapping','Text','Product','Molecule','Colour Coding','Charges','Graph','Resonance','Line','Wedge/Dash','Newman','Chair','Structural','Condensed','Orbitals','Copying')

# The draw / redraw split depends only on the column names, so it is computed
# once here rather than once per group. Everything before the first column
# containing "redrawing" counts as a drawing technique; when no such column
# exists, every technique is a drawing technique (and the redraw set is empty).
idx_redraw = next(
    (pos for pos, tech in enumerate(alldrawing_techs) if "redrawing" in tech),
    len(alldrawing_techs),
)
# Mask over the drawing techniques that keeps all non-"text" entries. If no
# column contains "text", nothing is dropped (no reliance on a stale index).
not_text_mask = np.array(
    ["text" not in tech for tech in alldrawing_techs[:idx_redraw]], dtype=bool
)

# Bar positions and colours are identical for every plot
bar_positions = range(len(alldrawing_techs))
bar_colors = plt.get_cmap("tab20")(np.linspace(0, 1, len(alldrawing_techs)))

# loop over each of the units
for unit in all_unit:
    # filter questions. unique() reports missing values as NaN and NaN never
    # compares equal to itself, so a missing group value is matched explicitly.
    # NOTE(review): assumes a missing value on the Question object is stored
    # as NaN/None - confirm against src.exam_data.
    unit_is_missing = pd.isna(unit)
    unit_qs = [
        q for q in target_qs
        if q.unit == unit or (unit_is_missing and pd.isna(q.unit))
    ]

    # A question without responses has no defined usage fraction; leaving it
    # out avoids a division by zero. A group with nothing left is skipped
    # instead of producing a blank plot and NaN statistics.
    scored_qs = [q for q in unit_qs if q.number_responses]
    if not scored_qs:
        print(f"Skipping unit {unit!r}: no questions with responses")
        continue

    # For each technique: fraction of responses using it per question,
    # averaged over the questions of the group and expressed in percent
    alldraw_summary = []
    for alldraw_type in alldrawing_techs:
        fractions = [
            sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses
            for q in scored_qs
        ]
        alldraw_summary.append(np.sum(fractions) / len(fractions) * 100)

    # create bar plot of drawing technique usage for this unit
    fig, ax = plt.subplots()
    ax.bar(bar_positions, alldraw_summary, color=bar_colors, label=alldrawing_techs)
    ax.set_title(f"Percent of responses with a given drawing \n for {unit} questions", fontsize = 18)
    ax.set_ylabel("Percent of responses with given drawing")
    ax.set_xticks(bar_positions, labels, rotation=270)
    ax.set_ylim(0, 100)
    fig.savefig("plots/Percent of responses with a given drawing for " + str(unit) + " questions", bbox_inches = "tight")
    plt.show()
    plt.close(fig)

    # Log summary statistics
    # Separate data into categories - draw, redraw, and draw w/o text
    summary_arr = np.asarray(alldraw_summary, dtype=float)
    draw_stats = summary_arr[:idx_redraw]
    redraw_stats = summary_arr[idx_redraw:]
    draw_stats_no_text = draw_stats[not_text_mask]

    # Calculate summarizing metrics
    num_draw_5 = int((draw_stats > 5).sum())
    num_redraw_1 = int((redraw_stats > 1).sum())
    # Mean of the k largest values. Slicing a sorted copy tolerates categories
    # with fewer than k techniques (np.argpartition raises in that case); an
    # empty category yields NaN.
    avg_draw_5, avg_redraw_3, avg_draw_5_no_text = [
        float(np.sort(values)[-top_k:].mean()) if values.size else float("nan")
        for values, top_k in ((draw_stats, 5), (redraw_stats, 3), (draw_stats_no_text, 5))
    ]

    # Add summary statistics to combined dictionary for later export to
    # summary_statistics.csv
    summary_stats_dict["unit" + str(unit)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text
    ]

## Drawing usage when question does/doesn't contain electron flow

In [None]:
# Drawing usage by electron flow: for each value of the "electronflow" column,
# plot the mean percentage of responses that use each drawing technique and
# log five summary statistics into summary_stats_dict.

# Every response-config column whose name contains "drawing_". That substring
# also occurs in "redrawing_", so any redrawing columns are picked up as well.
alldrawing_techs = [col for col in response_config.columns if "drawing_" in col]

all_electronflow = question_config["electronflow"].unique()

# find desired question ids
q_id_list = question_config.index

# look at all questions
target_qs = get_q_objs_from_q_ids(q_id_list, q_file_path, r_file_path)

labels = ('Arrows','Numbering','Steps','Mapping','Text','Product','Molecule','Colour Coding','Charges','Graph','Resonance','Line','Wedge/Dash','Newman','Chair','Structural','Condensed','Orbitals','Copying')

# The draw / redraw split depends only on the column names, so it is computed
# once here rather than once per group. Everything before the first column
# containing "redrawing" counts as a drawing technique; when no such column
# exists, every technique is a drawing technique (and the redraw set is empty).
idx_redraw = next(
    (pos for pos, tech in enumerate(alldrawing_techs) if "redrawing" in tech),
    len(alldrawing_techs),
)
# Mask over the drawing techniques that keeps all non-"text" entries. If no
# column contains "text", nothing is dropped (no reliance on a stale index).
not_text_mask = np.array(
    ["text" not in tech for tech in alldrawing_techs[:idx_redraw]], dtype=bool
)

# Bar positions and colours are identical for every plot
bar_positions = range(len(alldrawing_techs))
bar_colors = plt.get_cmap("tab20")(np.linspace(0, 1, len(alldrawing_techs)))

# loop over [True, False] for "question contains electron flow"
for electronflow in all_electronflow:
    # filter questions. unique() reports missing values as NaN and NaN never
    # compares equal to itself, so a missing group value is matched explicitly.
    # NOTE(review): assumes a missing value on the Question object is stored
    # as NaN/None - confirm against src.exam_data.
    electronflow_is_missing = pd.isna(electronflow)
    electronflow_qs = [
        q for q in target_qs
        if q.electronflow == electronflow
        or (electronflow_is_missing and pd.isna(q.electronflow))
    ]

    # A question without responses has no defined usage fraction; leaving it
    # out avoids a division by zero. A group with nothing left is skipped
    # instead of producing a blank plot and NaN statistics.
    scored_qs = [q for q in electronflow_qs if q.number_responses]
    if not scored_qs:
        print(f"Skipping electron flow {electronflow!r}: no questions with responses")
        continue

    # For each technique: fraction of responses using it per question,
    # averaged over the questions of the group and expressed in percent
    alldraw_summary = []
    for alldraw_type in alldrawing_techs:
        fractions = [
            sum(1 for r in q.responses_list if getattr(r, alldraw_type)) / q.number_responses
            for q in scored_qs
        ]
        alldraw_summary.append(np.sum(fractions) / len(fractions) * 100)

    # create bar plot of drawing technique usage for this electron flow value
    fig, ax = plt.subplots()
    ax.bar(bar_positions, alldraw_summary, color=bar_colors, label=alldrawing_techs)
    ax.set_title(f"Percent of responses with a given drawing \n for electron flow: {electronflow} questions", fontsize = 18)
    ax.set_ylabel("Percent of responses with given drawing")
    ax.set_xticks(bar_positions, labels, rotation=270)
    ax.set_ylim(0, 100)
    fig.savefig("plots/Percent of responses with a given drawing for electron flow: " + str(electronflow) + " questions", bbox_inches = "tight")
    plt.show()
    plt.close(fig)

    # Log summary statistics
    # Separate data into categories - draw, redraw, and draw w/o text
    summary_arr = np.asarray(alldraw_summary, dtype=float)
    draw_stats = summary_arr[:idx_redraw]
    redraw_stats = summary_arr[idx_redraw:]
    draw_stats_no_text = draw_stats[not_text_mask]

    # Calculate summarizing metrics
    num_draw_5 = int((draw_stats > 5).sum())
    num_redraw_1 = int((redraw_stats > 1).sum())
    # Mean of the k largest values. Slicing a sorted copy tolerates categories
    # with fewer than k techniques (np.argpartition raises in that case); an
    # empty category yields NaN.
    avg_draw_5, avg_redraw_3, avg_draw_5_no_text = [
        float(np.sort(values)[-top_k:].mean()) if values.size else float("nan")
        for values, top_k in ((draw_stats, 5), (redraw_stats, 3), (draw_stats_no_text, 5))
    ]

    # Add summary statistics to combined dictionary for later export to
    # summary_statistics.csv
    summary_stats_dict["electronflow" + str(electronflow)] = [
        num_draw_5,
        num_redraw_1,
        avg_draw_5,
        avg_redraw_3,
        avg_draw_5_no_text
    ]

## Export summary statistics to CSV (can be opened in Excel)

In [None]:
# Build the export table: one row per analysed group, one column per statistic.
# The "test" entry is the placeholder row ([1,2,3,4,5]) that was used to set up
# the table layout; it is left out so it is not exported as if it were data.
# Building row-wise also lets the two count columns keep an integer dtype
# instead of being upcast to float by a transpose.
summary_stats_df = pd.DataFrame.from_dict(
    {
        group_name: group_stats
        for group_name, group_stats in summary_stats_dict.items()
        if group_name != "test"
    },
    orient="index",
    columns=[
        "Number of drawing techs above 5 %",
        "Number of redrawing techs above 1 %",
        "Average of top 5 drawing techs",
        "Average of top 3 redrawing techs",
        "Average of top 5 drawing techs (w/o text)",
    ],
)
summary_stats_df

In [None]:
# Write the summary table to summary_statistics.csv, relative to the current
# working directory.
summary_stats_df.to_csv(path_or_buf="summary_statistics.csv")

## Summary

In total, this notebook has performed the following analyses on the fall 2018 organic chemistry exams:  
1. Compare drawing usage for different question types  
2. Compare drawing usage based on indicators (drawings) within the question  
3. Compare drawing usage for different units/concepts  