# Imports

In [None]:
from canvasapi import Canvas

from os.path import join

from autocanvas.config import INPUT_DIR, OUTPUT_DIR

from autocanvas.core.conversions import (
    series_from_api_object, 
    df_from_api_list)

from autocanvas.core.course_info import (
    get_PHY_course, 
    get_assignment_group_from_name, 
    get_teaching_personel,
    get_students_from_sections,)

from autocanvas.core.assignments import (
    get_assignment,
    get_assignment_submissions,
    get_graded_submissions,
    get_submitted_submissions,
    get_assignment_groups,
    get_assignment_collection,
    get_submissions_in_collection,)

import re
from datetime import datetime
import pandas as pd
import seaborn as sns
pd.set_option('display.max_columns', 500)
import matplotlib.pyplot as plt

# Base URL of the Canvas instance all API requests are sent to.
API_URL = "https://ufl.instructure.com/"
try:
    # Preferred path: obtain the token through the project configuration.
    from autocanvas.config import get_API_key
    API_KEY = get_API_key()
except FileNotFoundError as e:
    # get_API_key() raised FileNotFoundError: report it and ask for the token
    # interactively. getpass() is used instead of input() so the typed token
    # is not echoed into the cell output (which is saved with the notebook).
    from getpass import getpass
    print(e)
    API_KEY = getpass("Asking for API token")

# Canvas API client used by the cells below.
canvas = Canvas(API_URL, API_KEY)

In [None]:
# Default font size applied to every figure created below.
plt.rcParams['font.size'] = 18

In [None]:
# Select the course to analyse and print its name as a quick sanity check.
course_query = {"course_code": "PHY2054", "semester": "Spring 2021"}
course = get_PHY_course(canvas, **course_query)
print(course.name)

In [None]:
# Teaching staff of the course, requested for the "ta" and "teacher" groups.
staff_groups = ["ta", "teacher"]
df_TAs, df_teachers = get_teaching_personel(
    course, add_first_name=True, groups=staff_groups)

# CSV file inside INPUT_DIR that is handed over as `section_ta_csv`.
file_name = "section_ta_phy2054_spring2021.csv"
file_path = join(INPUT_DIR, file_name)

df_students, df_sections = get_students_from_sections(course, section_ta_csv=file_path)
print("Completed importing students")

# Quiz

In [None]:
# Quiz under analysis; the same number is reused below for the makeup lookup
# and for the names of the saved figures.
quiz_number = 9

quiz_lookup = dict(group="Recitation Quizzes", number=quiz_number, ignore_makeup=True)
assignment = get_assignment(course, **quiz_lookup)

# Submissions of that assignment, built from the student and TA tables.
df_subs = get_assignment_submissions(
    assignment=assignment,
    df_students=df_students,
    df_TAs=df_TAs,
)

In [None]:
# Split the quiz submissions into the graded and the submitted subsets.
df_graded, df_submitted = (
    get_graded_submissions(df_subs),
    get_submitted_submissions(df_subs),
)

In [None]:
# Non-null entries per column, broken down by Canvas workflow state.
df_subs.groupby(by="workflow_state").count()

In [None]:
# A notebook cell only displays its last bare expression, so the two counts
# are returned together as (graded, submitted) instead of on separate lines,
# where the first one was silently discarded.
len(df_graded), len(df_submitted)

# Corresponding Makeup Quiz

In [None]:
# Look the makeup assignment up by name, derived from the quiz number.
makeup_name = "Makeup Quiz {}".format(quiz_number)
makeup_assignment = get_assignment(course, name=makeup_name)
print(makeup_assignment["name"])

# All makeup submissions ...
df_makeup_subs = get_assignment_submissions(
    assignment=makeup_assignment,
    df_students=df_students,
    df_TAs=df_TAs,
)
print("Number of makeups:", len(df_makeup_subs))

# ... the graded ones ...
df_makeup_graded = get_graded_submissions(df_makeup_subs)
print("Number of graded makeups:", len(df_makeup_graded))

# ... and the submitted ones.
df_makeup_submitted = get_submitted_submissions(df_makeup_subs)
print("Number of submitted makeups:", len(df_makeup_submitted))

The options that instructors have for the "status" of a student submission are: `excused`, `missing`, `late`, `None`. The internal status that Canvas keeps track of can be: `graded`, `pending_review`, `submitted`, `unsubmitted`. Students with no submission normally appear with `missing==True`, `excused==None` and `status==unsubmitted`. If the TA enters a grade for such a student and then removes it, the student will **still** appear with `missing==True`, but with `excused==False` and `status==graded` (weirdly, even though Canvas shows an orange circle instead of a tick mark). If the TA excuses the student (with or without a submission), then `excused==True`, `missing==False` and `status==graded`.

In [None]:
# Stack the graded quiz rows on top of the graded makeup rows.
df_graded_combined = pd.concat([df_graded, df_makeup_graded])

# Students that appear more than once with a graded submission
# (keep=False flags every occurrence, not only the repeats).
appears_repeatedly = df_graded_combined["user_id"].duplicated(keep=False)
df_graded_combined.loc[appears_repeatedly]

In [None]:
# Row counts of the quiz, makeup and combined tables, one per line ...
for frame in (df_graded, df_makeup_graded, df_graded_combined):
    print(len(frame))
# ... followed by the number of rows whose user_id repeats an earlier row.
print(df_graded_combined["user_id"].duplicated().sum())

Remove duplicates and keep the last one. "Last" is currently not very robust: it should be made dependent on the submission date.

In [None]:
# Keep a single row per student: the last one in the concatenated order.
# TODO: "last" depends on row order only; it should depend on the submission date.
df_graded_combined = df_graded_combined.drop_duplicates(subset="user_id", keep="last")

# should be zero
n_repeated = df_graded_combined["user_id"].duplicated().sum()
print(n_repeated)

# TA Grades: Plots and Tables 

In [None]:
# Distinct section-TA first names in sorted order; used as the x-axis order
# of the per-TA plots below.
section_ta_names = df_subs["section_ta_first_name"]
ta_order = section_ta_names.drop_duplicates().sort_values().to_list()
ta_order

In [None]:
# first make figure more wide to fit all points, otherwise points might be hidden
# Swarm plot of the scores per grader. The figure is made wide on purpose:
# in a narrow figure some of the points might be hidden.
fig, ax = plt.subplots(figsize=(15, 6))
sns.swarmplot(
    data=df_graded_combined,
    x="grader_first_name",
    y="score",
    order=ta_order,
    s=2,
    ax=ax,
)
ax.set_xlabel("")
ax.set_ylabel("Student scores", fontsize=20)
# dashed horizontal reference line at a score of 7.5, drawn behind the data
ax.axhline(y=7.5, c="k", ls="--", zorder=0)
ax.set_ylim(0, 10.5)
ax.set_title(assignment.name.iloc[0], fontsize=20)
fig.tight_layout()

# Save with an opaque white background.
plot_path = join(OUTPUT_DIR, "swarmplot_quiz_{}.png".format(quiz_number))
fig.savefig(plot_path, facecolor='w', transparent=False)

In [None]:
# Box plot of the scores per grader, graders ordered as in ta_order.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=df_graded_combined,
    x="grader_first_name",
    y="score",
    order=ta_order,
    ax=ax,
)
ax.set_xlabel("")
ax.set_ylabel("Student scores", fontsize=20)
# dashed horizontal reference line at a score of 7.5, drawn behind the data
ax.axhline(y=7.5, c="k", ls="--", zorder=0)
ax.set_ylim(0, 10.5)
ax.set_title(assignment.name.iloc[0], fontsize=20)
fig.tight_layout()

# Save with an opaque white background.
plot_path = join(OUTPUT_DIR, "boxplot_quiz_{}.png".format(quiz_number))
fig.savefig(plot_path, facecolor='w', transparent=False)

In [None]:
# Violin plot of the scores per grader, graders ordered as in ta_order.
fig, ax = plt.subplots(figsize=(12, 6))
sns.violinplot(
    data=df_graded_combined,
    x="grader_first_name",
    y="score",
    order=ta_order,
    ax=ax,
)
ax.set_xlabel("")
ax.set_ylabel("Student scores", fontsize=20)
# dashed horizontal reference line at a score of 7.5, drawn behind the data
ax.axhline(y=7.5, c="k", ls="--", zorder=0)
ax.set_ylim(0, 10.5)
ax.set_title(assignment.name.iloc[0], fontsize=20)
fig.tight_layout()

# Save with an opaque white background.
plot_path = join(OUTPUT_DIR, "violinplot_quiz_{}.png".format(quiz_number))
fig.savefig(plot_path, facecolor='w', transparent=False)

In [None]:
# Number of graded submissions left after removing duplicate students.
df_graded_combined.shape[0]

In [None]:
# Mean score per grader. The "score" column is selected *before* aggregating:
# averaging every column first fails on the non-numeric columns with
# pandas >= 2.0 and does needless work on older versions.
averages = df_graded_combined.groupby("grader_first_name")["score"].mean()
print(averages.round(2).to_markdown())

In [None]:
# Median score per grader. The "score" column is selected *before* aggregating:
# taking the median of every column first fails on the non-numeric columns
# with pandas >= 2.0 and does needless work on older versions.
medians = df_graded_combined.groupby("grader_first_name")["score"].median()
print(medians.round(2).to_markdown())

In [None]:
# Display the version of pandas that produced the tables above.
pd.__version__

# Check Quiz

In [None]:
# Submitted work whose workflow state is anything other than "graded".
not_graded_yet = df_submitted["workflow_state"].ne("graded")
df_submitted.loc[not_graded_yet]

All due dates should be after submission dates.  

In [None]:
# Rows where the cached due date lies before the submission time.
due_before_submission = df_submitted["cached_due_date_date"].lt(df_submitted["submitted_at_date"])
df_submitted.loc[due_before_submission]

The grader TA should be the section TA. If it is not, then the grade might be incomplete.

In [None]:
# Graded rows whose grader is not the TA of the student's section.
graded_by_other_ta = df_graded["grader_first_name"].ne(df_graded["section_ta_first_name"])
df_graded.loc[graded_by_other_ta]

Naively I expected that all cached due dates would be strictly less than graded_at_dates.  
That's not the case because:
1. some TAs might grade while they wait for their recitation session to end.
2. weirdly, when you award a fudge point, the actual graded_at_date gets overwritten
   by the student submitted_at_date. The same might be happening with comments

Here are the overwritten graded_at_dates:

In [None]:
# Graded rows whose grading timestamp equals the submission timestamp.
same_timestamp = df_graded["submitted_at_date"].eq(df_graded["graded_at_date"])
overwritten_graded_times = df_graded.loc[same_timestamp]
len(overwritten_graded_times)

But still none should be graded before it is submitted:

In [None]:
# Graded rows with a grading timestamp earlier than the submission timestamp.
graded_before_submitted = df_graded["submitted_at_date"].gt(df_graded["graded_at_date"])
faster_than_light = df_graded.loc[graded_before_submitted]
len(faster_than_light)

# Get Time Evolution by TA

The following cells are used to check how the grading is progressing. Makeups are ignored.

In [None]:
# Number of submitted rows (non-null workflow_state) per section TA.
submitted_by_ta = df_submitted.groupby("section_ta_first_name")
total_submitted_ta = submitted_by_ta["workflow_state"].count().rename("total_submitted")
total_submitted_ta

In [None]:
# Count of submissions per section TA (rows) and workflow state (columns);
# combinations that do not occur are filled with 0.
df_status_ta = pd.pivot_table(
    df_submitted,
    index="section_ta_first_name",
    columns="workflow_state",
    values="user_id",
    aggfunc="count",
).fillna(0)
df_status_ta

In [None]:
# get earliest possible date for each TA grading, 
# defined to be equal to the earliest student submission date for each TA
# used to generate the starting point for progress
from datetime import timedelta

earliest_times = (df_submitted
                  [["submitted_at_date", "section_ta_first_name"]].copy()
                    .dropna(axis=0)
                    .groupby(by="section_ta_first_name").agg("min")
                    .reset_index(drop=False)
                    .rename(columns={"submitted_at_date": "graded_at"})
                 )
# nothing graded yet, that's why the grade counter is still zero
earliest_times["occurences"] = 0
earliest_times

In [None]:
# One counting event per graded submission, stamped with its grading time.
graded_events = (df_graded[["graded_at_date", "section_ta_first_name"]]
                 .rename(columns={"graded_at_date": "graded_at"})
                 .assign(occurences=1))
# The zero-count starting rows go first so that every TA's curve starts at 0.
df_graded_at = pd.concat([earliest_times, graded_events]).reset_index(drop=True)

# group by TA and by time bin (seconds)
delta_t = '1s'
df_graded_at = df_graded_at.groupby([pd.Grouper(key='section_ta_first_name'),
                                     pd.Grouper(key='graded_at', freq=delta_t)
                                    ]).sum()
# running number of graded submissions for each TA
df_graded_at['cumsum'] = df_graded_at.groupby(level=0)['occurences'].cumsum()

df_graded_at = df_graded_at.reset_index()

# Percentage of each TA's submitted work that is graded at every time step.
# The per-TA total is looked up row by row with map(), which keeps the row
# index intact; dividing two Series indexed by repeated TA names only works
# while both indexes happen to be identical. A TA without an entry in
# total_submitted_ta gets NaN progress.
ta_totals = df_graded_at["section_ta_first_name"].map(total_submitted_ta)
df_graded_at["progress"] = df_graded_at["cumsum"] / ta_totals * 100
df_graded_at.head(5)

In [None]:
# df_graded_at[df_graded_at["section_ta_first_name"]=="Ioannis"]

As mentioned above, there is a bug in the stored `graded_at` times: many of them are overwritten by the `submitted_at` time, which is the time of the quiz. The bug seems to affect TA comments and fudge points.

In [None]:
import matplotlib.ticker as mtick

# Set to True to also mark the current time on the plot.
up_to_now = False
fig, ax = plt.subplots(figsize=(15,10))
g = ax  # original name of the axes handle, kept for any later cell that uses it
# individual points are only marked when the data is binned per second
marker = 'o' if delta_t=='1s' else None

# One colour per TA: the current colour cycle while it has enough colours,
# otherwise an evenly spaced "husl" palette so that no colour repeats.
n_tas = len(ta_order)
palette_name = None if n_tas <= len(plt.rcParams["axes.prop_cycle"]) else "husl"
ta_colors = sns.color_palette(palette_name, n_tas)

# Each TA is drawn separately with an explicit legend label. Looking the
# lines up afterwards through Matplotlib's automatic "_line{n}" labels is
# fragile: those labels are named "_child{n}" since Matplotlib 3.5, and the
# n-th line is the n-th TA only if no TA is missing from the data.
for ta_first, ta_color in zip(ta_order, ta_colors):
    ta_progress = df_graded_at[df_graded_at["section_ta_first_name"] == ta_first]
    if ta_progress.empty:
        # no progress rows for this TA: nothing to draw
        continue
    # submissions of this TA that are not graded yet (shown in the legend);
    # counts as 0 graded when the "graded" column does not exist yet
    missing = int(total_submitted_ta.loc[ta_first] -
                  df_status_ta.loc[ta_first].get("graded", 0))
    sns.lineplot(data=ta_progress, x="graded_at", y="progress",
                 drawstyle="steps-post", marker=marker, color=ta_color,
                 label="{} ({})".format(ta_first, missing),
                 legend=False, ax=ax)

# tick_params keeps the rotation when the ticks are recomputed later on
ax.tick_params(axis="x", labelrotation=30)
ax.axhline(y=100,c="k",zorder=0,lw=0.5,ls="--")

# deadline at end of Wednesday ET
# 5 hour difference between ET and UTC
# NOTE(review): a fixed 5 h offset matches EST; during daylight saving time
# ET is 4 h behind UTC.
# computed as the date of the earliest submission plus 9 days and 5 hours
grading_deadline = pd.to_datetime(earliest_times["graded_at"].min().date()) \
                                  + pd.DateOffset(days=9, hours=5)
print(grading_deadline)

ax.axvline(x=grading_deadline,c="k",zorder=0,lw=2.5,ls="--", label="Deadline")
if up_to_now:
    ax.axvline(x=datetime.utcnow(),c="m",zorder=0,lw=2.5,ls="-.", label="Present")
ax.set_ylim(0,100)
ax.set_title("{} Grading Progress".format(assignment.name.iloc[0]))

ax.set_xlabel("Graded At (UTC)")
ax.legend(loc='lower right')
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
fig.tight_layout()
plot_path = join(OUTPUT_DIR, "quiz_{}_grading_progress.png"
                             .format(quiz_number))
fig.savefig(plot_path,
            facecolor='w',
            transparent=False)

In [None]:
# add manually points to the line2d artists
# import numpy as np
# artists = plt.gca().get_children()

# for index, ta_first in enumerate(ta_order):
#     line_name = "Line2D(_line{})".format(index)
#     line_2d = [x for x in artists if line_name==x.__str__()][0]
#     print(ta_first, line_2d.__str__())
#     x_start = earliest_times.iloc[index]
#     y_start = 0
#     xdata = np.insert(line_2d.get_xdata(), obj=0, values=x_start)
#     print(xdata)
#     ydata = np.insert(line_2d.get_ydata(), obj=0, values=y_start)
#     line_2d.set_data
# item.set_data()