In [None]:
from canvasapi import Canvas # pip install canvasapi

from os.path import join

from autocanvas.config import INPUT_DIR, OUTPUT_DIR

from autocanvas.core.conversions import (
    series_from_api_object, 
    df_from_api_list)

from autocanvas.core.course_info import (
    get_PHY_course, 
    get_assignment_group_from_name, 
    get_teaching_personel,
    get_students_from_sections,)

from autocanvas.core.assignments import (
    get_assignment,
    get_assignment_submissions,
    get_graded_submissions,
    get_submitted_submissions,
    get_assignment_groups,
    get_assignment_collection,
    get_submissions_in_collection,
    get_student_answers,
    get_quiz,
    get_question_ids)

# Async routines are needed here because there is no
# API method to retrieve the questions for all students at once,
# so we need to make hundreds of calls.
import aiohttp
import asyncio

import re
import difflib
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import numpy as np
# Show up to 500 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

API_URL = "https://ufl.instructure.com/"
try:
    from autocanvas.config import get_API_key
    API_KEY = get_API_key()
except FileNotFoundError as e:
    # get_API_key raised FileNotFoundError (presumably the key file is
    # missing -- confirm in autocanvas.config), so ask for the token instead.
    # getpass is used rather than input() so the typed token is not echoed
    # into the cell output, which would be saved with the notebook.
    from getpass import getpass
    print(e)
    API_KEY = getpass("Asking for API token")

# Client object used by every API call below.
canvas = Canvas(API_URL, API_KEY)

# Get General course Info

In [None]:
# Look up the course by code and semester with the project helper
# get_PHY_course, then print its name as a quick check.
course = get_PHY_course(canvas, 
                        course_code="PHY2053", 
                        semester="Summer C 2020")
print(course.name)

In [None]:
# The result is unpacked into df_TAs and df_teachers
# (presumably one frame per entry of `groups` -- TODO confirm).
df_TAs, df_teachers = get_teaching_personel(course, add_first_name=True, 
                          groups=["ta", "teacher"])

# CSV read from INPUT_DIR and passed as `section_ta_csv`; presumably it maps
# sections to their TAs -- verify against get_students_from_sections.
file_name = "sections_phy2053_summer2020.csv"
file_path = join(INPUT_DIR, file_name)
df_students, df_sections = get_students_from_sections(
                                course, 
                                section_ta_csv=file_path)
print("Completed importing students")

# Course Assignment for Exam

In [None]:
# Exam to analyse. The assignment is looked up by the name "Exam <number>".
exam_number = 1
assignment = get_assignment(course, name="Exam "+str(exam_number))
# Display the assignment object.
assignment

In [None]:
# Fetch the submissions of the assignment, requesting the submission history
# (it is read later to get each student's answers).
df_subs = get_assignment_submissions(assignment, 
                               df_students=df_students, 
                               df_TAs=df_TAs, 
                               include_submission_history=True)
# Reduce to the subset returned by get_submitted_submissions and report its size.
df_submitted = get_submitted_submissions(df_subs)
print(len(df_submitted))

In [None]:
# Display the submitted submissions for inspection.
df_submitted

In [None]:
# The following field contains the submitted answers of a student.
# Row position 19 is hard-coded; the last entry of the history is shown.
df_submitted.iloc[19].submission_history[-1]["submission_data"]

In [None]:
# Flatten every student's submission into one row per answered question.
# In the past I have found students with multiple submissions, so those are
# reported (name, number of history entries) and only the last entry is used.
assert "submission_history" in df_submitted.columns
answers_list = []
for _, submission in df_submitted.iterrows():
    history = submission.submission_history
    if len(history) != 1:
        print(submission.sortable_name, len(history))
    sub_data = history[-1]["submission_data"]
    assert sub_data is not None
    for question_data in sub_data:
        # Build a new dict instead of adding keys to `question_data`: that
        # dict belongs to df_submitted, and tagging it in place would alter
        # the submission history held by the frame.
        answers_list.append({**question_data,
                             "user_id": submission.user_id,
                             "student_name": submission["name"]})

df_student_answers = pd.DataFrame(answers_list)
df_student_answers.head(20)

In [None]:
# df_student_answers = get_student_answers(df_submitted)   
# df_student_answers.head(20)

# Corresponding Quiz

The Canvas API has two methods of accessing a quiz assignment. Each exposes different attributes.

In [None]:
# Look up the quiz by the title "Exam <number>" and display the result.
quiz = get_quiz(course, title="Exam "+str(exam_number))
quiz

**Note**: this fails for concluded courses (access denied). The same is true for the website.

In [None]:
# Take the API object stored in the first row of `quiz` and list its questions.
quiz_questions = list(quiz["object"].iloc[0].get_questions())

In [None]:
# Convert the question objects to a frame with the project helper.
df_questions = df_from_api_list(quiz_questions, 
                                  drop_created_at=False, 
                                  bring_to_front=None)
# Keep only rows whose question_type is "multiple_choice_question";
# .copy() makes the result independent of df_questions.
df_multiple_choice_questions = df_questions[
            df_questions["question_type"]
                         =="multiple_choice_question"].copy()
df_multiple_choice_questions

In [None]:
# Show the text of one multiple-choice question (row position 23 is hard-coded).
df_multiple_choice_questions.iloc[23].question_text

In [None]:
# Select the questions to discount: multiple-choice questions whose name
# contains "Q11" or "Q12". Inside a character class "|" is a literal
# character, so the class must be [12] to accept exactly "1" or "2".
# na=False treats a missing question_name as "no match" instead of producing
# a NaN that cannot be used as a boolean mask.
question_ids = df_multiple_choice_questions[
    df_multiple_choice_questions.question_name
    .str.contains(r"Q1[12]", na=False)].index.tolist()

# The selected index values are used as question ids in the cells that follow.
df_bad_questions = df_questions[df_questions.index.isin(question_ids)]
df_bad_questions

In [None]:
# Keep only the answers given to the selected (bad) questions.
# Note that this overwrites df_student_answers with the filtered frame.
df_student_answers = df_student_answers[(df_student_answers
                                         .question_id
                                         .isin(question_ids))]

# df_student_answers["question_version"] = df_student_answers.apply(lambda x: 1 if x["question_id"]==question_id1 else 2, axis=1)
# Number of remaining answers.
print(len(df_student_answers))

# Number of remaining answers per question id.
print(df_student_answers.question_id
                        .value_counts())

df_student_answers

In [None]:
# Answers to the bad questions, reduced to the student identifiers and the
# "correct" column.
df_answers_in_bad_questions = df_student_answers[
        df_student_answers
        .question_id.isin(question_ids)][["student_name", "user_id", "correct"]]

# Sum "correct" per student and expose it as "scores_in_bad"
# (assumes "correct" is boolean/numeric so the sum counts correct answers --
# TODO confirm).
df_scores_in_bad_questions = (df_answers_in_bad_questions.groupby(
                                            ["student_name", "user_id"])
                                .sum().reset_index().rename(columns={"correct":
                                                             "scores_in_bad"})
                             )
# Distribution of the per-student sums.
df_scores_in_bad_questions["scores_in_bad"].value_counts()

In [None]:
# List the submissions of the quiz object stored in the first row of `quiz`.
quiz_submissions = list(quiz["object"].iloc[0].get_submissions())

In [None]:
## Open question: should a test student be removed? Is there one for quiz objects?
# I have checked that it is not the last one, so the slice below stays disabled.
# df_quiz_subs = df_quiz_subs[:-1]
df_quiz_subs = df_from_api_list(quiz_submissions, 
                                drop_created_at=False, 
                                bring_to_front=None, 
                                set_index_id=False)
# From now on we want to make sure we keep all submissions: remember the row
# count and compare it again after the merges.
len_quiz_subs = len(df_quiz_subs)
# Attach student information; clashing student columns get the "_student"
# suffix. validate="m:1" makes the merge raise if an id repeats in df_students.
df_quiz_subs = pd.merge(left=df_quiz_subs, 
                        right=df_students, 
                   left_on="user_id", right_on="id",
                    suffixes=("","_student"),
                   how="left",validate="m:1")
# Attach the per-student sum for the bad questions. Because this is a left
# merge, submissions without a matching user_id get NaN in scores_in_bad.
df_quiz_subs = pd.merge(left=df_quiz_subs, 
                        right=df_scores_in_bad_questions[["user_id","scores_in_bad"]], 
                   left_on="user_id", right_on="user_id",
                   how="left",validate="1:1")
assert len(df_quiz_subs)==len_quiz_subs, "Data were lost unexpectedly"
print(len(df_quiz_subs))  
df_quiz_subs.head(5)

In [None]:
# Distribution of the current quiz scores.
df_quiz_subs.score.value_counts()

In [None]:
# Subtract what each student scored on the bad questions from the quiz score.
# scores_in_bad is NaN for submissions that had no answer to a bad question
# (it comes from a left merge); those have nothing to subtract, so NaN is
# treated as 0 instead of turning the whole updated score into NaN.
df_quiz_subs["updated_score"] = (df_quiz_subs.score
                                 - df_quiz_subs.scores_in_bad.fillna(0))
df_quiz_subs["updated_score"].value_counts()

In [None]:
# Table to export: student names with the current and the updated score.
df_new_scores = df_quiz_subs[["sortable_name", "name","score", "updated_score"]]
df_new_scores


In [None]:
# Write the score table to OUTPUT_DIR. The exam number in the file name is
# taken from `exam_number`, so analysing a different exam cannot overwrite
# the file of another one.
output_path = join(OUTPUT_DIR,
                   "phy2053_exam_{}_new_scores.csv".format(exam_number))

df_new_scores.to_csv(output_path,
                     index=False)

# (Skip for multiple choice Exams) Exam Question Variables 

In [None]:
# Inspect one quiz submission object (list position 27 is hard-coded).
quiz_submissions[27]

In [None]:
# import requests
# result = requests.get(API_URL+"api/v1/"+"quiz_submissions/{}/questions/".format(21067791),
#       headers={'Authorization': 'Bearer {}'.format(access_token)})

# result.json().get("quiz_submission_questions", [])

In [None]:
async def gather_questions():
    """
    Download the question data of every quiz submission.

    One GET request to ``quiz_submissions/<id>/questions/`` is issued for each
    id in ``df_quiz_subs.id``. Requests are sent in batches of ``rate_limit``
    concurrent calls, with a 0.250 s pause after each batch.

    Reads the notebook-level names ``API_URL``, ``API_KEY`` and
    ``df_quiz_subs``.

    Returns
    -------
    dict
        Maps submission id to the decoded JSON response. A submission whose
        request did not return HTTP 200 is left out; its id and status code
        are printed instead.
    """
    # need to deal with high watermark:
    # https://community.canvaslms.com/t5/Developers-Group/API-Rate-Limiting/ba-p/255845
    # suggestion of groups of 30 every 0.250 seconds:
    # https://community.canvaslms.com/t5/Developers-Group/API-Rate-Limiting/m-p/211140
    rate_limit = 30
    all_sub_questions = {}
    headers = {'Authorization': 'Bearer {}'.format(API_KEY)}

    async def get_sub_questions(session, submission_id):
        # Fetch one submission and store the response under its id.
        url = API_URL+"api/v1/"+"quiz_submissions/{}/questions/".format(submission_id)
        async with session.get(url, headers=headers) as resp:
            if resp.status==200:
                all_sub_questions[submission_id] = await resp.json()
            else:
                # Read the body anyway, then report which request failed.
                await resp.text()
                print(submission_id, resp.status)

    # The ids drive both the batching and the requests, so the loop bound and
    # the requested submissions cannot get out of step.
    submission_ids = list(df_quiz_subs.id)

    # A single session is shared by all requests instead of opening a new
    # session (and connection pool) for every call.
    async with aiohttp.ClientSession() as session:
        for bunchstart in range(0, len(submission_ids), rate_limit):
            print(bunchstart)
            batch = submission_ids[bunchstart:bunchstart+rate_limit]
            await asyncio.gather(*(get_sub_questions(session, submission_id)
                                   for submission_id in batch))
            await asyncio.sleep(0.250)

    return all_sub_questions

# Top-level `await`: this line only works where the environment already runs
# an event loop for the cell (as the notebook kernel does), not in a plain script.
all_sub_questions = await gather_questions()

In [None]:
# Compare how many submissions exist with how many responses were stored;
# any difference means some requests did not return data.
print(len(quiz_submissions))
print(len(all_sub_questions))
assert len(quiz_submissions)==len(all_sub_questions), "Data were lost. Decrease request rate."
# Inspect one question entry of one submission (positions 7 and 2 are hard-coded).
all_sub_questions[df_quiz_subs.id.iloc[7]]['quiz_submission_questions'][2]

In [None]:
# Build one record per (submission, selected question) holding the variable
# values that were used in that student's version of the question.
question_inputs_list = []
for submission_id, sub_qs in all_sub_questions.items():
    for question in sub_qs["quiz_submission_questions"]:
        # Only the questions selected in `question_ids` are of interest.
        if question["id"] in question_ids:
            variables_dict = {}
            
#             print(submission_id)
#             print(question["answers"][0])
            # The variables are read from the first entry of "answers".
            variables = question["answers"][0]["variables"]
            question_name = question['question_name']
            position = question['position']
#             print(variables)
            # Store each variable under its name, converted to float.
            for variable in variables:
                variables_dict[variable["name"]] = float(variable["value"])
            # Snapshot of the variables only, taken before the bookkeeping
            # keys below are added.
            variables_dict["input"] = variables_dict.copy()
            variables_dict["question_name"] = question_name
            variables_dict["position"] = position
            variables_dict["submission_id"] = submission_id
            question_inputs_list.append(variables_dict)
df_inputs = pd.DataFrame(question_inputs_list) 
df_inputs.head()

- Merge on submission_id with quiz_subs to add student_name and section_TA_first 
- Merge on student name with answers df.
- Write function that calculates the correct answer
- compare with text (i.e. student_answer) to get a revised correct indicator.
- compare with current correct indicator to decide whether they need regrading

In [None]:
# Add student and section columns from df_quiz_subs to each input record,
# matching on the submission id. validate="1:1" makes the merge raise if a
# submission_id occurs more than once on either side.
df_inputs = pd.merge(left=df_inputs, 
                       right=(df_quiz_subs[["id", 
                                            "name", 
                                            "quiz_version",
                                            "sortable_name", 
                                            "class_number",
                                            "section_ta_first_name"]]
                              .rename(columns={"id":"submission_id"})), 
                       on="submission_id", 
                       how="left", 
                       validate="1:1")
# Extend every "input" dict with the quiz version as an int.
df_inputs["input"] = df_inputs.apply(lambda x: {**x["input"],
                                                **{"quiz_version":
                                                   int(x["quiz_version"])
                                                  }
                                               }, 
                                     axis=1)

df_inputs

In [None]:
# Join the students' answers with the inputs of their question version.
# NOTE(review): the join key is the student's name, not an id. validate="1:1"
# makes the merge raise if a student_name occurs more than once on either
# side (e.g. two students with the same name, or several rows per student).
df_combined = pd.merge(left=df_student_answers, 
                       right=(df_inputs
                              .rename(columns={"name":"student_name"})
                             ), 
                       on="student_name", 
                       how="right", 
                       validate="1:1")
# Extend every "input" dict with the question id and position of its row.
df_combined["input"] = df_combined.apply(lambda x: {**x["input"],
                                                    **{"question_id":
                                                       x["question_id"],
                                                       "position":
                                                       x["position"]},
                                                   }, axis=1)

df_combined


In [None]:
# Inspect one assembled input dict (row position 5 is hard-coded).
df_combined["input"].iloc[5]

# (Skip for multiple choice Exams) Calculate new answers

In [None]:
#phy2053 summer 2021 Quiz 1 Question 3 Version 1 was missing 1.2
def get_correct_time(d, t2, quiz_version=None, decimals=4, **_):
    """
    Recompute the answer of the time question.

    Evaluates ``(d - 1.2*t2 - 0.5*0.1*t2**2) / 1.2`` and rounds the result.

    Parameters
    ----------
    d, t2 : numbers
        Variable values of the question.
    quiz_version : optional
        Accepted but not used.
    decimals : int
        Number of decimals passed to ``round``.
    **_ :
        Any further keyword arguments are accepted and ignored.

    Returns
    -------
    The rounded value.
    """
    linear_term = 1.2*t2
    quadratic_term = 0.5*0.1*t2**2
    unrounded = (d - linear_term - quadratic_term)/1.2
    return round(unrounded, decimals)

#quiz 9 PHY2054 Spring 2021 Q1 both versions on Tuesday
def get_correct_B_field(I, r, quiz_version=None, decimals=4, **_):
    """
    Recompute the answer of the B-field question (Q1V1).

    The base value is ``1/(3.14*I*r*r/10000)``. For quiz version 25.0 it is
    additionally multiplied by 0.985 and divided by 0.174; for quiz version
    26.0 it is used as is.

    Parameters
    ----------
    I, r : numbers
        Variable values of the question.
    quiz_version : number
        Must compare equal to 25.0 or 26.0.
    decimals : int
        Number of decimals passed to ``round``.
    **_ :
        Any further keyword arguments are accepted and ignored.

    Raises
    ------
    ValueError
        If `quiz_version` is neither 25.0 nor 26.0.
    """
    # Validate the version first so nothing is computed for an invalid one.
    if quiz_version==25.0:
        apply_factor = True
    elif quiz_version==26.0:
        apply_factor = False
    else:
        raise ValueError("Quiz version is invalid")

    field = 1/(3.14*I*r*r/10000)
    if apply_factor:
        field = field * 0.985/0.174
    return round(field, decimals)

def get_correct_radius(I, B, quiz_version=None, decimals=2, **_):
    """
    Recompute the answer of the radius question (Q1V2).

    The base value is ``100/sqrt(I*B)``. For quiz version 25.0 it is
    additionally multiplied by ``sqrt(0.985/0.174)``; for quiz version 26.0
    it is used as is.

    Parameters
    ----------
    I, B : numbers
        Variable values of the question.
    quiz_version : number
        Must compare equal to 25.0 or 26.0.
    decimals : int
        Number of decimals passed to ``round``.
    **_ :
        Any further keyword arguments are accepted and ignored.

    Raises
    ------
    ValueError
        If `quiz_version` is neither 25.0 nor 26.0.
    """
    #Q1V2
    if quiz_version==25.0:
        answer = 100/np.sqrt(I*B) * np.sqrt(0.985/0.174)
    elif quiz_version==26.0:
        answer = 100/np.sqrt(I*B)
    else:
        # Fail with a clear error instead of reaching round() with `answer`
        # unassigned; this mirrors get_correct_B_field.
        raise ValueError("Quiz version is invalid")

    return round(answer,decimals)

def get_correct_answer(input_dict):
    """
    Recompute the correct answer for one input dictionary.

    Exactly one case is handled: ``input_dict["position"]`` equal to 3
    together with ``input_dict["question_id"]`` equal to the first entry of
    the notebook-level ``question_ids`` list. In that case the whole
    dictionary is forwarded to `get_correct_time` as keyword arguments.

    Raises
    ------
    ValueError
        If `input_dict` does not describe the handled case.
    """
    is_time_question = (input_dict["position"]==3 and
                        input_dict["question_id"]==question_ids[0])
    # A second case is currently disabled:
    #     position 1 with question_ids[1] -> get_correct_radius(**input_dict)
    if not is_time_question:
        raise ValueError("Question version is invalid")
    return get_correct_time(**input_dict)
    
# Quick check of get_correct_answer with hand-made variable values for the
# first selected question.
print(question_ids[0])

example_vars = {"d":66, "t2":19,
                "position":3, 
                "question_id":question_ids[0]}
print(example_vars["position"])
get_correct_answer(example_vars)

In [None]:
def is_correct(student_answer, correct_answer,
               answer_tolerance_percentage="2%"):
    """
    Tell whether a student's answer is within tolerance of the correct one.

    Parameters
    ----------
    student_answer, correct_answer : numbers
        Values to compare. `correct_answer` must be larger than 1E-4 in
        absolute value because the comparison is relative to it.
    answer_tolerance_percentage : str
        Allowed relative deviation as a percentage string, e.g. ``"2%"``.

    Returns
    -------
    bool
        True when the absolute relative difference is strictly smaller than
        the tolerance, False otherwise.
    """
    # TODO: check for max truncation error of correct answer and raise warning if too big
    allowed_fraction = float(answer_tolerance_percentage.strip("%"))/100
    assert abs(correct_answer) > 1E-4, "Correct answer is very close to zero, so I cannot calculate relative difference"
    deviation = abs((student_answer - correct_answer) / correct_answer)
    return True if deviation < allowed_fraction else False

# Example call: compare 6 against 6.1 with a 2% tolerance.
is_correct(student_answer=6, correct_answer=6.1, 
           answer_tolerance_percentage="2%")

In [None]:
# Recompute the correct answer of every row from its input dict.
df_combined["correct_answer"] = df_combined.apply(lambda x:get_correct_answer(x["input"]),axis=1)
# Parse the answer text as a float: commas are removed and an empty string
# becomes NaN.
df_combined["student_answer"] = (df_combined["text"]
                                     .str.replace(',', '')
                                     .replace("","NaN")
                                     .astype(float)
                                )
# Re-evaluate every answer with is_correct (default tolerance).
df_combined["revised_correct"] = df_combined.apply(lambda x: is_correct(x["student_answer"], x["correct_answer"]), axis=1)
# A row needs manual review when the revised verdict differs from the
# existing "correct" value.
df_combined["needs_manual_review"] = df_combined.apply(lambda x: np.logical_xor(x["revised_correct"], x["correct"]), axis=1)
df_combined

In [None]:
# Rows flagged for manual review, and how many there are.
df_manual_review = df_combined[df_combined["needs_manual_review"]]
print(len(df_manual_review))
df_manual_review

In [None]:
# First flagged rows for one TA (the first name is hard-coded).
df_manual_review[df_manual_review["section_ta_first_name"]=="Ioannis"].head()

In [None]:
# Number of flagged rows per TA first name; its index is used later to loop
# over the TAs.
manual_review_ta_counts = df_manual_review["section_ta_first_name"].value_counts()
manual_review_ta_counts

In [None]:
# How often each (correct, revised_correct) combination occurs among the flagged rows.
df_manual_review[["correct","revised_correct"]].value_counts()

In [None]:
# Number of flagged rows per quiz version.
df_manual_review["quiz_version"].value_counts()

In [None]:
# Set store_results to True to save one CSV per TA with the rows that need
# manual review; set it to False to only build the tables.
store_results = True
for ta in manual_review_ta_counts.index:
    # Rows of this TA, reduced to the columns needed for the review and
    # sorted by class and student.
    df_to_save = (df_manual_review[df_manual_review["section_ta_first_name"]==ta]
                  [["sortable_name",
                    "class_number",
                    "quiz_version",
                    "question_name",
                    "position",
                    "question_id",
                    "correct_answer",
                    "student_answer",
                    "correct",
                    "revised_correct",
                    "needs_manual_review"]]
                  .rename(columns={"correct":"old_correct"})
                  .sort_values(by=["class_number","sortable_name"])
                 )
    if store_results:
        # The number in the file name comes from `exam_number`, the only
        # exam/quiz number this notebook defines.
        output_path = join(OUTPUT_DIR , "{}_needs_manual_review_quiz_{}.csv"
                                           .format(ta.lower(), exam_number)
                          )
        df_to_save.to_csv(output_path, 
                          index=False)
# Shows the table of the last TA processed by the loop.
df_to_save

# Fix Grades

Skipping for now.

See [`QuizSubmission.update_score_and_comments`](https://canvasapi.readthedocs.io/en/stable/quiz-ref.html#canvasapi.quiz.QuizSubmission.update_score_and_comments).