# **Canvas API Exploration Notebook**

### **Step 0. Run ***pip install -r requirements.txt*** in your Terminal** 
#### (you may need to include either a "python" or "python -m" prefix)

### **Step 1. Import Required Libraries**

In [1]:
%load_ext heat
%load_ext line_profiler

In [2]:
# canvasapi used as API Wrapper
# Pandas used as Data Exploration and Manipulation Tool

from canvasapi import Canvas
import pandas as pd

### **Step 2. Initialize Constants and Master Canvas Object**

In [3]:
# Canvas API URL
API_URL = "https://uvu.instructure.com"

# Canvas API key (follow instructions on README file)
API_KEY = open("API_KEY.txt", "r").read().strip()

# Initialize a new Canvas object
canvas = Canvas(API_URL, API_KEY)

# Canvas User ID
USER_ID = canvas.get_user('self').id

### **Step 3. Initialize Lists**

In [13]:
from canvasapi.course import Course
from canvasapi.assignment import Assignment
# These lists will act as temporary data stores and be the blueprints for the tables to be used in the Transactional Database

courses = []
course_objects: list[Course] = []
course_assignments = []
course_assignment_objects: list[Assignment] = []
course_assignment_submissions = []

### **Step 4. Load Data from All Courses into DataFrame Object**

In [14]:
# iterate through all available courses, append the raw strings to a list of dictionaries, append the Course objects to a separate list
for course in canvas.get_courses():
    courses.append({'course_id':course.id, 'course_name':course.name})
    course_objects.append(course)

# output list of dictionaries as Pandas DataFrame
courses = pd.DataFrame(courses)

# view results
courses

Unnamed: 0,course_id,course_name
0,521623,ART-1010-X02 | Summer 2021
1,533121,BIOL-1010-X01-X07-X08-Fall 2021-XLIST | Egan
2,516477,BIOL-1610-X05 | Spring 2021
3,512661,BIOL-1615-209 | Spring 2021
4,523168,CHEM-1010-001 | Summer 2021 B2
5,492699,COMM-1020-J40 | Spring 2020 HSS
6,516748,COMM-2110-X08 | Spring 2021
7,553394,CS-305G-601 | 2023 Spring - Full Term
8,555857,CS-496R-001 | 2023 Spring - Full Term
9,503456,CS-1400-X03 | Fall 2020


### **Step 5. Load All Assignments from All Courses into DataFrame Object**

In [15]:
# OUTER FOR LOOP: iterate through all available courses
# INNER FOR LOOP: for each course, iterate over all available assignments, append full Assignment Objects to list, append the raw strings to a list of dictionaries
for course in course_objects:
    for assignment in course.get_assignments(): # Uses a linked list of GET requests. Can't be parallelized.
        course_assignment_objects.append(assignment)
        course_assignments.append({'course_id':course.id, 'assignment_id':assignment.id, 'assignment_name':assignment.name, 'description':assignment.description, 'submitted':assignment.has_submitted_submissions, 'points_possible':assignment.points_possible, 'submission_types':assignment.submission_types})

# output list of dictionaries as Pandas DataFrame
course_assignments = pd.DataFrame(course_assignments)

# view results
course_assignments

Unnamed: 0,course_id,assignment_id,assignment_name,description,submitted,points_possible,submission_types
0,521623,5499199,1.10 Assignment,"<link rel=""stylesheet"" href=""https://instructu...",True,20.0,[online_upload]
1,521623,5499195,1.3 Discussion Assignment,"<link rel=""stylesheet"" href=""https://instructu...",True,10.0,[discussion_topic]
2,521623,5499196,2.9 Discussion Assignment,"<link rel=""stylesheet"" href=""https://instructu...",True,20.0,[discussion_topic]
3,521623,5499202,3.8 Assignment: Feldman Method Critique,"<link rel=""stylesheet"" href=""https://instructu...",True,35.0,[online_upload]
4,521623,5499201,3.8 Assignment: Extra Credit Peer Review,"<link rel=""stylesheet"" href=""https://instructu...",True,0.0,[online_upload]
...,...,...,...,...,...,...,...
1786,519681,5322841,Exam 1 - proctorio (Remotely Proctored),"<link rel=""stylesheet"" href=""https://instructu...",True,0.0,[online_quiz]
1787,519681,5322843,Exam 2 - proctorio (Remotely Proctored),,True,0.0,[online_quiz]
1788,519681,5322845,Exam 3 - proctorio (Remotely Proctored),"<link rel=""stylesheet"" href=""https://instructu...",True,0.0,[online_quiz]
1789,519681,5322844,Exam 4 - proctorio (Remotely Proctored),,True,0.0,[online_quiz]


### **Step 6. Load All Assignment Submissions from All Assignments in All Courses into List**
#### **Current Bottleneck**

In [16]:
from concurrent.futures import ThreadPoolExecutor

def fn():
    with ThreadPoolExecutor(max_workers=32) as ex:
        submissions = ex.map(lambda assignment: assignment.get_submission('self'), course_assignment_objects)
        for submission in submissions:
            course_assignment_submissions.append(submission)

In [17]:
%lprun -f fn fn()

Timer unit: 1e-07 s

Total time: 23.1123 s
File: C:\Users\braxt\AppData\Local\Temp\ipykernel_28860\1608372485.py
Function: fn at line 3

Line #      Hits         Time  Per Hit   % Time  Line Contents
     3                                           def fn():
     4         1        458.0    458.0      0.0      with ThreadPoolExecutor(max_workers=32) as ex:
     5         1    1675074.0    2e+06      0.7          submissions = ex.map(lambda assignment: assignment.get_submission('self'), course_assignment_objects)
     6      1792  229437630.0 128034.4     99.3          for submission in submissions:
     7      1791       9571.0      5.3      0.0              course_assignment_submissions.append(submission)

In [None]:
from concurrent.futures import ThreadPoolExecutor

# for each Assignment Object in the "course_assignment_objects" list, iterate over each available submission made by the user (you)
with ThreadPoolExecutor(max_workers=32) as ex:
    submissions = ex.map(lambda assignment: assignment.get_submission('self'), course_assignment_objects)
    for submission in submissions:
        course_assignment_submissions.append(submission)

### **Step 7. Load All Submission Data into DataFrame Object**

In [None]:
submission_info = []
for submission in course_assignment_submissions:
    # only include assignments that have been completed
    try:
        if submission.attempt != None:
            submission_info.append({'assignment_id':submission.assignment_id, 'attachments':submission.attachments, 'attempt':submission.attempt, 'body':submission.body, 'due_date':submission.cached_due_date, 'grade':submission.entered_grade, 'score':submission.entered_score, 'extra_attempts':submission.extra_attempts, 'submission_id':submission.id, 'late':submission.late, 'submission_type':submission.submission_type, 'submitted_at':submission.submitted_at})
    except:
        pass
# output list of dictionaries as Pandas DataFrame
submissions = pd.DataFrame(submission_info)

# reorder columns in DataFrame
submissions = submissions[['assignment_id', 'submission_id', 'submission_type', 'body', 'attachments', 'attempt', 'extra_attempts', 'due_date', 'grade', 'score', 'late', 'submitted_at']]

# view results
submissions

### **Output All DataFrame Objects to CSV Files**

In [None]:
import os

courses_file = f'{USER_ID}_courses'
assignments_file = f'{USER_ID}_assignments'
submissions_file = f'{USER_ID}_submissions'

courses_file = os.path.join('test_files', courses_file)
assignments_file = os.path.join('test_files', assignments_file)
submissions_file = os.path.join('test_files', submissions_file)

courses.to_parquet(courses_file, index=False, compression='snappy')
course_assignments.to_parquet(assignments_file, index=False, compression='snappy')
submission.drop(columns=['attachments'], inplace=True)
submissions.to_parquet(submissions_file, index=False, compression='snappy')