In [1]:
import httpx
import requests
import json
import pandas as pd
from datetime import datetime
from tzlocal import get_localzone
from fastapi import HTTPException

"""
Decodes Canvas API responses into DataFrames for downstream use.
"""

def parse_iso_timestamps(data : pd.DataFrame) -> pd.DataFrame:
    """
    Replace all string timestamps in a DataFrame with datetime objects.
    All timestamps must comply with ISO-8601.

    Args:
        data (DataFrame): DataFrame containing ISO-8601 string timestamps.

    Returns:
        data (DataFrame): DataFrame containing datetime timestamps.
    """

    # Obtain all columns which can be converted
    # into a timestamp through ugly brute-forcing
    timestamp_columns = []
    for key, value in data.iloc[0].to_dict().items():
        if type(value) is str:
            try:
                datetime.fromisoformat(value)
                timestamp_columns.append(key)
            except:
                pass

    def isotime_to_timestamp(value : str | None, use_local_timezone : bool = True, as_string : bool = False):
        if type(value) is not str: return None
        
        time = datetime.fromisoformat(value)
        if use_local_timezone:
            time = time.astimezone(get_localzone())

        if as_string:
            time = time.strftime("%A, %d %B %Y, %I:%M %p")
            
        return time
    
    for column in timestamp_columns:
        data[column] = data[column].map(isotime_to_timestamp)

    return data

def process_canvas_dataframe(response : pd.DataFrame) -> pd.DataFrame:
    """
    Apply the standard clean-up steps to a DataFrame built from Canvas data.

    Args:
        response (DataFrame): DataFrame built from Canvas API records.

    Returns:
        response (DataFrame): The frame with ISO-8601 string timestamps parsed
        and every column that holds only NA values removed.
    """
    with_timestamps = parse_iso_timestamps(response)

    # how="all" removes a column only when every one of its values is NA.
    return with_timestamps.dropna(axis=1, how="all")
    
def decode_canvas_response(response : requests.Response | httpx.Response | list[dict]) -> pd.DataFrame:
    """
    Converts raw Canvas API response into a DataFrame for downstream use,
    or raises Exception if the response is invalid.

    Args:
        response (Response | list[dict]): Either the HTTP response obtained
            from Canvas (requests or httpx), or a list of records that has
            already been decoded.

    Raises:
        HTTPException: If the HTTP response status is not 200 (the status and
            body text are forwarded), or if its body is empty or cannot be
            read (status 400).
        ValueError: If `response` is neither a Response object nor a list.
            A body that is not valid UTF-8 encoded JSON also surfaces as a
            ValueError subclass (UnicodeDecodeError / json.JSONDecodeError).

    Returns:
        response_data (DataFrame): The response data. An empty DataFrame is
        returned when there are no records.
    """

    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of list and of the two Response classes.
    if isinstance(response, list):
        response_list = response

    elif isinstance(response, (requests.Response, httpx.Response)):
        if response.status_code != 200:
            raise HTTPException(
                status_code = response.status_code,
                detail = response.text
            )

        # Reading the body is the only step guarded here, so unrelated
        # failures are not mislabelled as "no content".
        try:
            content = response.content
        except Exception as error:
            raise HTTPException(status_code=400, detail="Canvas API response contained no content") from error

        if not content:
            raise HTTPException(status_code=400, detail="Canvas API response contained no content")

        # Canvas API will always respond in JSON format,
        # but we obtain the response in bytes, so it
        # must be decoded into a raw string and then
        # parsed into a JSON object.
        response_json : dict | list = json.loads(content.decode('utf-8'))

        # A single JSON object is wrapped so that it becomes one row.
        response_list = response_json if isinstance(response_json, list) else [response_json]

    else:
        raise ValueError("Response must be either a requests/httpx Response object or a list of dicts.")

    # Empty response
    if not response_list:
        return pd.DataFrame([])

    return process_canvas_dataframe(pd.DataFrame(response_list))

async def query_canvas(path : str, magic : str, provider : str, params : dict | None = None, max_items : int = 100, timeout : int = 60) -> pd.DataFrame:
    """
    Obtain data from the Canvas API using a HTTP GET request.
    See https://developerdocs.instructure.com/services/canvas/resources for more information.

    The caller's `params` dict is never modified: the request parameters are
    assembled in a fresh dict. The access token is sent in the
    `Authorization` header rather than the query string so that it does not
    end up in logged request URLs.

    Args:
        path (str): The Canvas v1 API path suffix. E.g., "courses" for "provider.instructure.com/api/v1/courses"
        magic (str): The magic key generated by the wizard to enter the gate.
        provider (str): The name of the institution which has installed Canvas in lowercase.
        params (dict, optional): Additional request parameters for the API to use. Specific to each resource. Defaults to no additional parameters.
        max_items (int, optional): The maximum number of items to return. Sent to Canvas as `per_page`. Defaults to 100.
        timeout (int, optional): How many seconds to allow the API to respond before timing out. Defaults to 60.

    Raises:
        HTTPException: With status 408 if the request times out, with the
            status returned by Canvas if it is not 200, with the status chosen
            by `decode_canvas_response` if that raises a HTTPException, and
            with status 500 if decoding fails for any other reason.

    Returns:
        data (DataFrame): The response from Canvas in DataFrame form.
    """
    # Build the query in a new dict. Writing into `params` would leak the
    # token and page size into the caller's dict (and into a shared default).
    # A caller-supplied "access_token" is dropped: `magic` is the credential.
    request_params = {key: value for key, value in (params or {}).items() if key != "access_token"}
    request_params["per_page"] = max_items

    headers = {"Authorization": f"Bearer {magic}"}

    url = f"https://{provider}.instructure.com/api/v1/{path}"

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(url, params=request_params, headers=headers)
    except httpx.TimeoutException as error:
        # TimeoutException is the base class of the connect/read/write/pool
        # timeouts, so every kind of timeout is reported the same way.
        raise HTTPException(
            status_code = 408,
            detail = f"{provider.capitalize()} Canvas API GET request timed out after {timeout} seconds. Request URL: {url}"
        ) from error

    if response.status_code != 200:
        raise HTTPException(
            status_code = response.status_code,
            detail = response.text
        )

    try:
        data : pd.DataFrame = decode_canvas_response(response)
    except HTTPException:
        # Already carries an accurate status and detail; do not mask it as a 500.
        raise
    except Exception as error:
        raise HTTPException(
            status_code = 500,
            detail = str(error)
        ) from error

    return data

In [2]:
# Obtain Canvas access token from .env file

import os
from dotenv import load_dotenv, find_dotenv

# Load the variables defined in the discovered .env file into the process
# environment, then read the token from there.
load_dotenv(find_dotenv())
token = os.environ.get("TOKEN")

# Stop here with a clear message instead of letting a missing token surface
# later as an authentication error from the Canvas API.
if not token:
    raise RuntimeError("TOKEN is not set. Add TOKEN=<Canvas access token> to a .env file or the environment.")

In [3]:
params = {"include":"term"}
raw_units = await query_canvas(path="courses", magic=token, provider="swinburne", max_items=50, params=params)

In [21]:

raw_units = await query_canvas(path="courses", magic=token, provider="swinburne", max_items=50, params=params)

Unnamed: 0,id,name,course_code,account_id,created_at,start_at,default_view,enrollment_term_id,is_public,grading_standard_id,...,original_name,blueprint,template,enrollments,hide_final_grades,workflow_state,restrict_enrollments_to_course_dates,overridden_course_visibility,locale,course_format
0,61564,1] Capstone Project B,2024-HS2-COS40006-Computing Technology Project...,85,2024-04-24 09:43:51+10:00,,wiki,779,False,87.0,...,2024-HS2-COS40006-Computing Technology Project B,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",False,available,False,,,
1,52786,1] CTIP,2023-HS2-COS30049-Computing Technology Innovat...,85,2023-04-14 08:52:32+10:00,,wiki,715,False,87.0,...,2023-HS2-COS30049-Computing Technology Innovat...,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",False,available,False,,,
2,65692,1] Honours B,2025-HS2-NPS40013-Honours Project Part B-H1,79,2024-12-23 10:08:48+11:00,,wiki,948,False,87.0,...,2025-HS2-NPS40013-Honours Project Part B,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",True,available,False,,,
3,49155,1] Intro to AI,2023-HS1-COS30019-Introduction to Artificial I...,85,2023-01-10 10:45:32+11:00,,wiki,683,,87.0,...,2023-HS1-COS30019-Introduction to Artificial I...,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",False,available,False,,,
4,43430,1] Intro to Programming,2022-HS1-COS10009-Introduction to Programming-H1,85,2022-02-01 12:34:42+11:00,2022-02-18T05:02:38Z,wiki,587,False,87.0,...,2022-HS1-COS10009-Introduction to Programming,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",True,available,False,,,
5,66785,1] Linear Algebra,2025-HS1-MTH10013-Linear Algebra and Applicati...,78,2025-01-16 10:31:33+11:00,,modules,946,False,87.0,...,2025-HS1-MTH10013-Linear Algebra and Applications,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",True,available,False,,,
6,54180,2] Cloud Computing Architecture,2023-HS2-COS20019-Cloud Computing Architecture-H1,85,2023-06-01 10:59:33+10:00,,wiki,715,False,87.0,...,2023-HS2-COS20019-Cloud Computing Architecture,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",True,available,False,,,
7,44774,2] Comp Systems,2022-HS2-COS10004-Computer Systems-H1,85,2022-03-22 14:27:32+11:00,,wiki,600,,87.0,...,2022-HS2-COS10004-Computer Systems,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",False,available,False,,,
8,48754,2] Comp Tech Des Project,2023-HS1-COS20031-Computing Technology Design ...,85,2022-12-12 14:48:37+11:00,,wiki,683,,87.0,...,2023-HS1-COS20031-Computing Technology Design ...,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",False,available,False,,,
9,40745,2] CT Inquiry Project,2022-HS1-COS10026-Computing Technology Inquiry...,85,2021-12-08 14:59:47+11:00,2022-02-21T05:47:42Z,wiki,587,False,87.0,...,2022-HS1-COS10026-Computing Technology Inquiry...,False,False,"[{'type': 'student', 'role': 'StudentEnrollmen...",True,available,False,,,


In [17]:
# Turn the per-course "term" entries (requested through include=term) into
# their own DataFrame; presumably each entry is one term record.
raw_terms = decode_canvas_response(
    raw_units["term"].tolist()
)

# Show one row per term id. The result is only displayed, not assigned, so
# raw_terms itself still contains the repeated terms.
raw_terms.drop_duplicates(subset=["id"])

Unnamed: 0,id,name,start_at,end_at,workflow_state,created_at
0,779,2024 Semester 2,2024-07-15 00:00:00+10:00,2024-12-31 00:00:00+11:00,active,2023-10-04 14:25:02+11:00
1,715,2023 Semester 2,2023-07-17 00:00:00+10:00,2024-01-02 00:00:00+11:00,active,2022-10-25 13:00:35+11:00
2,948,2025 Semester 2,2025-07-21 00:00:00+10:00,2026-01-06 00:00:00+11:00,active,2024-06-20 15:20:32+10:00
3,683,2023 Semester 1,2023-02-13 00:00:00+11:00,2023-08-01 00:00:00+10:00,active,2022-08-23 09:15:02+10:00
4,587,2022 Semester 1,2022-02-21 11:00:00+11:00,2022-06-26 10:00:00+10:00,active,2021-06-07 08:51:01+10:00
5,946,2025 Semester 1,2025-02-17 00:00:00+11:00,2025-08-05 00:00:00+10:00,active,2024-06-04 08:50:03+10:00
7,600,2022 Semester 2,2022-07-25 00:00:00+10:00,2023-01-03 00:00:00+11:00,active,2021-09-06 12:58:30+10:00
19,1,Default term,NaT,NaT,active,2017-03-09 20:24:49+11:00
20,756,2024 Semester 1,2024-02-12 00:00:00+11:00,2024-07-30 00:00:00+10:00,active,2023-06-06 10:18:33+10:00
22,711,2023 Winter,2023-06-05 00:00:00+10:00,2023-09-12 00:00:00+10:00,active,2022-09-15 15:10:32+10:00
