In [None]:
import numpy as np
import pandas as pd

# from pathvalidate import sanitize_filename
from canvasapi import Canvas
from canvasapi.course import Course
from canvasapi.module import Module, ModuleItem

# Initialize Canvas
You need to create an access token to use the Canvas API.

You can do this in account settings (https://canvas.brown.edu/profile/settings), under the "approved integrations" heading.

Paste your token where it says 'your_token_here'

In [None]:
# Replace <your_token_here> with your Canvas access token; the cell will not run until you do.
token = <your_token_here>
# Create the API client for https://canvas.brown.edu, authenticated with the token above.
canvas = Canvas('https://canvas.brown.edu', token)

# Print list of courses and select course to analyze
The cell below prints a list of the courses you're enrolled in.
Select a course by pasting the 6-7 digit course code in the following cell, where it says 'your_course_here'

In [None]:
# Print the list of courses you're enrolled in.
# Select a course in the cell below by the number shown in parentheses.
courses = canvas.get_courses()
for course in courses:
    print(course)

In [None]:
# Replace <your_course_here> with the course number printed in parentheses by the previous cell.
course_code = <your_course_here>

# Fetch the selected course; the following cells read its groups from `course`.
course = canvas.get_course(course_code)

# Analyze discussions
Canvas is organized in a set of nested pages. 
Discussion topics include two levels of posts: "entries" and "replies".

The following cells assume that your course lists discussion pages under "groups". You can tell if this is the case by looking at the URL of a discussion page. Here's what a discussion topic that is nested under a "group" looks like: https://canvas.brown.edu/groups/192040/discussion_topics/3754897

If your course is organized differently (e.g., it doesn't include groups), then alter the for-loops accordingly.


## Set up discussion dataset
First, we'll iterate through all course groups, and then through discussion topics, entries, and replies. This sets up a "list of lists" that we can analyze later.

Finally, the cell below will return a list of discussion topics for the first group; you can use this list to select discussion topics to analyze in the following section.

Again, you'll have to alter the for-loop for different ways of organizing course data.

In [None]:
# Get the course's discussion groups.
groups = course.get_groups()

# Walk groups -> topics -> entries -> replies, attaching each level's results
# to its parent object (group.topics, topic.entries, entry.replies) so the
# analysis cell below can read them back from `groups`.
for group in groups:
    group.topics = group.get_discussion_topics()
    for topic in group.topics:
        topic.entries = topic.get_topic_entries()
        for entry in topic.entries:
            entry.replies = entry.get_replies()

# Print the list of discussion topics for the first group. A course without
# groups gets an explanatory message instead of an IndexError from groups[0].
first_group = next(iter(groups), None)
if first_group is None:
    print('No groups found for this course; alter the for-loops above to match how your course is organized.')
else:
    for topic in first_group.topics:
        print(topic)

## Analyze discussion data
Now, we'll select an individual discussion topic (chosen under "topic index", below); iterate through each group; grab the usernames associated with all entries and replies; and return a table of how many times each student posted an original entry, and how many times they replied to other entries.

Again, you'll have to alter the for-loop for different ways of organizing course data.

In [None]:
# Set the topic index to the discussion topic you want to analyze, chosen from the list printed above.
# The topic index is the row number of that topic in the list; rows start at 0.
# Replace <your_topic_index_here> with that number.
topicindex = <your_topic_index_here>


# Count the number of posts and replies for each username, across all groups,
# for the discussion topic selected by `topicindex`.
posters = []
replies = []

for group in groups:
    for entry in group.topics[topicindex].entries:
        posters.append(entry.user_name)
        # Credit each reply to the person who wrote it (reply.user_name),
        # not to the author of the entry being replied to.
        for reply in entry.replies:
            replies.append(reply.user_name)

# Count posts per person.
poster_names, poster_freq = np.unique(posters, return_counts=True)
post_df = pd.DataFrame({'Names': poster_names, 'Posts': poster_freq})

# Count replies per person.
reply_names, reply_freq = np.unique(replies, return_counts=True)
reply_df = pd.DataFrame({'Names': reply_names, 'Replies': reply_freq})

# Merge posts and replies on the shared 'Names' column. The outer join keeps
# students who only posted or only replied; their missing count becomes 0.
df = post_df.merge(reply_df, on='Names', how='outer')
df = df.fillna(0)

# fillna leaves the count columns as floats after the outer join; convert
# them back to int64.
df['Posts'] = df['Posts'].astype('int64')
df['Replies'] = df['Replies'].astype('int64')

# Print the table of posts and replies, and the mean of each.
print(df)

print('::::::Mean posts: ', df['Posts'].mean())

print('::::::Mean replies: ', df['Replies'].mean())