In [None]:
%cd ~/Dropbox/CanvasHacks

#Plotting 
%matplotlib inline
from matplotlib import pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf')

import matplotlib.dates as mdates
import matplotlib.ticker as mticker

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette(sns.color_palette('plasma'))

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf')

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, Band, Span
# render bokeh plots inline in the notebook (not to a static HTML file)
output_notebook()

#numbers
import numpy as np
import pandas as pd
# Show up to 999 rows before pandas truncates a displayed frame.
pd.options.display.max_rows = 999
# Use the fully-qualified option key: the bare 'precision' pattern also matches
# 'styler.format.precision' in pandas >= 1.4 and raises OptionError there.
pd.set_option('display.precision', 2)
pd.options.plotting.matplotlib.register_converters = True


import datetime
import json

from CanvasHacks import environment
from CanvasHacks.Api.RequestTools import *
from CanvasHacks.Api.UrlTools import *
from CanvasHacks.Configuration import InteractiveConfiguration
# import CanvasHacks.GradingTools as GT
# import CanvasHacks.DownloadProcessingTools as PT

# File system
from CanvasHacks.TimeTools import getDateForMakingFileName
from CanvasHacks.Files.FileTools import  create_folder, makeDataFileIterator
from CanvasHacks.Files.JournalsFileTools import get_journal_folders, make_folder_list, calculate_journal_counts
from CanvasHacks.Files.QuizReportFileTools import sort_frames_by_age, get_newest_data

# from CanvasHacks.JournalsFileTools import journal_folder_name, create_folder
# from CanvasHacks.FileTools import getDateForMakingFileName

# Canvas api
from canvasapi import Canvas
from canvasapi.quiz import QuizReport, Quiz
from canvasapi.requester import Requester
from canvasapi.conversation import Conversation

# Initialize a Canvas api objects
canvas = Canvas(environment.CONFIG.canvas_url_base, environment.CONFIG.canvas_token)
requester = Requester(environment.CONFIG.canvas_url_base, environment.CONFIG.canvas_token)

# Configuration
from CanvasHacks.Definitions.skaa import Review, InitialWork, MetaReview
from CanvasHacks.Definitions.unit import Unit #Assignment

# Exceptions
# from CanvasHacks.Errors.review_associations import AlreadyAssigned, SubmissionIncomplete

# Models
from CanvasHacks.Models.student import Student
from CanvasHacks.Models.student import student_from_canvas_user, ensure_student

# Repos
from CanvasHacks.Repositories.DataManagement import DataStore
from CanvasHacks.Repositories.quizzes import QuizRepository, ReviewRepository
from CanvasHacks.Repositories.codes import AccessCodeRepo
from CanvasHacks.Repositories.reviewer_associations import assign_reviewers, AssociationRepository
from CanvasHacks.Repositories.students import StudentRepository
from CanvasHacks.Repositories.factories import WorkRepositoryLoaderFactory


# Storage
from CanvasHacks.DAOs.sqlite_dao import SqliteDAO

# Widgets
from CanvasHacks.Widgets.ConsolidatedTextOutput import make_assignment_header, make_consolidated_text_fields
from CanvasHacks.Widgets.InputFields import make_course_ids_input, make_canvas_token_input, make_canvas_url_input, make_general_reset_button
from CanvasHacks.Widgets.AssignmentSelection import make_assignment_chooser, view_selected_assignments, view_ungraded_assignments
from CanvasHacks.Widgets.LiveSelection import make_test_selector
from CanvasHacks.Widgets.AssignmentSelection import make_unit_chooser


from CanvasHacks.Repositories.status import FeedbackStatusRepository, InvitationStatusRepository
from CanvasHacks.Repositories.students import StudentRepository
from CanvasHacks.Models.status_record import ComplexStatusRecord, FeedbackReceivedRecord, InvitationReceivedRecord,\
    StatusRecord

from CanvasHacks.SkaaSteps.ISkaaSteps import IStep


# Plotting
from CanvasHacks.Text.VisualizationTools import rotate_x_labels
# from CanvasHacks.Repositories.quizzes import fix_forgot_answers

import inspect
def look_inside(obj):
    """Print the (name, value) pairs of every non-routine member of ``obj``."""
    def _is_data_member(member):
        # Keep plain attributes; drop functions, methods and builtins.
        return not inspect.isroutine(member)

    members = inspect.getmembers(obj, _is_data_member)
    print(members)
    
    
SEMESTER_NAME = 'S20'
LOC = '{}/Box Sync/TEACHING/Phil 305 Business ethics/Phil305 S20'.format(environment.ROOT)# placeholder for where the access codes are stored
ACCESS_CODES_FP = "{}/{}-assignment-access-codes.xlsx".format(LOC, SEMESTER_NAME)
    
    
# LIKERT_PLOT_ORDER = ['Forgot', 'Strongly disagree', 'Disagree', 'Agree', 'Strongly agree']
# LIKERT_NUM_MAP = {'Forgot' : 0, 'Strongly disagree': 1, 'Disagree': 2, 'Agree': 3, 'Strongly agree': 4}


In [None]:
# Lists of ids keyed by term code (presumably Canvas course ids -- confirm).
CLASS_IDS = dict(
    F19=[62657, 67473, 62660],
    F18=[41179, 41180, 41181],
    S19=[67531],
    S20=[77522],
    F20=[79991],
)

# End of semester data archiving

In [None]:
from CanvasHacks.Assessment.api import get_all_essay_assigns_for_class, store_course_essays, store_course_reviews
from CanvasHacks.Api.RequestTools import get_all_course_assignments
from CanvasHacks.Definitions.skaa import InitialWork
from CanvasHacks.Assessment.processing import process_essay_entries
from CanvasHacks.Assessment.files import EssayFiles

from CanvasHacks.Assessment.api import get_all_journal_assigns_for_class, store_course_journals
from CanvasHacks.Assessment.processing import process_journal_entries
from CanvasHacks.Assessment.files import JournalFiles #make_content_filepath, make_bag_filepath, make_week_iterator, 

#     'F20' : [79991] 
# SEMESTER_NAME = 'F20'
 
SEMESTER_NAME = 'S21'
# courses_to_get = [  79991]
courses_to_get = environment.CONFIG.course_ids

## Essays

In [None]:
# Download and store
for cid in courses_to_get:
    store_course_essays(cid, SEMESTER_NAME, start_unit=1)

# Process into bags
filename_handler = EssayFiles()

fiter = filename_handler.make_content_file_iterator()
existing = filename_handler.bag_files

# Iterate with `for` so the cell finishes cleanly once the iterator is
# exhausted; the previous `while True: next(fiter)` form always ended the
# cell with an uncaught StopIteration.
for content_path in fiter:
    with open(content_path, 'r') as f:
        print("Processing ", f.name.split('/')[-1:])
        entries = json.load(f)
        process_essay_entries(entries, existing)

## Journals

In [None]:
# Download and store
for cid in courses_to_get:
    try:
        store_course_journals(cid, SEMESTER_NAME, start_week=1)
    except Exception as err:
        # Still best effort (move on to the next course), but report the
        # failure instead of silently discarding it.
        print("Could not store journals for course {}: {!r}".format(cid, err))

In [None]:
# Process into bags
filename_maker = JournalFiles()

fiter = filename_maker.make_content_file_iterator()
existing = filename_maker.bag_files

# A `for` loop ends normally when the iterator runs out; the previous
# `while True: next(fiter)` form always finished by raising StopIteration.
for content_path in fiter:
    with open(content_path, 'r') as f:
        print("Processing ", f.name.split('/')[-1:])
        entries = json.load(f)
        process_journal_entries(entries, existing)

## Reviews

In [None]:
store_course_reviews(courses_to_get[0], SEMESTER_NAME, start_unit=1)


In [None]:
make_unit_chooser(num_units=8)

In [None]:
environment.CONFIG.unit.components

In [None]:

unit_numbers = [i for i in range(1,9)]
unit_numbers

In [None]:
# course=environment.CONFIG.course, 


# Module-level handle on the configured course; later cells pass it as `course`.
course=environment.CONFIG.course 



unit_numbers = [i for i in range(1,9)]

# Walk units 1-8, re-pointing the shared CONFIG at each one in turn.
# Prints 'k' for a unit whose `components` is empty and 'j' otherwise.
# After the loop CONFIG.unit is whatever was built for the last unit number.
for unit_number in unit_numbers:
    environment.CONFIG.set_unit_number(unit_number)
    environment.CONFIG.initialize_canvas_objs()
    environment.CONFIG.unit = Unit(environment.CONFIG.course, unit_number)
    
    if len(environment.CONFIG.unit.components) == 0:
        print('k')
        continue
    
    print('j')

    
# activity=environment.CONFIG.unit.review


In [None]:
def activity_type(activity):
    """Return 'metareview' or 'review' according to the activity's class.

    MetaReview is tested before Review; an activity that is an instance of
    neither falls through and yields None.
    """
    labels_by_class = ((MetaReview, 'metareview'), (Review, 'review'))
    for klass, label in labels_by_class:
        if isinstance(activity, klass):
            return label
    return None
activity_type(activity)

In [None]:
              
                                              
# NOTE(review): `activity` is not assigned in any visible cell (the only
# candidate, `activity=environment.CONFIG.unit.review`, is commented out), so
# this call depends on leftover kernel state -- confirm before Restart & Run All.
reviewRepo = WorkRepositoryLoaderFactory.make(course=course, 
                                              activity=activity, 
                                              save=False, 
                                              download=False,
                                             rest_timeout=1)

In [None]:
to_drop = [ 'n correct', 'n incorrect', 'score_x',  'score_y', 'workflow_state']
d = reviewRepo.data.copy(deep=True)
d['unit'] = environment.CONFIG.unit.unit_number
d['activity_type'] = activity_type(activity)

In [None]:
d

In [None]:
#  Download and store
for cid in courses_to_get:
    try:
        # Use `start_unit`, the keyword the other store_course_reviews call in
        # this notebook passes. The previous `start_week=1` was hidden behind a
        # bare `except: pass`, so a rejected keyword would have gone unnoticed.
        # NOTE(review): confirm against store_course_reviews' signature.
        store_course_reviews(cid, SEMESTER_NAME, start_unit=1)
    except Exception as err:
        # Best effort across courses, but surface what went wrong.
        print("Could not store reviews for course {}: {!r}".format(cid, err))

In [None]:
# Process into bags
# NOTE(review): neither `ReviewFiles` nor `process_review_entries` is imported
# in any visible cell (only the Essay*/Journal* counterparts are) -- add the
# imports once their module paths are confirmed.
filename_maker = ReviewFiles()

fiter = filename_maker.make_content_file_iterator()
existing = filename_maker.bag_files

# A `for` loop ends normally when the iterator runs out; the previous
# `while True: next(fiter)` form always finished by raising StopIteration.
for content_path in fiter:
    with open(content_path, 'r') as f:
        print("Processing ", f.name.split('/')[-1:])
        entries = json.load(f)
        process_review_entries(entries, existing)

# Monitoring student progress in the class
## Determine completions per day


In [None]:

make_unit_chooser(num_units=8)

In [None]:
# Build one frame pairing the daily invitation counts (review activity) with
# the daily feedback counts (initial-work activity) for the selected unit.
s = IStep(unit=environment.CONFIG.unit)
s._initialize_db()

review = environment.CONFIG.unit.review
essay = environment.CONFIG.unit.initial_work
# Both status repositories read from the step's dao.
inv = InvitationStatusRepository( s.dao, review )
fr = FeedbackStatusRepository( s.dao, essay )

invites = inv.get_daily_counts()
fdsnt = fr.get_daily_counts()
# invites

# Grab the name before `activity_name` is moved into the index below.
nm = invites.activity_name[0]
fdsnt.set_index(['activity_id', 'activity_name'], inplace=True)
invites.set_index(['activity_id', 'activity_name'], inplace=True)
# Join the two count frames on their shared `sent_at` column.
counts = pd.merge(invites, fdsnt, left_on='sent_at', right_on='sent_at')
# Label every merged row with the name taken from the invitation frame.
counts['activity_name'] = nm


In [None]:
counts

In [None]:
make_assignment_chooser()

## All submissions from all unit assignments

In [None]:
from CanvasHacks.Repositories.submissions import AssignmentSubmissionRepository
unit_nums = [u for u in range(1, 7)]

# Maps unit number -> concatenated daily-count frames of that unit's components.
# NOTE: this rebinds `counts`, which an earlier cell used for a merged DataFrame.
counts = {}
for u in unit_nums:
    # NOTE(review): other cells call `set_unit_number`; confirm `set_unit` exists.
    environment.CONFIG.set_unit(u)
    # `unit` stays bound to the last unit after the loop; the unit-end survey
    # cell reads it.
    unit = environment.CONFIG.unit
    ucs = []
    for c in unit.components:
        assignment = environment.CONFIG.course.get_assignment( c.id )
        ar = AssignmentSubmissionRepository(assignment)
        ucs.append(ar.get_daily_counts(activity_name=c.activity_name))
    ucs = pd.concat(ucs)
    counts[u] = ucs
# # counts
#     for assignment_id, assignment_name in environment.CONFIG.assignments:
#     assignment = environment.CONFIG.course.get_assignment( assignment_id )
#     ar = AssignmentSubmissionRepository(assignment)
#     counts.append(ar.get_daily_counts(activity_name=assignment_name))
# counts = pd.concat(counts)
len(counts.keys())

In [None]:
# fig, axes = plt.subplots(nrows=6, figsize=(10,10))
# for u in unit_nums:

#     c = counts[u].reset_index()

#     g = sns.relplot(x="submitted_at", y="num_submissions",  data=c, hue='activity_name', ax=axes[u-1])
#     # g.fig.set_title('Unit {}')
# fig.autofmt_xdate()

In [None]:
# Combine the per-unit frames into one long frame tagged with the unit number
# and the weekday name of each index entry. `assign` returns new frames, so the
# frames stored in `counts` are no longer modified in place, and
# `submission_data` is only ever bound to a DataFrame.
submission_data = pd.concat([
    frame.assign(unit=unum, day=frame.index.day_name())
    for unum, frame in counts.items()
])

In [None]:
DAY_ORDER = ['Monday','Tuesday','Wednesday', 'Thursday', 'Friday', 'Saturday',  'Sunday']

In [None]:
p = sns.catplot(x="day", y="num_submissions", 
            order=DAY_ORDER, 
            hue="unit",
            kind='bar',
            data=submission_data, height=4, aspect=2, legend_out=False);
rotate_x_labels(p.ax)
p.ax.legend(loc='upper center', title='Unit')
p.fig.tight_layout()

## Cumulative submissions

In [None]:
submission_data.groupby('activity_name').num_submissions.sum().plot()

In [None]:
def plot_cumulative_completions_for_unit(submission_data, unit_number):
    """Show a bokeh bar chart of total submissions per activity for one unit.

    :param submission_data: DataFrame with `unit`, `activity_name` and
        `num_submissions` columns
    :param unit_number: value matched against the `unit` column
    :return: None; the figure is displayed via bokeh's `show`
    """
    unit_rows = submission_data[submission_data.unit == unit_number]
    # Sum only the submission counts; a blanket `.sum()` also tried to add up
    # the other columns (unit numbers, weekday names).
    totals = (unit_rows.groupby('activity_name')
              .num_submissions.sum()
              .reset_index())
    source = ColumnDataSource(totals)

    p = figure(
        plot_width=1000,
        plot_height=400,
        # String x values are categorical: bokeh needs the factors supplied as
        # the x_range, otherwise the bars cannot be placed on the axis.
        x_range=[str(name) for name in totals.activity_name],
        title="Cumulative submission counts")

    p.vbar(
        x='activity_name',
        top='num_submissions',
        color="green",
        width=0.4,
        source=source,
        alpha=0.8)
    show(p)
    
    
plot_cumulative_completions_for_unit(submission_data, 5)

In [None]:
# Bar chart of submissions per activity, one bar colour per unit.
# NOTE(review): `gd` is not assigned in any visible cell, so this depends on
# leftover kernel state -- presumably a per-unit/activity aggregation of
# `submission_data`; confirm and define it explicitly.
p = sns.catplot(x="activity_name", y="num_submissions", 
            hue="unit",
            kind='bar',
            data=gd.reset_index(), height=4, aspect=2, legend_out=False);
rotate_x_labels(p.ax)
p.ax.legend(loc='upper center', title='Unit')
p.fig.tight_layout()

In [None]:

# sns.set_palette(sns.color_palette('plasma'))
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

counts_comb.unit = counts_comb.unit.astype(str)
# group = counts_comb.groupby(by=[ 'unit', 'day'])       
# group = df.groupby(by=['cyl', 'mfr'])

# index_cmap = factor_cmap('day_unit', palette=Spectral5, factors=sorted(counts_comb.unit.unique()), end=1)

# p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
#            x_range=group, toolbar_location=None, tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")])


# Total submissions per (unit, day) pair.
# NOTE(review): `counts_comb` is not assigned in any visible cell -- it looks
# like an earlier name for `submission_data`; confirm.
group = pd.DataFrame(counts_comb.groupby(['unit', 'day']).num_submissions.sum())

# group = counts_comb.groupby('unit')


d=counts_comb
source = ColumnDataSource(group)
# NOTE(review): `x_range` is given the aggregated DataFrame here; the
# commented-out example above passed a groupby object instead -- verify that
# bokeh accepts this as a categorical range.
p = figure(
        plot_width=1000,
        plot_height=400,
    x_range=group,
        title="Daily submission counts")#,
#         x_axis_type='datetime')  #,
    #            tooltips=TOOLTIPS)


#     source2 = ColumnDataSource(frame.rolling(window='7D').mean().reset_index())

    # add a circle renderer with a size, color, and alpha
    # p.circle(x='date', y='new_confirmed_cases', size=10, color="navy", source=source, alpha=0.5)
p.vbar(
        x='day_unit',
        top='num_submissions',
#         color="green",
        width=0.4,
#     source=group,
#     fill_color=index_cmap,
        source=source,
        alpha=0.8)#,
#         legend_label=f'Daily new {title_word}')
show(p)

## Cumulative completions (run logs)


Notes


    Maybe this should be done off of the submissions (think I started that elsewhere)

In [None]:
# Started working on this elsewhere. Similar idea to the above, but
# this uses the logs from the skaa runner

bad_stem = 'Unnamed:'

data = pd.read_excel(environment.RUN_DATA_LOG_PATH)
unit_nums = list(set(data.unit_number.tolist()))

# Columns whose label starts with 'Unnamed:' carry no named data; str() keeps
# the check safe for non-string column labels.
to_drop = [c for c in data.columns if str(c).startswith(bad_stem)]
# These are just the inverse of the essay and posts
to_drop.extend(['no_essay', 'no_posts'])

data = data.drop(to_drop, axis=1)

# For each unit, turn the running totals logged at each run into the change
# since the previous run (rows ordered by `ran_at`).
o = []
for u in unit_nums:
    a = (data[data.unit_number == u]
         .sort_values('ran_at')
         .set_index(['unit_number', 'ran_at']))
    anew = a - a.shift(1)
    o.append(anew)

# The first run of every unit has no predecessor and drops out here.
new = pd.concat(o).dropna()
# new

# sns.barplot returns a matplotlib Axes (not a FacetGrid), so the title and
# figure are reached through the Axes itself; `g.ax` / `g.fig` raised
# AttributeError.
g = sns.barplot(x="ran_at", y="essay",
            hue="unit_number", data=new.reset_index());
g.set_title("Cummulative essay submissions")
g.figure.autofmt_xdate()

# Give the raw totals their own axes instead of drawing over the plot above.
fig_totals, ax_totals = plt.subplots(figsize=(8,4))
data.essay.plot(kind='bar', ax=ax_totals)

fig, axes = plt.subplots(figsize=(8,4))
new.essay.plot(kind='bar', ax=axes)
fig.autofmt_xdate(); fig.tight_layout()

In [None]:
# counts_comb
g = sns.relplot(x="submitted_at", y="num_submissions",  data=submission_data.reset_index(), hue='unit',
                height=4, aspect=2)
rotate_x_labels(g.ax)
g.fig.tight_layout()

In [None]:
    # NOTE(review): this cell reads like the body of a function whose `def`
    # line is missing: `title_word`, `place` and `field_name` are used below but
    # never assigned in any visible cell. TOOLTIPS itself is currently unused
    # (the `tooltips=` argument of the figure is commented out).
    TOOLTIPS = [
        ("new_confirmed_cases", "$index"),
        ("(x,y)", "($x, $y)"),
        #     ("desc", "@desc"),
    ]

    p = figure(
        plot_width=1000,
        plot_height=400,
        title=f"Daily new {title_word} {place}",
        x_axis_type='datetime')  #,
    #            tooltips=TOOLTIPS)

    source = ColumnDataSource(frame.reset_index())
    source2 = ColumnDataSource(frame.rolling(window='7D').mean().reset_index())

    # add a circle renderer with a size, color, and alpha
    # p.circle(x='date', y='new_confirmed_cases', size=10, color="navy", source=source, alpha=0.5)
    p.vbar(
        x='date',
        top=field_name,
        color="green",
        width=0.4,
        source=source,
        alpha=0.8,
        legend_label=f'Daily new {title_word}')
    p.line(
        x='date',
        y=field_name,
        line_width=5,
        color="navy",
        source=source2,
        alpha=0.3,
        legend_label='7-day rolling avg')

    # df['lower'] = df.new_confirmed_cases.mean() - df.new_confirmed_cases.std()
    # df['upper'] = df.new_confirmed_cases.mean() + df.new_confirmed_cases.std()
    # band = Band(base='x', lower='lower', upper='upper', source=source, level='underlay',
    #             fill_alpha=1.0, line_width=1, line_color='black')
    # p.add_layout(band)

    # show the results
    show(p)

In [None]:
make_test_selector()
make_unit_chooser()

In [None]:
environment.CONFIG.course_ids[0]

# Student responses on reviews

In [None]:
make_test_selector()
make_unit_chooser(num_units=8)

In [None]:
studentRepo = StudentRepository(environment.CONFIG.course)
studentRepo.download()
    
dh = IStep(unit=environment.CONFIG.unit)
dh._initialize_db()
dao = dh.dao

In [None]:
def plot_review_responses(reviewRepo):
    """
    Plots answers to the review questions with bar graphs
    arranged in two columns.

    This fits on the pdf output pretty well

    :param reviewRepo: object exposing `multiple_choice_names` (the question
        column names) and `data` (a frame holding those columns)
    :return: None; draws onto a new matplotlib figure
    """
    question_names = list(reviewRepo.multiple_choice_names)
    if not question_names:
        # Nothing to draw; plt.subplots cannot build a zero-row grid.
        return

    n_cols = 2
    # Ceiling division. round() rounds halves to even, so e.g. 5 questions
    # gave round(2.5) == 2 rows (4 axes) and the fifth plot raised IndexError.
    n_rows = (len(question_names) + n_cols - 1) // n_cols
    # squeeze=False keeps `axes` two-dimensional even when there is one row.
    fig, axes = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=(12,20),
                             squeeze=False)

    for position, c in enumerate(question_names):
        ax = axes[position // n_cols, position % n_cols]
        # Title is the text after the first ':' (truncated to 65 chars);
        # fall back to the whole name when there is no ':'.
        parts = c.split(':')
        title = (parts[1] if len(parts) > 1 else c)[:65]
        # Pass the answers by keyword: current seaborn no longer accepts the
        # data vector positionally.
        g = sns.countplot(x=reviewRepo.data[c],
                          order=environment.LIKERT_PLOT_ORDER,
                          palette='plasma',
                          ax=ax)
        g.set_xlabel('')
        ax.set_title(title)
        rotate_x_labels(ax)

    # With an odd number of questions the last grid cell stays empty; hide it.
    for unused_ax in axes.flat[len(question_names):]:
        unused_ax.set_visible(False)

    fig.tight_layout()


## Discussion reviews

### Summaries of reviewer responses

In [None]:
discussion_assocRepo = AssociationRepository(dh.dao, environment.CONFIG.unit.discussion_review)


discussionReviewRepo = WorkRepositoryLoaderFactory.make(course=environment.CONFIG.course, 
                                              activity=environment.CONFIG.unit.discussion_review, 
                                              save=False, 
                                             rest_timeout=5)

In [None]:
plot_review_responses(discussionReviewRepo)

## Essay reviews

In [None]:
# NOTE(review): `essay_dao` is not assigned in any visible live cell (its only
# assignments sit in a commented-out cell near the end of the notebook), while
# the discussion section uses `dh.dao` -- confirm which dao is intended.
essay_assocRepo = AssociationRepository(essay_dao, environment.CONFIG.unit.initial_work)

# Review data for the unit's essay review activity.
essayReviewRepo = WorkRepositoryLoaderFactory.make(course=environment.CONFIG.course, 
                                              activity=environment.CONFIG.unit.review, 
                                              save=False, 
                                              rest_timeout=1)

### Summaries of reviewer responses

ToDo
    
    Look at each student across all reviews and figure out how much variation there is in the grading

In [None]:
# reviewRepo.data.assessee_id = reviewRepo.data.assessee_id.astype('int32')

In [None]:
plot_review_responses(essayReviewRepo)

### How do reviewers and reviewees rate each other

(Not sure that this is working because of the triad review structure)

NB, Only available in units with metareview

In [None]:
# Add assessee id to the reviewRepo dataframe
essayReviewRepo.add_review_assignments()


numd = essayReviewRepo.data.copy(deep=True)


for c in essayReviewRepo.multiple_choice_names:
    numd[c] = numd.apply(lambda x: environment.LIKERT_NUM_MAP.get(x[c]), axis=1)


try:
    numd.set_index('student_id', inplace=True)
except KeyError:
    pass

In [None]:
# One record per row of `numd`: the row's index label (assessor), the student
# assessed, and the sum of the numeric multiple-choice answers given.
review_totals = [
    {
        'assessor' : i,
        'assessee' : row.assessee_id,
        'total' : sum(row[essayReviewRepo.multiple_choice_names])
    }
    for i, row in numd.iterrows()
]
f = pd.DataFrame(review_totals)

# Same records indexed two ways: by who received (g) and by who gave (f).
g = f.set_index('assessee')
f = f.set_index('assessor')

b = []
# Visit each assessor once. Iterating the raw index repeated a student for
# every review they wrote, so they were counted several times in the gap
# statistics below.
for sid in f.index.unique():
    if sid not in g.index:
        # This student gave a review but received none; nothing to compare.
        continue
    # Boolean masks always yield a Series, whether one or many rows match.
    gave = f.total[f.index == sid].mean()
    recd = g.total[g.index == sid].mean()
    b.append({'gave': gave, 'recd': recd, 'gap': gave - recd})
# Explicit columns keep `b.gap` etc. available even when no pair was found.
b = pd.DataFrame(b, columns=['gave', 'recd', 'gap'])
len(b)

0 : Gave and received the same

\> 0: Gave a better score than they received

< 0: Received a better score than they gave

In [None]:
b.gap.describe()

In [None]:

g = sns.distplot(b.gap.dropna(), rug=True)
g.axes.set_xlim((b.gap.min(), b.gap.max()))

In [None]:
# Pass the values as `x=` so the violin is drawn along the x axis in every
# seaborn version; the x-limits below assume that orientation.
g = sns.violinplot(x=b.gap.dropna())
g.axes.set_xlim((b.gap.min(), b.gap.max()))

In [None]:
len(b[b.gap >0])

In [None]:
len(b[b.gap < 0])

In [None]:
# x and y must be keywords: current seaborn rejects positional data vectors.
sns.scatterplot(x=b.gave, y=b.recd)

In [None]:
sns.lmplot(x='gave', y='recd', data=b)

In [None]:
sns.lmplot(y='gave', x='recd', data=b)

# Assignment word counts

In [None]:
from CanvasHacks.Assessment.api import get_all_essay_assigns_for_class, store_course_essays
from CanvasHacks.Api.RequestTools import get_all_course_assignments
from CanvasHacks.Definitions.skaa import InitialWork
from CanvasHacks.Assessment.processing import process_essay_entries
from CanvasHacks.Assessment.files import EssayFiles

## Acquire, clean, and store text

### Acquire new assignments

In [None]:
# Label the download with the notebook-wide SEMESTER_NAME instead of a
# hard-coded 'S20', so essays from the configured courses are not filed under
# a different term than the one used by the archive cell above.
# NOTE(review): confirm 'S20' was not intentional for a one-off re-download.
for cid in environment.CONFIG.course_ids:
    store_course_essays(cid, SEMESTER_NAME, start_unit=1)

### Process and store wordbags

In [None]:
filename_handler = EssayFiles()

fiter = filename_handler.make_content_file_iterator()
existing = filename_handler.bag_files

# A `for` loop ends normally when the iterator runs out; the previous
# `while True: next(fiter)` form always finished by raising StopIteration.
for content_path in fiter:
    with open(content_path, 'r') as f:
        print("Processing ", f.name.split('/')[-1:])
        entries = json.load(f)
        process_essay_entries(entries, existing)

## Load bags

ToDo

    Unit 1 from S20 was a quiz assignment so the data needs to be created differently 

In [None]:
from CanvasHacks.Assessment.store import load_stored_bags, EssayAssignment, JournalAssignment, TermUnitStore, TermWeekStore, TokenFiltrationMixin
filename_handler = EssayFiles()

fiter = filename_handler.make_bag_file_iterator()
data = []

# Read every bag file as JSON; the loop stops when the iterator is exhausted,
# which is the point at which the count is reported.
for bag_path in fiter:
    with open(bag_path, 'r') as f:
        o = json.load(f)
        data.append(o)

print("Loaded {} files".format(len(data)))


stores, terms, units = load_stored_bags(filename_handler)

word_counts = pd.DataFrame([{'term': s.term, 'unit' : s.unit, 'word_count': b} for s in stores for b in s.bag_word_counts])
# word_counts.set_index('unit', inplace=True)

## Essay word counts

### All semesters

If your question is "how do students respond to word-count limits?", the answer is: they write more than before, and they write exactly up to the limit.

In [None]:
fig, axes = plt.subplots(figsize=(7, 4), nrows=2)
sns.boxplot(data=word_counts, y='word_count', x='unit', ax=axes[0])
sns.violinplot(data=word_counts, y='word_count', x='unit', ax=axes[1])
fig.tight_layout()

In [None]:
max_unit = 8
d = word_counts.set_index('unit')
sns.set_palette('plasma')
# sns.kdeplot(word_counts[word_counts.unit == 2], color='r', label='unit 2')
for i in range(1, max_unit +1):
    if i not in d.index:
        # No word counts stored for this unit.
        continue
    try:
        # The list selector always yields a Series, even for a single essay.
        sns.kdeplot(d.loc[[i], 'word_count'], label='unit {}'.format(i))
    except Exception as err:
        # Still skip units whose density cannot be drawn, but say so rather
        # than hiding every error behind a bare `except: pass`.
        print("Skipping unit {}: {!r}".format(i, err))
plt.legend()

# unit 6 for S21 vs other semesters (NB, refers to different essay prompt in S20)

In [None]:
s21 = word_counts[ word_counts.term == 'S21'] # and word_counts.unit == 6]
# unit6 = word_counts[ word_counts['term'] == 'S21'] # and word_counts.unit == 6]
s21unit6 = s21[s21.unit == 6]
# s21unit6

In [None]:
unit6 = word_counts[word_counts.unit == 6] #.set_index('term')
# unit6

In [None]:
fig, axes = plt.subplots(figsize=(7, 6), nrows=2)
# plt.title() only titled the current (last-created) subplot; suptitle labels
# the whole two-panel figure.
fig.suptitle('Unit 6 word counts')
sns.boxplot(data=unit6, y='word_count', x='term', ax=axes[0])
sns.violinplot(data=unit6, y='word_count', x='term', ax=axes[1])
fig.tight_layout()

In [None]:
unit6_stats = unit6.groupby('term')['word_count']
unit6_stats = pd.DataFrame(unit6_stats.describe())
unit6_stats

In [None]:
# pct change
# Each row of `unit6_stats` is a describe() summary (count, mean, std, ...).
# Read the 'mean' and '50%' (median) fields by label: `row.mean()` and
# `row.median()` are Series methods and averaged ALL of those statistics
# together instead of returning the field of the same name.
baseline = unit6_stats.loc['S21']
pct_diff = []
for idx, row in unit6_stats.iterrows(): #.reset_index()
    if idx != 'S21':
        pct_of_mean = row['mean'] / baseline['mean']
        pct_of_median = row['50%'] / baseline['50%']
        # NOTE(review): the values are fractions relative to the S21 figure
        # (1 - other/S21), not percentages relative to the earlier term.
        pct_diff.append({
            'term' : idx,
            'pct_increase_in_mean' : 1 - pct_of_mean,
            'pct_increase_in_median' : 1 - pct_of_median
        })
# Explicit columns keep set_index working when S21 is the only term.
pct_diff = pd.DataFrame(
    pct_diff,
    columns=['term', 'pct_increase_in_mean', 'pct_increase_in_median']
).set_index('term')
pct_diff

In [None]:
pct_diff.plot(kind='bar')
plt.title("Percent increase in S21 over previous terms")

# Unit-end surveys

In [None]:
make_test_selector()
make_unit_chooser()


TERM = 'S20'

SURVEY_FOLDER = '/Users/adam/Box Sync/TEACHING/Phil 305 Business ethics/Surveys/{}'.format(TERM)

def get_unit(filename):
    """Return the unit number that ends the first '_'-separated part of a file name.

    Only the final path component is inspected, so underscores in directory
    names no longer interfere, and all trailing digits are read, so unit
    numbers above 9 are supported.

    :param filename: file name or path, e.g. 'Unit 3_report.csv'
    :return: the unit number as an int
    :raises ValueError: if the part before the first '_' does not end in a digit
    """
    import os
    import re

    stem = os.path.basename(filename).split('_')[0]
    match = re.search(r'(\d+)$', stem)
    if match is None:
        raise ValueError("No unit number found in file name: {!r}".format(filename))
    return int(match.group(1))

fiter = makeDataFileIterator( SURVEY_FOLDER )
report_frames = [ ]
# Read every survey file in the folder, tagging each frame with the term and
# the unit number parsed from its file name.
for f in fiter:
    unit_num = get_unit(f)
    print( "loading: ", f )
    frame = pd.read_csv( f )
    frame['term'] = TERM
    frame['unit'] = unit_num
    # Setting student_id as the index was left out on purpose (it caused
    # problems before).
    report_frames.append( frame )

# Only the first loaded survey frame is used from here on.
class_data = report_frames[0]
# len(class_data)
# class_data

# NOTE(review): `unit` and `course` are module-level names left behind by
# earlier cells, not set in this one -- confirm they point at the intended
# unit/course before running.
surveyRepo = ReviewRepository(unit.unit_end_survey, course)
surveyRepo.data = class_data
surveyRepo.set_question_columns(surveyRepo.data)
# NOTE(review): the import of `fix_forgot_answers` is commented out in the
# first cell, so this name is undefined on a fresh kernel.
fix_forgot_answers(surveyRepo)

TIME_ORDER = ['Less than 1 hour', '1-3 hours', '3-5 hours', '5-7 hours', 'More than 7 hours']

question_names = list(surveyRepo.multiple_choice_names)
n_cols = 2
# Ceiling division (at least one row): round() rounds halves to even, so some
# odd question counts got one grid row too few and the last plot raised
# IndexError.
rows = max(1, (len(question_names) + n_cols - 1) // n_cols)
# squeeze=False keeps `axes` two-dimensional even when there is one row.
fig, axes = plt.subplots(ncols=n_cols, nrows=rows, figsize=(12,30), squeeze=False)

# Likert answers in plot order, without the 'Forgot' placeholder.
likert_order = [l for l in environment.LIKERT_PLOT_ORDER if l != 'Forgot']

for position, c in enumerate(question_names):
    ax = axes[position // n_cols, position % n_cols]
    # Title is the text after the first ':' (truncated to 65 chars); fall back
    # to the whole name when there is no ':'.
    parts = c.split(':')
    title = (parts[1] if len(parts) > 1 else c)[:65]
    # The first multiple-choice column is plotted against the time-spent
    # buckets; every other column uses the Likert scale.
    order = TIME_ORDER if position == 0 else likert_order

    # Pass the answers by keyword: current seaborn no longer accepts the data
    # vector positionally.
    g = sns.countplot(x=surveyRepo.data[c], order=order, palette='plasma', ax=ax)
    g.set_xlabel('')
    ax.set_title(title)
    rotate_x_labels(ax)

# Hide any grid cell left without a question.
for unused_ax in axes.flat[len(question_names):]:
    unused_ax.set_visible(False)
fig.tight_layout()

# Attic

In [None]:
def get_newest_data(activity):
    """Load every data file in the activity's folder and return the first
    frame after sorting by age (presumably the newest -- see
    `sort_frames_by_age`).

    NOTE(review): this definition rebinds the `get_newest_data` name that the
    first cell imports from CanvasHacks.Files.QuizReportFileTools.

    :param activity: object exposing `folder_path`, the folder to read from
    :return: a DataFrame with `submitted` parsed as datetimes and a
        `student_id` column (renamed from `id` when absent)
    """
    # get data from newest file
    report_frames = [ ]
    for f in makeDataFileIterator( activity.folder_path ):
        print( "loading: ", f )
        frame = pd.read_csv( f )
        frame.submitted = pd.to_datetime( frame.submitted )
        # Check the column labels: the previous test looked at `frame.index`
        # (the row labels), so it was always true and `id` was renamed even
        # when a `student_id` column already existed.
        if 'student_id' not in frame.columns:
            frame.rename( { 'id': 'student_id' }, axis=1, inplace=True )
        # this makes it freak out for some reason
        #         frame.set_index('student_id', inplace=True)
        report_frames.append( frame )
    return sort_frames_by_age( report_frames )[0]
    

In [None]:
# TEST = False

# environment.CONFIG.set_unit_number(1)

# if TEST:
#     environment.CONFIG.set_test()
# # environment.CONFIG.set_live()

# COURSE_ID = environment.CONFIG.course_ids[0]
# print("Working on course: ", COURSE_ID)


# UNIT_NUMBER = 1

# _initialize based on selection
# todo eventually should be integrated into config
# course = canvas.get_course(COURSE_ID)
# unit = Unit(course, environment.CONFIG.unit)
# codeRepo = AccessCodeRepo(ACCESS_CODES_FP, environment.CONFIG.unit)



# if TEST:
#     # testing: in memory db
#     dao = SqliteDAO()
#     print("Connected to testing db")
# else:
#     db_filepath = "{}/{}-Unit-{}-review-assigns.db".format( environment.LOG_FOLDER, SEMESTER_NAME, environment.CONFIG.unit)
#     # real: file db
#     dao = SqliteDAO(db_filepath)
#     dao.initialize_db_file()
#     print("Connected to REAL db")

# associationRepo = AssociationRepository(dao, unit.review)

In [None]:
# from CanvasHacks.DAOs.db_files import DBFilePathHandler
# if environment.CONFIG.is_test:
#     # testing: in memory db
#     essay_dao = SqliteDAO()
#     discussion_dao = SqliteDAO()
#     print("Connected to testing db")
# else:
#     discussion_dao = SqliteDAO(DBFilePathHandler.discussion_review(environment.CONFIG.unit_number))
#     essay_dao = SqliteDAO(DBFilePathHandler.essay_review(environment.CONFIG.unit_number))
    

In [None]:

# def load_stored_bags(file_handler):
#     fiter = file_handler.make_bag_file_iterator()
#     data = []
    
#     if isinstance(file_handler, EssayFiles):
#         AssignmentObj = EssayAssignment
#         ComboObj = TermUnitStore
#     elif isinstance(file_handler, JournalFiles):
#         AssignmentObj = JournalAssignment(**d)
#         ComboObj = TermWeekStore

#     try:
#         while True:
#             with open(next(fiter), 'r') as f:
#                 d = json.load(f)
#                 o = AssignmentObj(**d)
#                 data.append(o)

#     except StopIteration:
#         print("Loaded {} files".format(len(data)))

#     terms = list(set([e.term for e in data]))
    
#     try:
#         divs = list(set([e.unit_number for e in data]))
#     except NameError:
#         divs = list(set([e.week_num for e in data]))

#     stores = []
#     for t in terms:
#         for w in divs:
#             stores.append(ComboObj(t, w, data))

# #     len(week_stores)
    
#     return stores, terms, divs