In [72]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.plotly as plotl
import plotly.graph_objs as go
import plotly.tools as tls
plt.style.use('ggplot')  

In [87]:
def vocabSavedRange(dl):
	"""
	Translate a percentage of saved words into a bucket label.

	Exactly 0 maps to '0', values strictly between 0 and 1 map to '0-1',
	and values from 1 upward fall into the first bucket whose inclusive
	upper bound they do not exceed ('1-10', '11-20', ... '81-100').
	Anything above 100 is labelled '> 100'. Inputs that match none of these
	(negative numbers, NaN) yield the string 'None'.
	"""
	# (inclusive upper bound, label), in ascending order
	upper_bounds = (
		(10, '1-10'),
		(20, '11-20'),
		(40, '21-40'),
		(60, '41-60'),
		(80, '61-80'),
		(100, '81-100'),
	)

	if dl == 0:
		return '0'
	if 0 < dl < 1:
		return '0-1'
	if dl >= 1:
		for limit, label in upper_bounds:
			if dl <= limit:
				return label
		return '> 100'
	# negatives and NaN fail every comparison above
	return 'None'


def numOfUsersbyAvgDictionarySize(preppedDataWithoutSection):
	"""
	Count users per bucket of average percentage of words saved.

	Reads the prepped JSON, converts the 'avg_words_saved' column to a
	percentage (x100), averages that percentage per 'user' (an average of
	the per-row averages), labels each user's mean with vocabSavedRange and
	counts how many users fall under each label.

	Parameters:
		preppedDataWithoutSection: anything pandas.read_json accepts (here a
			path to the prepped JSON file). The data must provide 'user' and
			'avg_words_saved' columns.

	Returns:
		pandas.Series indexed by bucket label ('user_group') whose values
		are the number of users in that bucket.

	Findings noted by the original author on their dataset:
		More than half of students save an avg. of 1-20% of words in their
		dictionaries. An avg. video vocab count is 239, so between 10 and 25
		words are saved from a video on average by most users.
	"""
	df = pd.read_json(preppedDataWithoutSection)
	df['avg_words_saved_perc'] = df['avg_words_saved'] * 100
	# Aggregate only the column that is needed. A blanket .mean() over every
	# column raises TypeError on pandas >= 2.0 as soon as the file contains a
	# non-numeric column, and averages unrelated columns for no reason.
	per_user = df.groupby('user')[['avg_words_saved_perc']].mean()
	per_user['user_group'] = per_user['avg_words_saved_perc'].map(vocabSavedRange)
	return per_user.groupby('user_group').size()

# Number of users per "avg. % of words saved" bucket.
raw_data = numOfUsersbyAvgDictionarySize('prepped-wo-section.json')

# Horizontal bars: user counts along x, bucket labels along y.
final = go.Bar(x=raw_data.values, y=raw_data.index, orientation='h')
data = [final]

layout = go.Layout(
    title='Exploring students dictionary usage',
    xaxis={
        'title': '# of users',
        'titlefont': {
            'family': 'Courier New, monospace',
            'size': 18,
            'color': '#7f7f7f',
        },
    },
    yaxis={
        'title': 'Avg. % of words saved overall',
        'titlefont': {
            'family': 'Courier New, monospace',
            'size': 18,
            'color': '#7f7f7f',
        },
    },
)

fig = go.Figure(data=data, layout=layout)
# NOTE(review): plotl is plotly.plotly, so this presumably publishes the
# figure to the configured Plotly account under this filename - confirm.
plotl.iplot(fig, filename='numOfUsersbyAvgDictionarySize')

In [90]:
def listenScoreRange(dl):
	"""
	Translate a listening score into a bucket label.

	Values below -1 map to '<-1', exactly -1 maps to 'Incomplete' and
	exactly 0 maps to '0'. Positive values fall into the first bucket whose
	inclusive upper bound they do not exceed ('0-1', '2-49', ... '90-100');
	anything above 100 is labelled '>100'. Inputs that match none of these
	(values strictly between -1 and 0, NaN) yield the string 'None'.
	"""
	# (inclusive upper bound, label), in ascending order
	upper_bounds = (
		(1, '0-1'),
		(49, '2-49'),
		(59, '50-59'),
		(62, '60-62'),
		(66, '63-66'),
		(69, '67-69'),
		(72, '70-72'),
		(76, '73-76'),
		(79, '77-79'),
		(84, '80-84'),
		(89, '85-89'),
		(100, '90-100'),
	)

	if dl < -1:
		return '<-1'
	if dl == -1:
		return 'Incomplete'
	if dl == 0:
		return '0'
	if dl > 0:
		for limit, label in upper_bounds:
			if dl <= limit:
				return label
		return '>100'
	# values in (-1, 0) and NaN fail every comparison above
	return 'None'

def numOfUsersByScoreRange(preppedDataWithoutSection):
	"""
	Count users per bucket of average score.

	Reads the prepped JSON, averages the 'avg_score' column per 'user',
	labels each user's mean with listenScoreRange and counts how many users
	fall under each label.

	Parameters:
		preppedDataWithoutSection: anything pandas.read_json accepts (here a
			path to the prepped JSON file). The data must provide 'user' and
			'avg_score' columns.

	Returns:
		pandas.Series indexed by bucket label ('avg_score_range') whose
		values are the number of users in that bucket.
	"""
	df = pd.read_json(preppedDataWithoutSection)
	# Aggregate only the column that is needed. A blanket .mean() over every
	# column raises TypeError on pandas >= 2.0 as soon as the file contains a
	# non-numeric column, and averages unrelated columns for no reason.
	# NOTE(review): listenScoreRange labels -1 as 'Incomplete'; if -1 is a
	# sentinel in the rows, it is averaged in here like a real score - confirm
	# that this is intended.
	per_user = df.groupby('user')[['avg_score']].mean()
	per_user['avg_score_range'] = per_user['avg_score'].map(listenScoreRange)
	return per_user.groupby('avg_score_range').size()

# Number of users per average-score bucket.
raw_data2 = numOfUsersByScoreRange('prepped-wo-section.json')

# Horizontal bars: user counts along x, bucket labels along y.
final = go.Bar(x=raw_data2.values, y=raw_data2.index, orientation='h')
data = [final]

layout = go.Layout(
    title='Exploring students performance',
    xaxis={
        'title': '# of users',
        'titlefont': {
            'family': 'Courier New, monospace',
            'size': 18,
            'color': '#7f7f7f',
        },
    },
    yaxis={
        'title': 'Avg. score',
        'titlefont': {
            'family': 'Courier New, monospace',
            'size': 18,
            'color': '#7f7f7f',
        },
    },
)

fig = go.Figure(data=data, layout=layout)
# NOTE(review): plotl is plotly.plotly, so this presumably publishes the
# figure to the configured Plotly account under this filename - confirm.
plotl.iplot(fig, filename='numOfUsersByScoreRange')