In [9]:
%matplotlib inline
from collections import defaultdict
import pymc as pm
from pymc.Matplot import plot
import numpy as np
from matplotlib import pyplot as plt
import json
from pymongo import MongoClient
import pymongo
from bson.objectid import ObjectId
import jsonpickle

In [None]:
# Draw 1000 values from the Normal stochastic and show them as a normalised histogram.
B_mu = pm.Normal('B_mu_generator', 0, 500)
data = [B_mu.random() for i in range(1000)]
plt.hist(data, histtype="stepfilled", bins=25, normed=True)

In [None]:
# Draw 1000 values from the Gamma(10, 0.1) stochastic and show them as a normalised histogram.
B_tau = pm.Gamma('B_tau_generator', 10, 0.1)
data = [B_tau.random() for i in range(1000)]
plt.hist(data, histtype="stepfilled", bins=25, normed=True)

In [None]:
# Two-stage sampling done by hand: every pass draws a fresh value from the
# Gamma stochastic, builds a new Normal with that value as its third argument,
# and takes exactly one draw from that Normal.
B_tau = pm.Gamma('B_tau_generator', 50, 0.1)
data = []
for i in range(1000):
    B_val = B_tau.random()
    B = pm.Normal('B_generator', 0, B_val)
    data += [B.random()]
plt.hist(data, histtype="stepfilled", bins=25, normed=True)

In [None]:
# Same two distributions as the hand-rolled version, but here the Gamma
# stochastic itself is passed to the Normal and only B.random() is called.
# NOTE(review): B_tau.random() is never called in this cell - confirm whether
# B.random() re-draws its parent or reuses B_tau's current value.
B_tau = pm.Gamma('B_tau_generator', 50, 0.1)
B = pm.Normal('B_generator', 0, B_tau)
data = [B.random() for i in range(1000)]
plt.hist(data, histtype="stepfilled", bins=25, normed=True)

In [None]:
# NOTE(review): this B is rebound inside the loop before it is ever sampled,
# so the Normal(…, 0, 500) built here does not contribute to the histogram.
B = pm.Normal('B_generator',0,500)
data = list()
for i in range(0,1000):
    # NOTE(review): B_tau is not defined in this cell; the value comes from
    # whichever earlier cell last assigned it in the kernel. Confirm which
    # B_tau is intended, since a fresh "Restart & Run All" picks the last one.
    B_val = B_tau.random()
    # A new Normal is built per iteration using the freshly drawn B_val.
    B = pm.Normal('B_generator',0,B_val)
    data.append(B.random())
# Normalised histogram of the 1000 collected draws.
plt.hist(data, bins=25, histtype="stepfilled", normed=True)

# Reformatting data and serializing

In [1]:
class Question(object):
    """Plain record for one question.

    Every constructor argument is stored unchanged on an attribute of the
    same name: ``_id``, ``_type``, ``assignment``, ``text`` and
    ``max_grade_value``.
    """

    def __init__(self, _id, _type, assignment, text, max_grade_value):
        self._id, self._type = _id, _type
        self.assignment, self.text = assignment, text
        self.max_grade_value = max_grade_value

In [2]:
class Answer(object):
    """Plain record for one answer given inside a grading.

    The constructor stores ``_id``, ``grading``, ``question`` and ``flagged``.
    The payload is attached afterwards through one of the ``add_*`` methods,
    so ``text`` or ``grade_value``/``normalized`` exist only once the
    corresponding method has been called.
    """

    def __init__(self, _id, grading, question, flagged):
        self._id, self.grading = _id, grading
        self.question, self.flagged = question, flagged

    def add_text_answere(self, text):
        """Attach a free-text payload as ``self.text``."""
        self.text = text

    def add_value_answere(self, graded_value, normalized):
        """Attach a graded payload as ``self.grade_value`` and ``self.normalized``."""
        self.grade_value, self.normalized = graded_value, normalized

In [3]:
class Grading(object):
    """Plain record for one grading of a handin.

    Stores the constructor arguments unchanged; ``answeres`` is expected to
    be a list of answer ids.
    """

    def __init__(self, _id, grader, handin, answeres, assignment, feedback_grade):
        self._id, self.grader = _id, grader
        self.answeres, self.handin = answeres, handin  # answeres: list of answer ids
        self.assignment, self.feedback_grade = assignment, feedback_grade

In [4]:
class User(object):
    """Plain record for one course participant.

    Stores ``_id``, ``name`` and ``graded_handins`` (a list of ids) unchanged.
    """

    def __init__(self, _id, name, graded_handins):
        self._id, self.name = _id, name
        self.graded_handins = graded_handins  # list of ids

In [5]:
class Handin(object):
    """Plain record for one handin of an assignment.

    Stores ``_id`` and ``assignment`` plus two id lists: ``owners`` and
    ``gradings``.
    """

    def __init__(self, _id, assignment, owners, gradings):
        self._id, self.assignment = _id, assignment
        self.owners, self.gradings = owners, gradings  # both are lists of ids

In [6]:
class Assignment(object):
    """Plain record for one assignment.

    Stores ``_id`` and ``title`` plus two id lists: ``questions`` and
    ``handins``.
    """

    def __init__(self, _id, title, questions, handins):
        self._id, self.title = _id, title
        self.questions, self.handins = questions, handins  # both are lists of ids

In [24]:
class Course(object):
    """Plain record for one course.

    Stores ``_id`` and ``title`` plus two id lists: ``assignments`` and
    ``participants``.
    """

    def __init__(self, _id, title, assignments, participants):
        self._id, self.title = _id, title
        self.assignments, self.participants = assignments, participants  # both are lists of ids

## Fetching data

In [10]:
# Connect with the driver's default settings (no arguments are passed) and
# select the database every later cell queries through `db`.
client = MongoClient()
db = client["heroku_rnwkcq9r"]

Find all the ids of the relevant objects

In [11]:
# Lookup tables filled by the loops below and read by the serialization cells.
# Despite the `_i` suffix, most of them hold the fetched documents, not bare ids.
assignments_i = list() # assignment documents of the course
graders_i = list() # user documents for the ids in course['students']
questions_i = list() # question documents (flat list, not keyed)
assignment_question_i = defaultdict(list) # assignment id (str) -> question ids (str)
questions_assignment_i = dict() # question id (str) -> assignment id (str)
handins_i = defaultdict(list) # assignment id (str) -> handin documents
gradings_i = defaultdict(list) # handin id (str) -> report_grade documents
grading_handin_i = dict() # grading id (str) -> handin id (str)
answeres_i = defaultdict(list) # grading id (str) -> answer documents

# The single course this notebook exports, selected by a fixed id.
course = db.course.find_one({'_id': ObjectId("55d9cb9ea6fb8f00080da4a1")})

# Resolve every student id listed on the course to its user document(s).
for gradr_i in course['students']:
    for g_d in db.user.find({'_id': gradr_i}):
        graders_i.append(g_d)

# Walk assignment -> sections -> questions, then assignment -> handins ->
# gradings -> answers, recording each level in the tables above.
for ass_d in db.assignment.find({'course': course['_id']}):
    ass_i = str(ass_d['_id'])
    assignments_i.append(ass_d)
    # questions: reached through the assignment's question sections
    for sec_i in ass_d['sections']:
        # NOTE(review): if no section document matches, the next line would
        # fail on sec_d - confirm every referenced section exists.
        sec_d = db.question_section.find_one({'_id': sec_i})
        for q_i in sec_d['questions']:
            for q_d in db.question.find({'_id': q_i}):
                questions_i.append(q_d)
                questions_assignment_i[str(q_i)] = ass_i
                assignment_question_i[str(ass_i)].append(str(q_i))
                
    # handins of this assignment
    for han_d in db.handin.find({'assignment': ObjectId(ass_i)}):
        han_i = str(han_d['_id'])
        handins_i[ass_i].append(han_d)
        # gradings: only report_grade documents whose state is 'ANSWERED' are kept
        for grad_d in db.report_grade.find({'handin': ObjectId(han_i), 'state': 'ANSWERED'}):
            grad_i = str(grad_d['_id'])
            gradings_i[han_i].append(grad_d)
            grading_handin_i[grad_i] = han_i
            # answers belonging to this grading
            for answer_d in db.answer.find({'report_grade': ObjectId(grad_i)}):
                answeres_i[grad_i].append(answer_d)

Use the ids found above to populate the previously defined objects

In [12]:
def attr(entries, key):
    """Return a list holding ``entry[key]`` for every entry in ``entries``.

    A real list is built (rather than returning ``map(...)``) so the result
    can be indexed, measured with ``len`` and iterated more than once on
    Python 3 as well as Python 2, where ``map`` already returned a list.

    Args:
        entries: iterable of mappings (e.g. fetched documents).
        key: the key to read from each entry.

    Returns:
        list of the looked-up values, in the order of ``entries``.

    Raises:
        KeyError: if an entry does not contain ``key``.
    """
    return [entry[key] for entry in entries]

In [13]:
def question_max_value(question_id):
    """Look up a question and return the highest grade it can receive.

    Returns 1 for ``"boolean"`` questions. For ``"numerical"`` questions it
    returns the largest key of ``numericalAnswers`` (keys converted with
    ``int``), or 5 when that field is absent. Any other question type
    yields ``None``.
    """
    doc = db.question.find_one({'_id': ObjectId(question_id)})
    kind = doc['question_type']
    if kind == "boolean":
        return 1
    if kind != "numerical":
        return None
    if 'numericalAnswers' not in doc:
        return 5
    return max(int(option) for option in doc['numericalAnswers'].keys())

In [14]:
def answer_value_normalized(answer_id):
    """Return an answer's value divided by its question's maximum value.

    The answer is fetched by ``_id``. ``numerical_answer`` is preferred over
    ``boolean_answer``; if neither field is present the result is ``None``.
    The divisor comes from ``question_max_value`` for the answer's question.
    """
    record = db.answer.find_one({'_id': answer_id})
    question_ref = record['question']
    for field in ('numerical_answer', 'boolean_answer'):
        if field in record:
            return record[field] / float(question_max_value(question_ref))
    return None

In [15]:
def answer_value(answer_id):
    '''Return the raw (not normalized) value stored on an answer.

    The answer is fetched by ``_id``. ``numerical_answer`` is preferred over
    ``boolean_answer``; if neither field is present the result is ``None``.
    The answer's ``question`` field is not needed here and is no longer
    read, so answers lacking it do not raise ``KeyError``.
    '''
    answer = db.answer.find_one({'_id': answer_id})
    for field in ('numerical_answer', 'boolean_answer'):
        if field in answer:
            return answer[field]
    return None

In [16]:
def graders_gradings(grader_id):
    """Return, as a list, every report_grade document whose ``giver`` is ``grader_id``."""
    return list(db.report_grade.find({'giver': grader_id}))

Question objects: dependent on assignment

In [17]:
# Write one JSON object per line: a Question record for every fetched question.
with open(r"BigDataFormattedData/questions.txt", "w") as o_file:
    for question_doc in questions_i:
        question_id = str(question_doc['_id'])
        record = Question(
            question_id,
            question_doc['question_type'],
            questions_assignment_i[question_id],
            question_doc['text'],
            question_max_value(question_id),
        )
        # unpicklable=False keeps the output plain JSON (no type tags)
        o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

Answer objects: dependent on question

In [18]:
# Write one JSON object per line: an Answer record for every fetched answer.
with open(r"BigDataFormattedData/answers.txt", "w") as o_file:
    # .items() yields the same (grading id, answers) pairs as the Python-2-only
    # .iteritems() and also exists on Python 3.
    for grading_id, answer_docs in answeres_i.items():
        for answer_doc in answer_docs:
            answer_id = answer_doc['_id']
            # answers without a 'flagged' field count as not flagged
            record = Answer(
                str(answer_id),
                grading_id,
                str(answer_doc['question']),
                answer_doc.get('flagged', False),
            )
            if 'text_answer' in answer_doc:
                record.add_text_answere(answer_doc['text_answer'])
            else:
                # raw value and value scaled by the question's maximum
                record.add_value_answere(answer_value(answer_id), answer_value_normalized(answer_id))
            # unpicklable=False keeps the output plain JSON (no type tags)
            o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

Grading objects: dependent on grader, answers and handin

In [19]:
# Write one JSON object per line: a Grading record for every fetched grading.
with open(r"BigDataFormattedData/gradings.txt", "w") as o_file:
    # .items() replaces the Python-2-only .iteritems(); only the values are used.
    for handin_id, grading_docs in gradings_i.items():
        for grading_doc in grading_docs:
            grading_id = str(grading_doc['_id'])
            # Built as a real list so the record holds a list on Python 2 and 3 alike
            # (map() would hand back a lazy iterator on Python 3).
            answer_ids = [str(answer_doc['_id']) for answer_doc in answeres_i[grading_id]]
            record = Grading(
                grading_id,
                str(grading_doc['giver']),
                str(grading_doc['handin']),
                answer_ids,
                str(grading_doc['assignment']),
                grading_doc.get('feedback_grade'),  # None when the grading has no feedback grade
            )
            # unpicklable=False keeps the output plain JSON (no type tags)
            o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

Grader object: dependent on handins

In [20]:
# Write one JSON object per line: a User record for every course student.
with open(r"BigDataFormattedData/users.txt", "w") as o_file:
    for user_doc in graders_i:
        user_id = str(user_doc['_id'])
        # Ids of the report_grade documents this user gave. Built as a real list
        # so the record holds a list on Python 2 and 3 alike (map() is lazy on 3).
        # NOTE(review): these are grading ids, although they are stored under
        # User.graded_handins - confirm the intended meaning.
        grading_ids = [str(grading_doc['_id']) for grading_doc in graders_gradings(user_doc['_id'])]
        record = User(user_id, user_doc['name'], grading_ids)
        # unpicklable=False keeps the output plain JSON (no type tags)
        o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

Handin objects: dependent on graders and gradings

In [21]:
# Write one JSON object per line: a Handin record for every fetched handin.
with open(r"BigDataFormattedData/handins.txt", "w") as o_file:
    # .items() yields the same (assignment id, handins) pairs as the
    # Python-2-only .iteritems() and also exists on Python 3.
    for assignment_id, handin_docs in handins_i.items():
        for handin_doc in handin_docs:
            handin_id = str(handin_doc['_id'])
            # Real lists (not map objects) so the record serializes the same on Python 2 and 3.
            owner_ids = [str(student) for student in handin_doc['students']]
            grading_ids = [str(grading_doc['_id']) for grading_doc in gradings_i[handin_id]]
            record = Handin(handin_id, assignment_id, owner_ids, grading_ids)
            # unpicklable=False keeps the output plain JSON (no type tags)
            o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

Assignment objects: dependent on questions and handins

In [22]:
# Write one JSON object per line: an Assignment record for every fetched assignment.
with open(r"BigDataFormattedData/assignments.txt", "w") as o_file:
    for assignment_doc in assignments_i:
        assignment_id = str(assignment_doc['_id'])
        # Real list (not a map object) so the record serializes the same on Python 2 and 3.
        handin_ids = [str(handin_doc['_id']) for handin_doc in handins_i[assignment_id]]
        record = Assignment(
            assignment_id,
            assignment_doc['title'],
            assignment_question_i[assignment_id],  # question ids collected while fetching
            handin_ids,
        )
        # unpicklable=False keeps the output plain JSON (no type tags)
        o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

Course object: dependent on graders and assignments

In [25]:
# Write the single exported course as one JSON object on one line.
with open(r"BigDataFormattedData/courses.txt", "w") as o_file:
    course_id = str(course['_id'])
    # Real lists (not map objects) so the record serializes the same on Python 2 and 3.
    assignment_ids = [str(assignment_doc['_id']) for assignment_doc in assignments_i]
    participant_ids = [str(student) for student in course['students']]
    record = Course(course_id, course['title'], assignment_ids, participant_ids)
    # unpicklable=False keeps the output plain JSON (no type tags)
    o_file.write(jsonpickle.encode(record, unpicklable=False) + "\n")

## Storing data