# Clean data

Create dataframes we can work with

In [92]:
import json
import os
import numpy as np
import pandas as pd

In [93]:
def fix_layout(width:int=95):
    """Widen the notebook page by injecting a CSS rule for ``.container``.

    Parameters
    ----------
    width : int
        Width of the container as a percentage of the browser window.
    """
    # `display`/`HTML` are public in IPython.display; importing `display` from
    # IPython.core.display is deprecated and fails on recent IPython releases.
    from IPython.display import display, HTML
    display(HTML(f'<style>.container {{ width:{width}% !important; }}</style>'))

fix_layout()

# Orchestration Graph

# Read data

In [94]:
# Every activity/operator lives in its own sub-directory. Keep only visible
# directories (stray files such as .DS_Store or hidden folders such as
# .ipynb_checkpoints would make the loaders below fail) and sort them so the
# listing is the same on every machine.
activity_names = sorted(
    entry for entry in os.listdir('data/ac')
    if not entry.startswith('.') and os.path.isdir(os.path.join('data/ac', entry))
)
operator_names = sorted(
    entry for entry in os.listdir('data/op')
    if not entry.startswith('.') and os.path.isdir(os.path.join('data/op', entry))
)

Activities have:
```
activity.json
config.json
object.json
product.json
```

Operators have:
```
config.json
object.json
product.json
```

In [95]:
class Activity:
    """Parsed JSON export of one activity stored under ``<root>/<name>/``.

    Parameters
    ----------
    name : str
        Name of the activity's directory.
    root : str
        Directory that contains the activity directories (default ``'data/ac'``).

    Attributes
    ----------
    name : str
        The directory name the activity was loaded from.
    activity, config, object, product
        Parsed contents of ``activity.json``, ``config.json``, ``object.json``
        and ``product.json`` respectively.

    Raises
    ------
    OSError
        If one of the four files cannot be opened.
    json.JSONDecodeError
        If one of the files is not valid JSON.
    """

    def __init__(self, name, root='data/ac'):
        self.name = name

        self.activity = self._read_json(root, name, 'activity.json')
        self.config = self._read_json(root, name, 'config.json')
        self.object = self._read_json(root, name, 'object.json')
        self.product = self._read_json(root, name, 'product.json')

    @staticmethod
    def _read_json(root, name, filename):
        """Load ``<root>/<name>/<filename>`` as JSON."""
        # JSON text is UTF-8; do not rely on the platform's default encoding,
        # the exports contain non-ASCII characters.
        with open(os.path.join(root, name, filename), encoding='utf-8') as f:
            return json.load(f)
            
class Operator:
    """Parsed JSON export of one operator stored under ``<root>/<name>/``.

    Parameters
    ----------
    name : str
        Name of the operator's directory.
    root : str
        Directory that contains the operator directories (default ``'data/op'``).

    Attributes
    ----------
    name : str
        The directory name the operator was loaded from.
    config, object, product
        Parsed contents of ``config.json``, ``object.json`` and
        ``product.json`` respectively.

    Raises
    ------
    OSError
        If one of the three files cannot be opened.
    json.JSONDecodeError
        If one of the files is not valid JSON.
    """

    def __init__(self, name, root='data/op'):
        self.name = name

        self.config = self._read_json(root, name, 'config.json')
        self.object = self._read_json(root, name, 'object.json')
        self.product = self._read_json(root, name, 'product.json')

    @staticmethod
    def _read_json(root, name, filename):
        """Load ``<root>/<name>/<filename>`` as JSON."""
        # JSON text is UTF-8; do not rely on the platform's default encoding.
        with open(os.path.join(root, name, filename), encoding='utf-8') as f:
            return json.load(f)

In [96]:
class Activities:
    """Bare attribute container: one attribute per loaded activity."""


activities = Activities()

# Attribute name = "_" + the directory name up to the first "__", with hyphens
# turned into underscores (the leading underscore keeps names that start with a
# digit usable with dot access).
for a in activity_names:
    attribute = '_' + a.partition('__')[0].replace('-', '_')
    setattr(activities, attribute, Activity(name=a))

In [97]:
class Operators:
    """Bare attribute container: one attribute per loaded operator."""


operators = Operators()

# Attribute name = "_" + the directory name up to the first "__", with hyphens
# turned into underscores.
for o in operator_names:
    attribute = '_' + o.partition('__')[0].replace('-', '_')
    setattr(operators, attribute, Operator(name=o))

# Use data

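Each activity is stored as an attribute of `activities`. The attribute name is an underscore followed by the part of the directory name before the first `__`, with hyphens replaced by underscores. For example, the activity '10-dashboard-activity' is available as `activities._10_dashboard_activity`, and the content of its `activity.json` is read with `.activity`.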

In [98]:
dir(activities)

['_10_dashboard_activity',
 '_10_quiz',
 '_1_theory_introduction',
 '_2_dashboard_activity',
 '_2_pre_quiz',
 '_3_study_resources_sgdm',
 '_4_meet_the_team_sit_to',
 '_4_teams_slides',
 '_5_chat_about_sgdm',
 '_5_dashboard_activity',
 '_5_discuss_with_the_tea',
 '_6_study_resources_dgsm',
 '_7_chat_about_dgsm',
 '_7_dashboard_activity',
 '_7_discuss_with_the_tea',
 '_8_gestures_map',
 '_9_quiz',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__']

In [99]:
activities._10_dashboard_activity.activity

{'_id': 'ck3yv0jj802lh01ywfr9a7e7b',
 'length': 2,
 'plane': 3,
 'startTime': 19,
 'title': 'Dashboard activity',
 'activityType': 'ac-dash',
 'configVersion': 1,
 'graphId': 'ck3yv0jj602l501yw6nbabhtd',
 'participationMode': 'projector',
 'state': 'computed',
 'actualStartingTime': '2019-12-10T09:19:54.988Z',
 'actualClosingTime': '2019-12-10T09:21:41.793Z'}

The same goes for the operators. There is only one operator here (`operators._manually_group`), so it is the only one we can call. The content of its `config.json` is read with `.config`.

In [100]:
operators._manually_group.config

{'groupingKeys': 'group',
 'studentmapping': 'oriane_peter,same1\nantoine_clivaz,same1\nguilhem_sicard,same2\njohan_barthas,same2\nalfonso_fernandez,same3\nmarcel_moya,same3\nnatalia_gullon,same3\ngiacomo_alliata,same4\nguido_sergi,same4\nmax_schnaubelt,same5\nanel_muhamedagic,same5\nkiarash_farivar,mixed1\nambroise_renaud,mixed1\njoshua_lowin,mixed1\nsofia_kypraiou,mixed2\nmahmoud_said,mixed2\nlukas_deloose,mixed3\nthanuditha_wickramasinghe,mixed3\narnaud_garin,mixed3\nkim_haeeun,mixed4\ngraziano_rossini,mixed4\nalexandre_thimonier,mixed5\nanna_andersson,mixed5\n',
 'defaultGroupingValues': 'undefined'}

![og](og.png)

---

In [101]:
activities._10_quiz.config

{'shuffle': 'none',
 'questions': [{'multiple': False,
   'text': False,
   'value': False,
   'answers': [{'choice': 'neck slide'},
    {'choice': 'wink'},
    {'choice': 'call me'},
    {'choice': 'thumbs up'}],
   'question': '<p>Which gesture do west African parents use to imply that their children should leave the room when they have guests?</p>'},
  {'question': '<p>The “OK” hand gesture considered as an insult in …….. ? <img src="/minio/uploads/ck3w6cleb00ad307ivzt9ams9" width="239" height="203"/></p>',
   'answers': [{'choice': 'England'},
    {'choice': 'Italy'},
    {'choice': 'Mexico'},
    {'choice': 'Venezuela'}]},
  {'question': '<p>What are you going to lose in Japan, if someone does “the neck slide” gesture to you:<img src="/minio/uploads/ck3w6m0ey00ag307ih886dh3g" width="263" height="175"/></p>',
   'answers': [{'choice': 'Your family'},
    {'choice': 'Your life'},
    {'choice': 'Your job'},
    {'choice': 'Your money'}]},
  {'question': '<p>You are in Sweden and you

In [102]:
activities._10_quiz.activity

{'_id': 'ck3yv0jj802la01yw70jz3lx9',
 'length': 2,
 'plane': 1,
 'startTime': 19,
 'title': 'Quiz',
 'activityType': 'ac-quiz',
 'configVersion': 1,
 'graphId': 'ck3yv0jj602l501yw6nbabhtd',
 'state': 'computed',
 'actualStartingTime': '2019-12-10T09:19:54.172Z',
 'actualClosingTime': '2019-12-10T09:21:41.792Z'}

In [103]:
# Answer key of the six quiz questions, in question order, and the TYPE label
# attached to each question.
_QUIZ_CORRECT = ('wink', 'Venezuela', 'Your job', 'Make a sharp sucking noise', 'Cyprus', 'Ancient Romans')
_QUIZ_TYPES = ('SGDM', 'SGDM', 'SGDM', 'DGSM', 'DGSM', 'DGSM')


def quiz_data(quiz, id_to_student=None, correct_answers=_QUIZ_CORRECT, question_types=_QUIZ_TYPES):
    """Turn a quiz payload into a per-question correctness table.

    Parameters
    ----------
    quiz : dict
        Payload keyed by student id; each value must expose
        ``value["data"]["answers"]`` (one answer per question).
        The payload is NOT modified, so the call can be repeated safely.
    id_to_student : dict, optional
        Maps payload keys to the column names to use. Defaults to the
        module-level ``map_id_student``. Payload keys missing from the mapping
        keep their own name; mapped ids missing from the payload are skipped.
    correct_answers : sequence
        Expected answer for each question, in question order.
    question_types : sequence
        Label of each question, in question order.

    Returns
    -------
    pandas.DataFrame
        One row per question. One column per student who answered, holding 1
        for a correct answer and 0 otherwise, followed by the ``CORRECT`` and
        ``TYPE`` columns.

    Raises
    ------
    ValueError
        Raised by pandas when a student's answer list does not have the same
        length as ``correct_answers``.
    """
    if id_to_student is None:
        id_to_student = map_id_student

    # Build a renamed view instead of popping keys out of the caller's dict.
    # Order: unmapped keys first, then mapped students in mapping order.
    renamed = {key: entry for key, entry in quiz.items() if key not in id_to_student}
    for student_id, student_name in id_to_student.items():
        if student_id in quiz:
            renamed[student_name] = quiz[student_id]

    student_answers = {}
    for student, entry in renamed.items():
        answers = entry["data"]["answers"]
        # Keep only students who gave a first answer; an empty answer list
        # counts as "did not answer" rather than raising.
        if answers and answers[0]:
            student_answers[student] = answers
    student_answers["CORRECT"] = list(correct_answers)
    student_answers["TYPE"] = list(question_types)

    df = pd.DataFrame(student_answers)
    # Every column except the trailing CORRECT/TYPE pair is a student.
    for c in df.columns[:-2]:
        df[c] = (df[c] == df["CORRECT"]).astype(int)
    return df

In [104]:
# Lookup read by quiz_data() as a module-level name, so it has to be defined
# before the two calls below. Presumably maps student ids to student names —
# verify against object.json.
map_id_student = activities._10_quiz.object["globalStructure"]["students"]
# Per-question correctness tables for the pre-quiz and the final quiz; both use
# the mapping taken from the final quiz.
df_pre = quiz_data(activities._2_pre_quiz.product["payload"])
df_post = quiz_data(activities._10_quiz.product["payload"])

In [113]:
# Long-format score table: one row per (student, quiz, question type).
columns = ["student", "score", "score_norm", "quiz_type", "when"]
data = []

# Row order per student: pre DGSM, pre SGDM, post DGSM, post SGDM.
quizzes = (("PRE-QUIZ", df_pre), ("POST-QUIZ", df_post))
quiz_types = ("DGSM", "SGDM")

# Questions per type, used to turn a raw score into a percentage.
questions_per_type = {
    when: df_quiz["TYPE"].value_counts().to_dict() for when, df_quiz in quizzes
}

# Every column except the trailing CORRECT/TYPE pair is a student.
for c in df_pre.columns[:-2]:
    for when, df_quiz in quizzes:
        if c not in df_quiz.columns:
            # The student has no usable answers for this quiz: emit no rows.
            continue
        # Correct answers per question type ("sum" of the 0/1 column).
        student_score = df_quiz.groupby("TYPE")[c].sum()
        for quiz_type in quiz_types:
            raw_score = int(student_score[quiz_type])
            score_norm = raw_score / questions_per_type[when][quiz_type] * 100
            data.append([c, raw_score, score_norm, quiz_type, when])

df_student_scores = pd.DataFrame(data, columns=columns)
# Make sure the output folder exists before writing.
os.makedirs("scores", exist_ok=True)
df_student_scores.to_csv("scores/quiz_SGDM_DGSM.csv", index=False)
df_student_scores

Unnamed: 0,student,score,score_norm,quiz_type,when
0,louis,0,0.000000,DGSM,PRE-QUIZ
1,louis,1,33.333333,SGDM,PRE-QUIZ
2,louis,2,66.666667,DGSM,POST-QUIZ
3,louis,3,100.000000,SGDM,POST-QUIZ
4,kevin,1,33.333333,DGSM,PRE-QUIZ
...,...,...,...,...,...
75,alfonso_fernandez,3,100.000000,SGDM,POST-QUIZ
76,natalia_gullon,0,0.000000,DGSM,PRE-QUIZ
77,natalia_gullon,1,33.333333,SGDM,PRE-QUIZ
78,natalia_gullon,1,33.333333,DGSM,POST-QUIZ


---

In [None]:
# Parsed product.json of the `_10_quiz` activity; the following cells read its
# 'payload' mapping.
obj = activities._10_quiz.product

In [None]:
# Split the payload into two parallel lists: its keys and the matching values.
# NOTE(review): the name says "pre_quiz_2" but the data comes from `_10_quiz` — confirm.
teams = [*obj['payload']]
answers_pre_quiz_2 = [*obj['payload'].values()]

In [None]:
answers_pre_quiz_2

In [None]:
# Drop the second payload entry from both parallel lists so they stay aligned.
# (`answers_dgsm` is never defined in this notebook; the list built next to
# `teams` is `answers_pre_quiz_2`.)
# NOTE(review): this cell is not idempotent — every re-run removes another entry.
del answers_pre_quiz_2[1]
del teams[1]

In [None]:
import re

def cleanhtml(raw_html):
    """Return ``raw_html`` with every ``<...>`` span removed.

    The match is non-greedy and ``.`` does not cross line breaks, so a tag
    that spans several lines is left in place. HTML entities are not decoded.
    """
    tag_pattern = '<.*?>'
    return re.sub(tag_pattern, '', raw_html)

In [None]:
# Build (username, {question text: answer}) tuples from the payload entries.
count = 0          # number of entries that could not be parsed
pre_quiz_2 = []

for i, submission in enumerate(answers_pre_quiz_2):
    try:
        submission_data = submission['data']
        # First whitespace-separated token of the message is used as the user name.
        username = submission_data['msg'].split()[0]
        # Question texts contain HTML markup; strip it before using them as keys.
        answers_by_question = dict(zip(
            [cleanhtml(x).strip() for x in submission_data['questions']],
            submission_data['answers'],
        ))
    except (KeyError, IndexError, TypeError, AttributeError) as err:
        # Malformed or incomplete entry: skip it, but say which one and why
        # instead of silently swallowing every possible error.
        count += 1
        print(f"skipped entry {i}: {err!r}")
        continue
    pre_quiz_2.append((username, answers_by_question))

In [None]:
pre_quiz_2

In [None]:
len(pre_quiz_2)

In [None]:
# Replace the user name of the last entry with 'mixed2', keeping the rest of the tuple.
# NOTE(review): the reason for this manual correction is not recorded — confirm.
pre_quiz_2[-1] = ('mixed2', *pre_quiz_2[-1][1:])

In [None]:
# Persist the (username, answers) tuples; np.save appends ".npy" to the file
# name when it is missing. The entries are Python objects, so reading the file
# back requires np.load(..., allow_pickle=True).
np.save('Final_Quiz_Answers', np.array(pre_quiz_2))

***

In [None]:
def score(lst1, lst2):
    """Count the elements of ``lst1`` that occur anywhere in ``lst2``.

    The check is by membership, not by position: duplicates in ``lst1`` are
    each counted, and the order of the two lists does not matter.
    """
    return sum(1 for candidate in lst1 if candidate in lst2)

In [None]:
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects — only load
# files you produced yourself.
# NOTE(review): the save cell above writes 'Final_Quiz_Answers'; confirm that
# 'DGSM_Answers.npy' is the intended input here.
quiz = np.load('DGSM_Answers.npy', allow_pickle=True)
# Tab-separated file without a header row; the cells below read column 1 as the
# list of correct answers.
answers = pd.read_csv('DGSM_Correct_Answers.tsv', sep='\t', header=None)

In [None]:
answers

In [None]:
# For every saved entry, store [number of matching answers, fraction of the
# answer key matched] under the entry's user name.
scores = {}
correct_answers = list(answers.iloc[:, 1])

for entry in quiz:
    user = entry[0]
    given_answers = list(entry[1].values())
    s_ = score(given_answers, correct_answers)
    scores[user] = [s_, s_ / len(correct_answers)]

In [None]:
scores

In [None]:
list(quiz[9][1].values())

In [None]:
list(answers.iloc[:,1])

In [None]:
# NOTE(review): `map_score` is only assigned in cells further down (np.load and
# dict(zip(users, scores))), so this cell fails on a fresh kernel when the
# notebook is run top to bottom — it presumably belongs after them.
np.save('scores/Map_Scores.npy', map_score)

In [None]:
# `.item()` unwraps an array that holds exactly one element — presumably the
# saved dict; confirm. allow_pickle=True unpickles arbitrary objects, so only
# load trusted files.
np.load('scores/Map_Scores.npy', allow_pickle=True).item()

In [None]:
# Load the stored map scores; the following cells slice the result as a 2-D
# array (column 0 and column 1). allow_pickle=True unpickles arbitrary objects,
# so only load trusted files.
map_score = np.load('scores/Map_Scores.npy', allow_pickle=True)

In [None]:
# Each entry of column 1 becomes [raw score, raw score / 3].
# NOTE(review): 3 is presumably the maximum map score — confirm.
scores = [[int(x), int(x)/3] for x in map_score[:,1].tolist()]

In [None]:
# Column 0 of the loaded array, used as dictionary keys in the next cell.
users = map_score[:,0].tolist()

In [None]:
# Rebinds `map_score` from the loaded array to a {user: scores entry} dict, so
# the slicing cells above cannot be re-run afterwards without reloading the file.
map_score = dict(zip(users,scores))

---