In [1]:
##################################################
## GET INFORMATION FROM STRAND SPECIFIC STUDIES ##
##################################################

# import necessary libraries
import json
from collections import Counter
from pprint import pprint
from matplotlib import pyplot as plt
import pandas as pd
import seaborn
import csv
plt.style.use('ggplot')

# import dataset
with open('Batch1.json') as f:
    data=json.load(f)

In [7]:
def extract_values(obj, key):
    arr=[]
    
    def extract(obj, arr, key):
        if isinstance(obj, dict):
            for k, v in obj.items():
                if isinstance(v, (dict, list)):
                    extract(v,  arr, key)
                elif k == key:
                    arr.append(v)
        elif isinstance(obj, list):
            for item in obj:
                extract(item, arr, key)
        return arr
    
    results = extract(obj, arr, key)
    return results

names = extract_values(data["CodeSets"][1], "AttributeName")
pprint(names)

['Section 1 What are the details of the study design?',
 'What was the study design?',
 'Individual RCT',
 'Cluster RCT',
 'Multisite RCT',
 'Prospective QED',
 'Retrospective QED  ',
 'Interrupted time series QED',
 'Regression Discontinuity with randomisation',
 'Regression Discontinuity - not randomised',
 'Regression Continuity  - naturally occurring',
 'What is the number of schools involved in the study?',
 'What is the number of schools involved in the intervention group(s)?',
 'What is the number of schools involved in the control or comparison group?',
 'What is the total number of schools involved?',
 'Not provided/ unclear / not applicable',
 'What is the number of classes involved?',
 'What is the total number of classes involved in the intervention group?',
 'What is the total number of classes involved in the control or comparison '
 'group?',
 'What is the total number of classes involved?',
 'Not provided/ unclear / not applicable',
 'Are details of randomisation provid

In [2]:
def get_strand_info():
    ''' 
    a function that returns
    a dict containing strand labels
    and corresponding attribute ids
    '''
    strands={}
    for counter, element in enumerate(data["CodeSets"][0]["Attributes"]["AttributesList"]):
        attribute_name=(data["CodeSets"][0]["Attributes"]["AttributesList"][counter]["AttributeName"])
        attribute_id=(data["CodeSets"][0]["Attributes"]["AttributesList"][counter]["AttributeId"])
        strands.update( {attribute_id:attribute_name} )
    return strands

In [4]:
test = get_strand_info()
test

{5023544: 'Arts participation',
 5023545: 'Aspiration interventions',
 5023546: 'Behaviour interventions',
 5023547: 'Block scheduling',
 5023550: 'Built environment',
 5023551: 'Collaborative learning',
 5023552: 'Digital technology',
 5023554: 'Early years intervention',
 5023553: 'Extending school time',
 5023555: 'Feedback',
 5023556: 'Homework',
 5023557: 'Individualised instruction',
 5023558: 'Learning styles',
 5023559: 'Mastery learning',
 5023560: 'Mentoring',
 5023561: 'Metacognition and self-regulation',
 5023562: 'One to one tuition',
 5023563: 'Oral language interventions',
 5023564: 'Outdoor adventure learning',
 5023565: 'Parental engagement',
 5023548: 'Peer tutoring',
 5023566: 'Performance pay',
 5023567: 'Phonics',
 5023568: 'Reading comprehension strategies',
 5023569: 'Reducing class size',
 5023570: 'Repeating a year',
 5023571: 'School uniform',
 5023572: 'Setting or streaming',
 5023549: 'Small group tuition',
 5023573: 'Social and emotional learning',
 5023574

In [181]:
# section 1: what are the details of the study design

# what was the study design
for counter, element in enumerate(data["CodeSets"][1]["Attributes"]["AttributesList"]):
    pprint(data["CodeSets"][1]["Attributes"]["AttributesList"][0]["Attributes"]["AttributesList"][counter]["AttributeName"])

    


'What was the study design?'
'What is the number of schools involved in the study?'
'What is the number of classes involved?'


In [210]:
# print section titles and embedded questionw

for counter, element in enumerate(data["CodeSets"][1]["Attributes"]["AttributesList"]):
    print(data["CodeSets"][1]["Attributes"]["AttributesList"][counter]["AttributeName"])
    print("\n")
    for num, element in enumerate(data["CodeSets"][1]["Attributes"]["AttributesList"][counter]["Attributes"]["AttributesList"]):
        print("\t ", data["CodeSets"][1]["Attributes"]["AttributesList"][counter]["Attributes"]["AttributesList"][num]["AttributeName"])
    print("\n")

Section 1 What are the details of the study design?


	  What was the study design?
	  What is the number of schools involved in the study?
	  What is the number of classes involved?
	  Are details of randomisation provided?


Section 2 How is the sample described?


	  What is the sample size for the intervention group?
	  What is the sample size for the control group?
	  *What is the sample size for the second intervention group?
	  *What is the sample size for the third intervention group?
	  Does the study report any group differences at baseline? 
	  Is comparability taken into account in the analysis?
	  Is attrition or drop out reported?
	  What is the attrition in the treatment group?
	  Are the variables used for comparability reported?
	  Is clustering accounted for in the analysis?


Section 3 Outcome details


	  Outcomes
	  Outcome classification
	  DO NOT USE




In [488]:
# get study design options and attribute IDs from top layer

# Section 1 What are the details of the study design?

study_design={}
for element in data["CodeSets"][1]["Attributes"]["AttributesList"][0]["Attributes"]["AttributesList"][0]["Attributes"]["AttributesList"]:
    study_design.update( { element["AttributeId"]:element["AttributeName"]} )
    
number_of_schools={}
for element in data["CodeSets"][1]["Attributes"]["AttributesList"][0]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"]:
    number_of_schools.update( { element["AttributeId"]:element["AttributeName"]} )
    
number_of_classes={}
for element in data["CodeSets"][1]["Attributes"]["AttributesList"][0]["Attributes"]["AttributesList"][2]["Attributes"]["AttributesList"]:
    number_of_classes.update( { element["AttributeId"]:element["AttributeName"]} )
    
randomisation_detail={}
for element in data["CodeSets"][1]["Attributes"]["AttributesList"][0]["Attributes"]["AttributesList"][3]["Attributes"]["AttributesList"]:
    randomisation_detail.update( { element["AttributeId"]:element["AttributeName"]} )

# Section 2 How is the sample described?

one = {}
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][0]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][0]["AttributeId"]
one.update ( { att_id:att_name} )

two = {}
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][1]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][1]["AttributeId"]
two.update ( { att_id:att_name} )

three = {}
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][2]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][2]["AttributeId"]
three.update ( { att_id:att_name} )

four = {}
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][3]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][3]["AttributeId"]
four.update ( { att_id:att_name} )
four

baseline_group_differences={}
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][4]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][4]["AttributeId"]
baseline_group_differences.update ( { att_id:att_name} )
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][4]["Attributes"]["AttributesList"][0]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][4]["Attributes"]["AttributesList"][0]["AttributeId"]
baseline_group_differences.update ( { att_id:att_name} )
att_name = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][4]["Attributes"]["AttributesList"][1]["AttributeName"]
att_id   = data["CodeSets"][1]["Attributes"]["AttributesList"][1]["Attributes"]["AttributesList"][4]["Attributes"]["AttributesList"][1]["AttributeId"]
baseline_group_differences.update ( { att_id:att_name} )

baseline_group_differences





{5406836: 'Does the study report any group differences at baseline? ',
 5406866: 'Yes',
 5406860: 'No/Unclear'}