In [5]:
##################################################
## GET INFORMATION FROM STRAND SPECIFIC STUDIES ##
##################################################

# import necessary libraries
import json
from collections import Counter
from pprint import pprint
from matplotlib import pyplot as plt
import pandas as pd
import seaborn
import csv
plt.style.use('ggplot')

# Load the exported dataset into `data`.
# The encoding is stated explicitly: without it open() falls back to a
# locale-dependent default, so the same JSON file could decode differently
# (or fail) on another machine.
# NOTE(review): absolute, user-specific path - consider a configurable data directory.
with open('/home/jon/json/ToolkitExtraction/data/Batch1.json', encoding='utf-8') as f:
    data = json.load(f)

In [87]:
# get attribute names and ids for "what is the educational setting?" from top-layer (codesets)
def get_edu_info(dataset=None):
    '''
    Map each educational-setting attribute id to its attribute name.

    The settings are read from the third code set, descending through the
    third entry of the attribute list at two successive levels
    (CodeSets[2] -> AttributesList[2] -> AttributesList[2] -> AttributesList).

    dataset: optional dict laid out like the loaded JSON export; when
             omitted, the notebook-level `data` is used.
    returns: dict of {AttributeId: AttributeName}, in source order.
    '''
    if dataset is None:
        dataset = data
    # Resolve the deep path once instead of re-indexing it for every entry.
    settings = dataset["CodeSets"][2]["Attributes"]["AttributesList"][2]["Attributes"]["AttributesList"][2]["Attributes"]["AttributesList"]
    return {setting["AttributeId"]: setting["AttributeName"] for setting in settings}

# Build the educational-setting lookup and list every id with its label.
edu = get_edu_info()

for setting_id, setting_name in edu.items():
    print(setting_id, setting_name)

# Confirm which container type the lookup was returned as.
print(type(edu))

5215410 Nursery school/pre-school
5215411 Primary/elementary school
5215412 Middle school
5215413 Secondary/High school
5215414 Residential/boarding school
5215415 Independent/private school
5215416 Home
5215566 Further education/junior or community college
5215417 Other educational setting (please specify)
5215418 Outdoor adventure setting
5513033 No information provided
<class 'dict'>


True

In [118]:
######################################
### DISPLAY STRAND SUMMARY INFORMATION 
######################################
    
def get_strand_summary():
    '''
    Print a basic summary of how many studies are coded to each strand.

    For every reference in `data`, each strand in `strands` is compared
    against the reference's codes; every match contributes one row of
    [strand name, strand id, ItemId, Title]. The strand names of those rows
    are printed (count first, then the list), followed by a Counter of
    occurrences per strand name.

    Side effects: rebinds the notebook-level names `strand_title` (list of
    matched strand names) and `counts` (Counter over that list).
    Returns nothing.
    '''
    global counts, strand_title

    # One row per (reference, strand, code) match, in the same nesting order:
    # references, then strands, then the reference's codes.
    overview_rows = [
        [strand_name, strand_key, reference["ItemId"], reference["Title"]]
        for reference in data["References"]
        for strand_key, strand_name in strands.items()
        for code in reference["Codes"]
        if strand_key == code["AttributeId"]
    ]

    strand_title = [row[0] for row in overview_rows]

    print(len(strand_title))
    pprint(strand_title)

    counts = Counter(strand_title)
    pprint(counts)

###################################################
### GET STRAND LABELS AND KEYS FROM TOP OUTER LAYER
###################################################

def get_strand_info(dataset=None):
    '''
    Build the lookup of strand attribute ids to strand labels.

    The strands are the entries of the first code set's attribute list
    (CodeSets[0] -> Attributes -> AttributesList).

    dataset: optional dict laid out like the loaded JSON export; when
             omitted, the notebook-level `data` is used.
    returns: dict of {AttributeId: AttributeName}, in source order.
    '''
    if dataset is None:
        dataset = data
    # Iterate the entries directly rather than re-indexing the list by position.
    strand_entries = dataset["CodeSets"][0]["Attributes"]["AttributesList"]
    return {entry["AttributeId"]: entry["AttributeName"] for entry in strand_entries}

#######################################
### GET THE ID FOR OUR STRAND OF CHOICE 
#######################################

def get_strand_value(strand_label, strand_map=None):
    '''
    Look up a strand by its label.

    strand_label: strand name to search for (exact equality).
    strand_map:   optional {attribute_id: strand_name} dict; when omitted,
                  the notebook-level `strands` dict is used.
    returns: an (attribute_id, strand_name) tuple for the first strand whose
             name equals strand_label, or None when no strand matches.
    '''
    if strand_map is None:
        strand_map = strands
    for key, value in strand_map.items():
        if value == strand_label:
            return key, value
    # No match: callers that index the result must check for None first.
    return None

######################################################
###  GET EFFECT SIZE INFO FROM STRAND SPECIFIC STUDIES
######################################################

def get_data(strand_key, strand_value, outcome_choice, dataset=None):
    '''
    Tabulate effect-size information for the studies coded to one strand.

    A study contributes a row when one of its codes has AttributeId equal to
    strand_key and the OutcomeText of its first outcome equals outcome_choice.
    The number of rows is printed and the rows are displayed as a DataFrame.

    strand_key:     attribute id of the strand to select.
    strand_value:   strand label, copied into every row.
    outcome_choice: OutcomeText the first outcome must have.
    dataset:        optional dict laid out like the loaded JSON export; when
                    omitted, the notebook-level `data` is used.
    returns: nothing; the table is shown with display().

    Side effect: sets the pandas option 'display.max_rows' to None.

    NOTE(review): only Outcomes[0] is inspected, so a matching outcome stored
    later in the list is not picked up - confirm this is intended.
    '''
    if dataset is None:
        dataset = data

    outcome_studies = []

    for reference in dataset["References"]:
        for code in reference["Codes"]:
            # Filter on the strand_key argument; the function must not depend
            # on whatever the notebook-level `strand_id` currently holds.
            if code["AttributeId"] != strand_key:
                continue
            # Skip studies with a missing or empty outcome list instead of
            # failing on the [0] lookup.
            outcomes = reference.get("Outcomes")
            if not outcomes:
                continue
            first_outcome = outcomes[0]
            if first_outcome["OutcomeText"] != outcome_choice:
                continue
            # Value order must line up with the column labels below:
            # OutcomeText fills 'OutcomeType', ShortTitle fills 'ShortTitle'.
            outcome_studies.append([
                strand_key,
                strand_value,
                first_outcome["OutcomeId"],
                first_outcome["OutcomeText"],
                first_outcome["ShortTitle"],
                reference["Year"],
                first_outcome["InterventionText"],
                first_outcome["SMD"],
                first_outcome["SESMD"],
            ])

    # display number of studies found within selected strand
    print("Number of studies:", len(outcome_studies), "\n")

    pd.set_option('display.max_rows', None)

    # convert data list to pandas dataframe for viewing
    df = pd.DataFrame(outcome_studies, columns=['AttributeId', 'Strand', 'OutcomeId', 'OutcomeType', 'ShortTitle', 'Year', 'Intervention', 'SMD', 'SESMD'])
    display(df)

In [119]:
# Build the strand lookup, choose the strand to analyse, then tabulate the
# studies whose first outcome is labelled "Primary outcome".
strands = get_strand_info()
strand_id = get_strand_value("Feedback")  # (attribute id, strand label) of the selected strand
get_data(*strand_id, "Primary outcome")

Number of studies: 89 



Unnamed: 0,AttributeId,Strand,OutcomeId,OutcomeType,ShortTitle,Year,Intervention,SMD,SESMD
0,5023555,Feedback,46356,Primary outcome,Adler (1998),1998,Literacy: writing,0.165,0.223
1,5023555,Feedback,49729,Primary outcome,Anderson (1973),1973,Mathematics,1.154701,0.230978
2,5023555,Feedback,46229,Primary outcome,Andrade (2008),2008,Literacy: writing,0.83,0.2
3,5023555,Feedback,47394,Primary outcome,Aumiller (1963),1963,Literacy: spelling,-0.005847,0.145104
4,5023555,Feedback,46370,Primary outcome,Benson (1979) 1_1,1979,Literacy: writing,0.22,0.12
5,5023555,Feedback,49773,Primary outcome,Benson (1979) 1_2,1979,Literacy: writing,0.211533,0.145509
6,5023555,Feedback,48085,Primary outcome,Bethge (1982),1982,Cognitive: other,0.85523,0.30271
7,5023555,Feedback,48088,Primary outcome,Bilsky (1978),1978,Mathematics,2.428889,0.405638
8,5023555,Feedback,46278,Primary outcome,Bohannon (1975),1975,Literacy: decoding/phonics,2.75,0.66
9,5023555,Feedback,48104,Primary outcome,Boulet (1990) 1_1,1990,Curriculum: other,-0.032907,0.283773


In [62]:
# Collect the educational-setting label of every study code whose
# AttributeId is one of the keys of `edu`, walking references in order and,
# within each reference, its codes in order.
schools = [
    edu[code["AttributeId"]]
    for reference in data["References"]
    for code in reference["Codes"]
    if code["AttributeId"] in edu
]

print(len(schools))
pprint(schools)

print(edu.keys())

471
['Primary/elementary school',
 'Primary/elementary school',
 'Secondary/High school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Middle school',
 'Primary/elementary school',
 'Secondary/High school',
 'Middle school',
 'Primary/elementary school',
 'Nursery school/pre-school',
 'Nursery school/pre-school',
 'Middle school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Middle school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Nursery school/pre-school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Secondary/High school',
 'Middle school',
 'Middle school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Secondary/High school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Primary/elementary school',
 'Mi

In [88]:
# Sanity check: is the id listed for 'Primary/elementary school' present in `edu`?
PRIMARY_SCHOOL_ID = 5215411
if PRIMARY_SCHOOL_ID in edu:
    print("yes")

yes


In [69]:
# Show the complete contents of `edu` (plain-text repr) for inspection.
print(edu)

{5215410: 'Nursery school/pre-school', 5215411: 'Primary/elementary school', 5215412: 'Middle school', 5215413: 'Secondary/High school', 5215414: 'Residential/boarding school', 5215415: 'Independent/private school', 5215416: 'Home', 5215566: 'Further education/junior or community college', 5215417: 'Other educational setting (please specify)', 5215418: 'Outdoor adventure setting', 5513033: 'No information provided'}


In [126]:
# Exploratory tally over every code of every reference:
#   a - (code, educational-setting id) pairs whose ids are equal
#   b - (code, educational-setting id) pairs whose ids differ
# "yes" is printed once for each code that carries the selected strand id.
a = 0
b = 0

for reference in data["References"]:
    for code in reference["Codes"]:
        attribute_id = code["AttributeId"]
        if strand_id[0] == attribute_id:
            print("yes")
        for setting_id, _ in edu.items():
            if setting_id == attribute_id:
                a += 1
            else:
                b += 1

print('a', a)
print('b', b)

yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
yes
a 471
b 320168


In [128]:
def get_edu_info(dataset=None):
    '''
    Collect the attribute ids of the educational settings.

    NOTE: this reuses the name of the earlier get_edu_info() in this notebook
    (which returns an id -> name dict) and replaces it once this cell runs.

    The settings are read from the third code set, descending through the
    third entry of the attribute list at two successive levels
    (CodeSets[2] -> AttributesList[2] -> AttributesList[2] -> AttributesList).

    dataset: optional dict laid out like the loaded JSON export; when
             omitted, the notebook-level `data` is used.
    returns: list of AttributeId values, in source order.
    '''
    if dataset is None:
        dataset = data
    # Resolve the deep path once instead of re-indexing it for every entry.
    settings = dataset["CodeSets"][2]["Attributes"]["AttributesList"][2]["Attributes"]["AttributesList"][2]["Attributes"]["AttributesList"]
    return [setting["AttributeId"] for setting in settings]

# Rebind `edu` to the result of the get_edu_info() defined directly above.
# NOTE(review): other cells call edu.items(), which needs a dict - confirm
# they are not re-run after this rebinding if `edu` now holds a list.
edu = get_edu_info()
# Bare expression so the notebook renders the value as the cell output.
edu

[5215410,
 5215411,
 5215412,
 5215413,
 5215414,
 5215415,
 5215416,
 5215566,
 5215417,
 5215418,
 5513033]