## Create dataframe from sign_props.json file 

In [3]:
import json
import pandas as pd

data_dict = {}
columns = []
with open('sign_props.json') as json_file:
    data = json.load(json_file)    
    #print(data)
    row_index = 1
    for attr in data[0]:
        columns.append(attr)
    for record in data:
        row = []
        for attr in record:
            row.append(record[attr])
        data_dict[row_index] = row
        row_index += 1
        
#print(data_dict)
df = pd.DataFrame.from_dict(data_dict, orient='index', columns=columns)
df

Unnamed: 0,index,EntryID,LemmaID,Code,SignFrequency(M),SignFrequency(SD),SignFrequency(Z),SignFrequency(N),PercentUnknown,SignFrequency(M-Native),...,MinorLocation.2.0,SecondMinorLocation.2.0,Contact.2.0,NonDominantHandshape.2.0,UlnarRotation.2.0,Batch,Item,List,bglm_aoa,empirical_aoa
1,0,&=arms-cross,&=arms-cross,J_01_098,3.467,2.177,-0.458,30.0,0.0,3.400,...,,,,,,J,98.0,1.0,,
2,1,&=clap,,J_02_076,3.867,,-0.127,,,,...,,,,,,J,76.0,2.0,,
3,2,&=clasp,$=clasp,J_01_032,2.308,1.644,-1.129,29.0,0.0,2.267,...,FingerRadial,FingerRadial,1.0,1,0.0,J,32.0,1.0,,
4,3,&=cover-ears,&=cover-ears,J_03_047,2.280,1.646,-1.163,28.0,0.0,2.167,...,Neutral,Neutral,1.0,open_b,0.0,J,47.0,3.0,,
5,4,&=raise-hand,&=raise-hand,H_02_048,5.154,1.891,0.544,28.0,0.0,5.500,...,,,,,,H,48.0,2.0,,
6,5,&=sigh,&=sigh,H_03_086,3.931,1.751,-0.196,30.0,0.0,3.267,...,,,,,,H,86.0,3.0,,
7,6,1_dollar,1_dollar,C_02_025,3.034,1.500,-0.775,29.0,0.0,2.944,...,FingerBack,HandAway,1.0,B,0.0,C,25.0,2.0,,
8,7,5_dollars,5_dollars,B_01_062,5.120,1.716,0.302,25.0,0.0,5.214,...,Neutral,,0.0,,1.0,B,62.0,1.0,,
9,8,8_hour,8_hour,C_02_055,4.267,1.574,-0.043,30.0,0.0,4.444,...,FingerFront,Palm,1.0,B,0.0,C,55.0,2.0,,
10,9,9_oclock,9_oclock,C_02_080,4.133,1.592,-0.126,30.0,0.0,4.778,...,WristBack,,1.0,,0.0,C,80.0,2.0,,


## Finding categorical and range attributes

In [5]:
numerical_attr = list(df.select_dtypes(include=['float', 'int']).columns)
categorical_attr = []
for column in list(df.columns):
    if column not in numerical_attr:
        categorical_attr.append(column)   
print(numerical_attr)
print(categorical_attr)

['index', 'SignFrequency(M)', 'SignFrequency(SD)', 'SignFrequency(Z)', 'SignFrequency(N)', 'PercentUnknown', 'SignFrequency(M-Native)', 'SignFrequency(SD-Native)', 'SignFreq(Z-native)', 'SignFrequency(N-Native)', 'PercentUnknown(Native)', 'PercentGlossAgreement', 'PercentGlossAgreement(Native)', 'Iconicity(M)', 'Iconicity(SD)', 'Iconicity(Z)', 'Iconicity(N)', 'SubtLexUSLog10WF', 'SignOnset(ms)', 'SignOffset(ms)', 'SignLength(ms)', 'ClipLength(ms)', 'MinimalNeighborhoodDensity', 'MaximalNeighborhoodDensity', 'Parameter-BasedNeighborhoodDensity', 'D.Iconicity(M) all', 'D.Iconicity(SD) all', 'D.Iconicity(N) all', 'D.Iconicity(Z) all', 'D.Iconicity(M) native', 'D.Iconicity(SD) native', 'D.Iconicity(N) native', 'D.Iconicity(Z) native', 'Complexity', 'RightWristX', 'RightWristcentroidY', 'DistanceRightWristCentroid2Nose', 'Initialized.2.0', 'FingerspelledLoanSign.2.0', 'Compound.2.0', 'NumberOfMorphemes.2.0', 'MarkedHandshape.2.0', 'FlexionChange.2.0', 'Spread.2.0', 'SpreadChange.2.0', 'Thum

## Find the constraints of the attributes 
### For categorical attributes we find all the possible values and for numberical attributes we find min and max 

In [32]:
import math 
import pprint

constraints = {}
for attr in categorical_attr:
    if attr not in ['EntryID', 'LemmaID', 'Code', 'Iconicity_ID']:
        columnsData = df.loc[ : , attr ]
        constraints[attr] = list(set(list(columnsData)))
        
for attr in numerical_attr:    
    #drop nan values 
    columnsData = df.loc[ : , attr ].dropna()
    constraints[attr] = {}  
    #print(math.floor(min(list(columnsData))), math.ceil(max(list(columnsData))), attr)
    constraints[attr]['min'] = math.floor(min(list(columnsData)))
    constraints[attr]['max'] = math.ceil(max(list(columnsData)))

pprint.pprint(constraints)    

{'Batch': ['C', 'D', 'E', 'B', None, 'F', 'A', 'J', 'G', 'H'],
 'ClipLength(ms)': {'max': 3737, 'min': 901},
 'Complexity': {'max': 6, 'min': 0},
 'Compound.2.0': {'max': 1, 'min': 0},
 'Contact.2.0': {'max': 1, 'min': 0},
 'D.Iconicity(M) all': {'max': 7, 'min': 1},
 'D.Iconicity(M) native': {'max': 7, 'min': 1},
 'D.Iconicity(N) all': {'max': 31, 'min': 26},
 'D.Iconicity(N) native': {'max': 22, 'min': 18},
 'D.Iconicity(SD) all': {'max': 3, 'min': 0},
 'D.Iconicity(SD) native': {'max': 3, 'min': 0},
 'D.Iconicity(Z) all': {'max': 3, 'min': -2},
 'D.Iconicity(Z) native': {'max': 3, 'min': -2},
 'DistanceRightWristCentroid2Nose': {'max': 2193, 'min': 1002},
 'FingerspelledLoanSign.2.0': {'max': 1, 'min': 0},
 'Flexion.2.0': ['Curved',
                 'Stacked',
                 'FullyClosed',
                 'Flat',
                 'FullyOpen',
                 None,
                 'Crossed',
                 'Bent'],
 'FlexionChange.2.0': {'max': 1, 'min': 0},
 'Handshape.2.0': 

## Store attribute constraints in a json file

In [33]:
import json

with open('constraints.json', 'w') as fp:
    json.dump(constraints, fp)