In [1]:
# import req'd modules
import io
import json
import os

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

import pmagpy.builder as builder

## Playing with json & unicode

In [2]:

# json is the same format that the MagIC data model comes in

# turn json into Python:
json_string = '{"first_name": "Guido", "last_name":"Rossum"}'
parsed = json.loads(json_string)

# turn Python into json (`dumped` is a str holding the serialized dict)
d = {'hello': 'hi', 'so long': 'goodbye'}
dumped = json.dumps(d)

# store json in a file
# Dump the dict itself: dumping `dumped` would JSON-encode a string that is
# already JSON, so the file would hold a quoted string, not a JSON object,
# and would need to be decoded twice when read back.
with open('stored.json', 'w') as outfile:
    json.dump(d, outfile)

# read json file into Python; `with` guarantees the handle is closed
with open('stored.json', 'r') as infile:
    loaded = json.load(infile)
loaded

{u'hello': u'hi', u'so long': u'goodbye'}

In [3]:
# parsing unicode
# Decode a byte string, silently dropping the bytes that cannot be decoded
# (here the two \xc2\xa0 pairs), which leaves only u'\r\n\n'.
# NOTE(review): `unicode` is a Python 2 builtin, and no encoding is passed, so
# the interpreter's default codec is used -- presumably ASCII; confirm.
unicode('\r\n\n\xc2\xa0\xc2\xa0', errors='ignore')

u'\r\n\n'

## Reading the data model into pandas

In [4]:
# the code in this block has been incorporated into data_model3.py

def get_data_model(model_file=None):
    """
    Load the MagIC 3.0 data model JSON file into a DataFrame.

    Parameters
    ----------
    model_file : str, optional
        Path to the data model JSON file. When omitted, the file
        '3_0/MagIC Data Model v3.0 - unpublished.json' is used, resolved
        relative to the current working directory.

    Returns
    -------
    DataFrame
        The parsed JSON passed straight to the DataFrame constructor.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    ValueError
        If the remaining text is not valid JSON.
    """
    if model_file is None:
        model_file = os.path.join('3_0', 'MagIC Data Model v3.0 - unpublished.json')
    # Decode as ASCII and drop every byte that is not ASCII. This matches
    # unicode(string, errors='ignore') under the default ASCII codec, and
    # io.open accepts these arguments on both Python 2 and Python 3.
    # The `with` block closes the handle even if reading or parsing fails.
    with io.open(model_file, 'r', encoding='ascii', errors='ignore') as f:
        raw = json.loads(f.read())
    full = DataFrame(raw)
    return full
    
# Load the data model once; `full` is reused by later cells.
full = get_data_model()

# Only ['tables'][table]['columns'] is needed from the model.  The raw column
# mapping is transposed so that each row is one column name.
location = DataFrame(full['tables']['locations']['columns']).transpose()

# Same frame under the name used below; the file is not read and parsed a
# second time.
full_df = full

# One transposed frame per table, keyed by table name.
data_model = {}
levels = ['specimens', 'samples', 'sites', 'locations', 'criteria']
for level in levels:
    data_model[level] = DataFrame(full_df['tables'][level]['columns']).transpose()

data_model['sites']

Unnamed: 0,description,examples,group,label,notes,position,previous_columns,type,unit,urls,validations
age,Site inferred age,,Age,Inferred Age,,29,"[{u'column': u'site_inferred_age', u'table': u...",Number,Custom,,"[requiredUnless(""age_low"",""age_high""), require..."
age_high,"Site inferred age, High range",,Age,Inferred Age High,,32,"[{u'column': u'site_inferred_age_high', u'tabl...",Number,Custom,,"[min(""age_low""), requiredUnless(""age"")]"
age_low,"Site inferred age, Low range",,Age,Inferred Age Low,,31,"[{u'column': u'site_inferred_age_low', u'table...",Number,Custom,,"[max(""age_high""), requiredUnless(""age"")]"
age_sigma,"Site inferred age, Uncertainty",,Age,Inferred Age Sigma,Standard error or standard deviation at one sigma,30,"[{u'column': u'site_inferred_age_sigma', u'tab...",Number,Custom,,[min(0)]
age_unit,"Site inferred age, Unit",,Age,Inferred Age Unit,,33,"[{u'column': u'site_inferred_age_unit', u'tabl...",String,,,"[cv(""age_unit""), required()]"
analyst_names,Colon-delimited list of EarthRef handles and/o...,[@user1:@user2:Not A. Member <no.earthref.hand...,Metadata,Analyst Names,,112,"[{u'column': u'er_analyst_mail_names', u'table...",List,,,"[type(""users"")]"
aniso_f,"Foliation, T2/T3",,Anisotropy,Anisotropy F,,97,"[{u'column': u'anisotropy_f', u'table': u'rmag...",Number,Dimensionless,,
aniso_ff,"Log foliation, ln(F)",,Anisotropy,Anisotropy F',,99,"[{u'column': u'anisotropy_ff', u'table': u'rma...",Number,Dimensionless,[http://dx.doi.org/10.1130/0016-7606(1977)88<1...,
aniso_fl,F/L,,Anisotropy,Anisotropy FL Ratio,,101,"[{u'column': u'anisotropy_fl', u'table': u'rma...",Number,Dimensionless,,
aniso_ftest,F statistical test for anisotropy,,Anisotropy,Anisotropy F Test,,102,"[{u'column': u'anisotropy_ftest', u'table': u'...",Number,%,,


## Extracting info from the data model

In [5]:
# examples of pulling different pieces of information out of the data model

# every header (row) whose group is 'Age'
cond = location['group'] == 'Age'
age_columns = location.loc[cond]
age_columns

Unnamed: 0,description,examples,group,label,notes,position,previous_columns,type,unit,urls,validations
age_high,"Location inferred age, High range",,Age,Inferred Age High,,36,"[{u'column': u'average_age_high', u'table': u'...",Number,Custom,,"[min(""age_low""), requiredUnlessTable(""sites"")]"
age_low,"Location inferred age, Low range",,Age,Inferred Age Low,,35,"[{u'column': u'average_age_low', u'table': u'p...",Number,Custom,,"[max(""age_high""), requiredUnlessTable(""sites"")]"
age_unit,"Location inferred age, Age unit",,Age,Inferred Age Unit,,37,"[{u'column': u'average_age_unit', u'table': u'...",String,,,"[cv(""age_unit""), requiredUnlessTable(""sites"")]"


In [6]:
# get a particular column (one row of the transposed data model, by label)
# .loc replaces the deprecated .ix indexer, which pandas 1.0 removed
location.loc['age_high']

description                         Location inferred age, High range
examples                                                          NaN
group                                                             Age
label                                               Inferred Age High
notes                                                             NaN
position                                                           36
previous_columns    [{u'column': u'average_age_high', u'table': u'...
type                                                           Number
unit                                                           Custom
urls                                                              NaN
validations            [min("age_low"), requiredUnlessTable("sites")]
Name: age_high, dtype: object

In [7]:
# get validations for a particular column
# a single .loc[row, column] lookup replaces the deprecated, chained
# .ix[row][column] access
validations = location.loc['age_high', 'validations']
validations

[u'min("age_low")', u'requiredUnlessTable("sites")']

In [8]:
# distinct group names that appear in the locations table
location.loc[:, 'group'].unique()

array([u'Age', u'Metadata', u'Result', u'Direction', u'Position',
       u'Expedition', u'Names', u'Geology', u'Location', u'PADM',
       u'Paleoposition', u'PDM', u'Pole'], dtype=object)

In [9]:
# every row of the locations table that belongs to one group
group = 'Direction'
location.loc[location['group'] == group]

Unnamed: 0,description,examples,group,label,notes,position,previous_columns,type,unit,urls,validations
conglomerate_test,Classification and result of the (intra-format...,,Direction,Conglomerate Test,,50,"[{u'column': u'conglomerate_test', u'table': u...",String,Flag,,"[cv(""conglomerate_test"")]"
contact_test,Classification and result of the (inverse) con...,,Direction,Baked Contact Test,,51,"[{u'column': u'contact_test', u'table': u'pmag...",String,Flag,,"[cv(""contact_test"")]"
dic_inc,Location direction in coordinates specified by...,,Direction,Direction Inclination,,40,"[{u'column': u'average_inc', u'table': u'pmag_...",Number,Degrees,,"[min(-90), max(90), requiredIfGroup(""Direction"")]"
dir_alpha95,Location direction in coordinates specified by...,,Direction,Direction Alpha 95%,Confidence Level = 95%,41,"[{u'column': u'average_alpha95', u'table': u'p...",Number,Degrees,,[min(0)]
dir_dec,Location direction in coordinates specified by...,,Direction,Direction Declination,,39,"[{u'column': u'average_dec', u'table': u'pmag_...",Number,Degrees,,"[min(0), max(360), requiredIfGroup(""Direction"")]"
dir_k,Location direction in coordinates specified by...,,Direction,Direction K,,43,"[{u'column': u'average_k', u'table': u'pmag_re...",Number,Dimensionless,,[min(0)]
dir_k_ratio,Comparison of Fisher dispersion K after and be...,,Direction,Direction Tilt K Ratio,,44,"[{u'column': u'tilt_k_ratio', u'table': u'pmag...",Number,Dimensionless,,[min(0)]
dir_n_samples,Number of samples included in directional calc...,,Direction,Direction N Samples,,46,"[{u'column': u'average_nn', u'table': u'pmag_r...",Integer,,,[min(0)]
dir_n_sites,Number of sites included in directional calcul...,,Direction,Direction N Sites,,45,"[{u'column': u'average_n', u'table': u'pmag_re...",Integer,,,[min(0)]
dir_polarity,"Location direction polarity is normal (n), rev...",,Direction,Direction Polarity,,47,,String,Flag,,"[cv(""polarity"")]"


In [10]:
# get all column labels for locations
# Single-argument print(...) gives the same output under Python 2 and also
# parses under Python 3.
print(list(location.index))
# Is a column unconditionally required?  .loc[row, column] replaces the
# deprecated, chained .ix[row][column] lookup.
print('required()' in location.loc['location_name', 'validations'])
print('required()' in location.loc['continent_ocean', 'validations'])

[u'age_high', u'age_low', u'age_unit', u'analyst_names', u'citations', u'conglomerate_test', u'contact_test', u'continent_ocean', u'country', u'criteria_names', u'description', u'dic_inc', u'dir_alpha95', u'dir_dec', u'dir_k', u'dir_k_ratio', u'dir_n_samples', u'dir_n_sites', u'dir_polarity', u'dir_r', u'dir_tilt_correction', u'elevation_high', u'elevation_low', u'expedition_description', u'expedition_leg', u'expedition_name', u'expedition_ship', u'expedition_url', u'experiment_names', u'external_database_ids', u'fold_test', u'fold_test_significance', u'geologic_classes', u'geological_province_sections', u'lat_n', u'lat_s', u'lithologies', u'location_name', u'location_name_alternatives', u'location_type', u'lon_e', u'lon_w', u'method_codes', u'ocean_sea', u'padm', u'padm_n_sites', u'padm_sigma', u'paleolat', u'paleolat_sigma', u'paleolon', u'paleolon_sigma', u'pdm', u'pdm_n_sites', u'pdm_sigma', u'pi_names', u'plate_blocks', u'pole_alpha95', u'pole_antipodal_angle', u'pole_bc_q', u'pol

In [11]:
# show the distinct groups present in the locations table
print(location['group'].unique())

# order the column names by the group they belong to
location.sort_values(by='group')



[u'Age' u'Metadata' u'Result' u'Direction' u'Position' u'Expedition'
 u'Names' u'Geology' u'Location' u'PADM' u'Paleoposition' u'PDM' u'Pole']


Unnamed: 0,description,examples,group,label,notes,position,previous_columns,type,unit,urls,validations
age_high,"Location inferred age, High range",,Age,Inferred Age High,,36,"[{u'column': u'average_age_high', u'table': u'...",Number,Custom,,"[min(""age_low""), requiredUnlessTable(""sites"")]"
age_low,"Location inferred age, Low range",,Age,Inferred Age Low,,35,"[{u'column': u'average_age_low', u'table': u'p...",Number,Custom,,"[max(""age_high""), requiredUnlessTable(""sites"")]"
age_unit,"Location inferred age, Age unit",,Age,Inferred Age Unit,,37,"[{u'column': u'average_age_unit', u'table': u'...",String,,,"[cv(""age_unit""), requiredUnlessTable(""sites"")]"
reversal_test,Classification and result of the reversal test,,Direction,Reversal Test,,52,"[{u'column': u'reversal_test', u'table': u'pma...",String,Flag,,"[cv(""reversal_test"")]"
fold_test_significance,Significance level achieved in tilt correction...,,Direction,Fold Test Significance,,48,"[{u'column': u'fold_test_significance', u'tabl...",Number,%,,"[min(0), max(100)]"
fold_test,Classification and result of the folding test,,Direction,Fold Test,,49,"[{u'column': u'fold_test', u'table': u'pmag_re...",String,Flag,,"[cv(""fold_test"")]"
dir_tilt_correction,Percentage tilt correction applied to the data,,Direction,Direction Tilt Correction,Correction between geographic (0%) and stratig...,38,"[{u'column': u'tilt_correction', u'table': u'p...",Number,%,,"[min(-3), max(100), requiredIfGroup(""Direction"")]"
dir_polarity,"Location direction polarity is normal (n), rev...",,Direction,Direction Polarity,,47,,String,Flag,,"[cv(""polarity"")]"
dir_n_sites,Number of sites included in directional calcul...,,Direction,Direction N Sites,,45,"[{u'column': u'average_n', u'table': u'pmag_re...",Integer,,,[min(0)]
dir_n_samples,Number of samples included in directional calc...,,Direction,Direction N Samples,,46,"[{u'column': u'average_nn', u'table': u'pmag_r...",Integer,,,[min(0)]


In [12]:
# build the headers the same way the current builder.py does
# (we may not keep this layout once magic_gui.py and pmag_gui.py are updated)

# True for each column whose validations include an unconditional 'required()'
cond = location['validations'].map(lambda rules: 'required()' in str(rules))

reqd_loc_headers = list(map(str, location[cond].index))
_reqd_lookup = set(reqd_loc_headers)
all_loc_headers = [str(name) for name in location.index if name not in _reqd_lookup]

# this is basically how self.headers is organized now in builder.py
headers = [[], reqd_loc_headers, all_loc_headers]

headers

[[],
 ['location_name', 'location_type'],
 ['age_high',
  'age_low',
  'age_unit',
  'analyst_names',
  'citations',
  'conglomerate_test',
  'contact_test',
  'continent_ocean',
  'country',
  'criteria_names',
  'description',
  'dic_inc',
  'dir_alpha95',
  'dir_dec',
  'dir_k',
  'dir_k_ratio',
  'dir_n_samples',
  'dir_n_sites',
  'dir_polarity',
  'dir_r',
  'dir_tilt_correction',
  'elevation_high',
  'elevation_low',
  'expedition_description',
  'expedition_leg',
  'expedition_name',
  'expedition_ship',
  'expedition_url',
  'experiment_names',
  'external_database_ids',
  'fold_test',
  'fold_test_significance',
  'geologic_classes',
  'geological_province_sections',
  'lat_n',
  'lat_s',
  'lithologies',
  'location_name_alternatives',
  'lon_e',
  'lon_w',
  'method_codes',
  'ocean_sea',
  'padm',
  'padm_n_sites',
  'padm_sigma',
  'paleolat',
  'paleolat_sigma',
  'paleolon',
  'paleolon_sigma',
  'pdm',
  'pdm_n_sites',
  'pdm_sigma',
  'pi_names',
  'plate_blocks',
  

## Low-level builder stuff with 3.0 -- (but probably irrelevant now that we are switching to MagicDataFrame and Contributions)

In [13]:
# Switch the working directory to a local PmagPy checkout -- presumably so the
# pmagpy imports below resolve against it; confirm.
# NOTE(review): hard-coded absolute path on one user's machine; os.chdir will
# raise on any machine that lacks this directory.
os.chdir('/Users/nebula/Python/PmagPy')

# NOTE(review): pmagpy.builder is already imported in the first cell; the
# repeated import here is redundant.
from pmagpy import validate_upload
import pmagpy.builder as builder

In [14]:
# Re-import both modules so that edits made on disk are picked up
# (`reload` is a Python 2 builtin).
reload(builder)
reload(validate_upload)
# Hand the full data model DataFrame loaded earlier to the builder.
dmodel = full
b = builder.ErMagicBuilder('/Users/nebula/Python/PmagPy/3_0', dmodel)
# NOTE(review): bare expression in the middle of the cell; its value is not displayed.
b.data_model['locations']


# Construct one object per level by hand; the second argument appears to be
# the parent object -- confirm against the builder classes.
loc = builder.Location('location', None, b.data_model)
site = builder.Site('a_site', loc, b.data_model)
samp = builder.Sample('heyo', site, b.data_model)
# NOTE(review): the specimen is given `site`, not `samp`, as its second
# argument -- presumably it should be the sample; confirm.
spec = builder.Specimen('hi', site, b.data_model)


In [15]:
# exercise each specimen method on the builder; all of them are working

reload(builder)
b.specimens = []

# add a specimen carrying some initial data
spec = b.add_specimen('new_spec', None, {'geologic_classes': "class1:class2"})
print(b.specimens)
print(spec.data)

# rename the specimen and assign it a sample; the new data is added to the old
b.change_specimen('new_spec', 'cool_spec', 'sample', {'geologic_types': "type1:type2"})
print(spec.data)

# with replace_data=True the existing data is replaced rather than extended
b.change_specimen('cool_spec', 'cool_spec', 'sample', {'geologic_types': "type3"}, replace_data=True)
print(spec.data)

# delete the specimen; the sample created along the way remains
b.delete_methods['specimen']('cool_spec')
print(b.specimens)
print(b.samples)

[specimen: new_spec]
{'geologic_classes': 'class1:class2'}
-W- sample is not a currently existing sample.
Creating a new sample named: sample 
{'geologic_classes': 'class1:class2', 'geologic_types': 'type1:type2'}
{'geologic_types': 'type3'}
[]
[sample: sample]


In [16]:
reload(builder)
b = builder.ErMagicBuilder('/Users/nebula/Python/PmagPy/3_0', dmodel)

spec = b.add_specimen('spec', None, {'geologic_classes': 'awesome'}, ['Geology', 'Specimen'])
spec.groups

data_type = 'specimens'
model_table = b.data_model[data_type]

# collect every header that belongs to one of the specimen's groups
l = []
for group in spec.groups:
    l += list(model_table[model_table['group'] == group].index)

# then the headers that are required unconditionally
cond = model_table['validations'].map(lambda rules: 'required()' in str(rules))
l += list(model_table[cond].index)

# still open: how to handle the requiredUnless / requiredIf validations

model_table['validations']['age']



[u'requiredUnless("age_low","age_high")', u'requiredIf("age_sigma")']

In [17]:
# data type of the 'age' column in the specimens table
# .loc[row, column] replaces the deprecated .ix indexer, which pandas 1.0 removed
b.data_model['specimens'].loc['age', 'type']

u'Number'