In [1]:
import programs.new_builder as nb
from pmagpy import ipmag
import os
import json
import numpy as np
import sys

import pandas as pd
from pandas import DataFrame
from pmagpy import builder
from pmagpy import validate_upload
from pmagpy import pmag

pmagpy_dir = os.path.join(os.getcwd(), 'pmagpy')
if pmagpy_dir not in sys.path:
    sys.path.append(pmagpy_dir)
    
working_dir = "/Users/nebula/Python/PmagPy/3_0"

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



# DataFrame way

In [2]:
# MagicDataFrame
# Python object containing a DataFrame that corresponds to 1 table
# plus helper methods
        
class MagicDataFrame(object):

    def __init__(self, magic_file):
        data, dtype = pmag.magic_read(magic_file)
        self.df = DataFrame(data)
        if dtype.endswith('s'):
            dtype = dtype[:-1]
            name = '{}_name'.format(dtype)
            if dtype == 'contribution':
                name = 'doi'
            self.df.index = self.df[name]
            #del self.df[name]                                                                                   
            self.df.dtype = dtype
            # replace '' with np.nan, so you can use isnull(), notnull(), etc.                                   
            # can always switch back with DataFrame.fillna('')                                                   
            self.df[self.df == ''] = np.nan


    def add_blank_row(self, label):
        col_labels = self.df.columns
        blank_item = pd.Series({}, index=col_labels, name=label)
        self.df = self.df.append(blank_item)

    def get_name(self, col_name, df_slice="", index_names=""):
        """                                                                                                      
        Takes in a column name, and either a DataFrame slice or                                                  
        a list of index_names to slice self.df using fancy indexing.                                             
        Then return the value for that column in the relevant slice.                                             
        """
        # if slice is provided, use it                                                                           
        if any(df_slice):
            df_slice = df_slice
        # if given index_names, grab a slice using fancy indexing                                                
        elif index_names:
            df_slice = self.df.ix[index_names]
        # otherwise, use the full DataFrame                                                                      
        else:
            df_slice = self.df
        # if the slice is empty, return ""                                                                       
        if len(df_slice) == 0:
            return ""
        # if the column name isn't present in the slice, return ""                                               
        if col_name not in df_slice.columns:
            return ""
        # otherwise, return the first value from that column                                                     
        return df_slice[col_name][0]
    
    def get_di_block(self, df_slice=None, do_index=False, item_names=[], tilt_corr='100'):
        """                                                                                                      
        Input either a DataFrame slice                                                                           
        or                                                                                                       
        do_index=True and a list of index_names.                                                                 
        Output dec/inc from the slice in this format:                                                            
        [[dec1, inc1], [dec2, inc2], ...]                                                                        
        """
        if isinstance(df_slice, str):
            if df_slice == "all":
                # use entire DataFrame                                                                           
                df_slice = self.df
        elif do_index:
            # use fancy indexing (but note this will give duplicates)                                            
            df_slice = self.df.ix[item_names]
        elif not do_index:
            # otherwise use the provided slice                                                                   
            df_slice = df_slice

        # once you have the slice, fix up the data                                                               
        df_slice = df_slice[df_slice['dir_tilt_correction'] == tilt_corr]
        df_slice = df_slice[df_slice['dir_inc'].notnull() & df_slice['dir_dec'].notnull()]
        # possible add in:                                                                                       
        # split out di_block from this study from di_block from other studies (in citations column)              
        # for now, just use "This study"                                                                         
        if 'citations' in df_slice.columns:
            df_slice = df_slice[df_slice['citations'] == "This study"]

        # convert values into DIblock format                                                                     
        di_block = [[float(row['dir_dec']), float(row['dir_inc'])] for ind, row in df_slice.iterrows()]
        return di_block



In [68]:
# Contribution consists of multiple MagicDataFrames

class Contribution(object):

    def __init__(self, directory):
        directory = os.path.realpath(directory)
        tables = ['measurements', 'specimens', 'samples',
                  'sites', 'locations', 'contribution',
                  'criteria', 'ages', 'images']

        self.tables = {}
        for name in tables:
            filename = os.path.join(directory, name + ".txt")
            if os.path.exists(filename):
                self.tables[name] = MagicDataFrame(filename)


    def rename_item(self, table_name, item_old_name, item_new_name):

        # define some helper methods:                                                                            
        def split_if_str(x):
            if isinstance(x, str):
                return x.split(':')
            else:
                return x

        def put_together_if_str(x):
            try:
                return ":".join(x)
            except TypeError:
                return x

        def replace_colon_delimited_value(df, col_name, old_value, new_value):
            """                                                                                                  
            Col must contain list                                                                                
            """
            for index, row in df[df[col_name].notnull()].iterrows():
                names_list = row[col_name]
                try:
                    ind = names_list.index(old_value)
                except ValueError:
                    continue
                names_list[ind] = new_value


        # initialize some things                                                                                 
        item_type = table_name
        col_name = item_type[:-1] + "_name"
        col_name_plural = col_name + "s"
        table_df = self.tables[item_type].df
        # rename item in its own table                                                                           
        table_df.rename(index={item_old_name: item_new_name}, inplace=True)
        # rename in any parent/child tables                                                                      
        for table_name in self.tables:
            df = self.tables[table_name].df
            col_names = df.columns
            # change anywhere col_name (singular, i.e. site_name) is found 
            if col_name in col_names:
                df[col_name].where(df[col_name] != item_old_name, item_new_name, inplace=True)
            # change anywhere col_name (plural, i.e. site_names) is found                                    
                
            if col_name_plural in col_names:
                df[col_name_plural + "_list"] = df[col_name_plural].apply(split_if_str)
                replace_colon_delimited_value(df, col_name_plural + "_list", item_old_name, item_new_name)
                df[col_name_plural] = df[col_name_plural + "_list"].apply(put_together_if_str)
            #self.tables[table_name].df = df


def reset_con():
    con = Contribution(working_dir)
    site_container = con.tables['sites']
    site_container.df = site_container.df
    return con, site_container, site_container.df
            
# create a Contribution 

con = Contribution(working_dir)

site_container = con.tables['sites']
site_container.df = site_container.df



Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
extra_special_site,45.0,110.0,This study,,,107.7,-9.3,125.0,6,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,extra_special_site,,,
extra_special_site,45.0,110.0,This study,,,106.0,-54.2,125.0,6,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,extra_special_site,,,
extra_special_site,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 16,106.0,-54.2,,1,,...,48.639491,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.89768,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,extra_special_site,35.26202829,196.5441708,1.0


In [4]:
# DataFrame of all read-in sites
site_container.df[:5]

Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,10.0,135.0,This study,,,293.1,34.5,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,10.0,135.0,This study,,,289.8,43.6,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,,...,48.626429,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.894142,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,1,31.65155202,185.5745062,1.0
2,12.0,140.0,This study,,,290.6,31.9,243.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,2,,,
2,12.0,140.0,This study,,,285.7,42.0,243.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,2,,,


In [5]:
# all sites with site_name (index) of '1'
# will return a smaller DataFrame (or a Series if there is only 1 row with that index)
site_container.df.ix['1']


Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,10.0,135.0,This study,,,293.1,34.5,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,10.0,135.0,This study,,,289.8,43.6,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,,...,48.626429,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.894142,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,1,31.65155202,185.5745062,1.0


In [6]:
# index by position (using an integer), will always return a single record as Series
# in this case, get the second record
site_container.df.ix[1]

bed_dip                                                           10
bed_dip_direction                                                135
citations                                                 This study
conglomerate_test                                                NaN
description                                                      NaN
dir_dec                                                        289.8
dir_inc                                                         43.6
dir_k                                                            517
dir_n_samples                                                      5
dir_polarity                                                       n
dir_tilt_correction                                              100
geologic_classes                                           Extrusive
geologic_types                                             Lava Flow
lat                                                              NaN
lithologies                       

In [7]:
# return all sites with the description column filled in
cond = site_container.df['description'].notnull()
site_container.df[cond]

Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,,...,48.626429,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.894142,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,1,31.65155202,185.5745062,1
2,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 2,285.7,42.0,,1,,...,48.637185,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.913846,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,2,28.11008553,187.514758,1
3,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 3,307.4,37.9,,1,,...,48.640811,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.928039,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,3,40.26060376,167.8862917,1
4,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 4,302.8,29.9,,1,,...,48.655732,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.959099,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,4,33.46021472,167.4988097,1
5,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 5,294.7,42.5,,1,,...,48.661489,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.958187,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,5,34.30926268,181.2568193,1
6,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 6,106.1,-44.2,,1,,...,48.661051,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.948459,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,6,29.54088872,188.6533022,1
7,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 7,109.8,-62.4,,1,,...,48.678061,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.97532,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,7,42.90020395,203.8166221,1
8,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 8,140.7,-64.8,,1,,...,48.680014,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.973994,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,8,63.77760756,192.7357421,1
9,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 9,110.0,-56.6,,1,,...,48.676652,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.940536,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,9,39.32495688,196.4878545,1
10,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 10,128.2,-42.0,,1,,...,48.664307,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.930409,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,10,42.89645001,169.9339018,1


In [8]:
# update site's value for 'conglomerate_test' to 25 if that value was previously null
site_container.df['conglomerate_test'] = np.where(site_container.df['conglomerate_test'].isnull(), 25, site_container.df['conglomerate_test'])
site_container.df[:5]


Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,10.0,135.0,This study,25,,293.1,34.5,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,10.0,135.0,This study,25,,289.8,43.6,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,,...,48.626429,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.894142,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,1,31.65155202,185.5745062,1.0
2,12.0,140.0,This study,25,,290.6,31.9,243.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,2,,,
2,12.0,140.0,This study,25,,285.7,42.0,243.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,2,,,


In [69]:
# rename one of the Contribution's sites
con.rename_item('sites', '16', 'extra_special_site')
con.tables['sites'].df.ix[['extra_special_site']]
# all rows previously named '16' are now named 'extra_special_site'

Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
extra_special_site,45.0,110.0,This study,,,107.7,-9.3,125.0,6,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,extra_special_site,,,
extra_special_site,45.0,110.0,This study,,,106.0,-54.2,125.0,6,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,extra_special_site,,,
extra_special_site,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 16,106.0,-54.2,,1,,...,48.639491,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.89768,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,extra_special_site,35.26202829,196.5441708,1.0


In [70]:
# additionally, '16' has been replaced in the location table under site_names
con.tables['locations'].df.ix[["Osler Volcanics, Nipigon Strait, Lower Reversed"]][['site_names']]

Unnamed: 0_level_0,site_names
location_name,Unnamed: 1_level_1
"Osler Volcanics, Nipigon Strait, Lower Reversed",
"Osler Volcanics, Nipigon Strait, Lower Reversed",6:7:8:9:10:11:12:13:14:15:extra_special_site:1...


In [9]:

reload(nb)

site_container, site_container.df = reset_site()

# get di block, providing index
print site_container.get_di_block(do_index=True, item_names=['1', '2'], tilt_corr='100')

# get di block, providing slice
print site_container.get_di_block(site_container.df.ix[['1', '2']])



[[289.8, 43.6], [285.7, 42.0]]
[[289.8, 43.6], [285.7, 42.0]]


In [10]:
# initialize a new site with no values

def add_blank_item(df, label):
    col_labels = df.columns
    blank_item = pd.Series({}, index=col_labels, name=label)
    df = df.append(blank_item)

    
site_container.add_blank_row('blank_site')
site_container.df = site_container.df
site_container.df.tail()

Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
29,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 29,129.1,-54.5,,1.0,,...,48.679736,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.897319,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,29.0,50.54048276,181.0980435,1.0
30,6.0,129.0,This study,,,98.7,-75.8,267.0,5.0,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,30.0,,,
30,6.0,129.0,This study,,,80.5,-80.4,267.0,5.0,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,30.0,,,
30,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 30,80.5,-80.4,,1.0,,...,48.593955,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.782033,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,30.0,42.49393304,246.4019953,1.0
blank_site,,,,,,,,,,,...,,,,,,,,,,


In [11]:
# copy a site from the site DataFrame, 
#change a few values, 
#then add the new site to the site DataFrame
new_site = site_container.df.ix[2]
new_site['bed_dip'] = "other"
new_site.name = 'new_site'
site_container.df = site_container.df.append(new_site)
site_container.df.tail()

Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
30,6,129.0,This study,,,98.7,-75.8,267.0,5.0,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,30.0,,,
30,6,129.0,This study,,,80.5,-80.4,267.0,5.0,r,...,,Basalt,"Osler Volcanics, Nipigon Strait, Lower Reversed",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,30.0,,,
30,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 30,80.5,-80.4,,1.0,,...,48.593955,,"Osler Volcanics, Nipigon Strait, Lower Reversed",271.782033,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,30.0,42.49393304,246.4019953,1.0
blank_site,,,,,,,,,,,...,,,,,,,,,,
new_site,other,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1.0,,...,48.626429,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.894142,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,1.0,31.65155202,185.5745062,1.0


In [12]:
# get location DataFrame

loc_df = con.tables['locations'].df

In [13]:
# get list of all sites with the same 'location_name'
name = loc_df.ix[1].name
site_container.df[site_container.df['location_name'] == name].index

Index([u'1', u'1', u'1', u'2', u'2', u'2', u'3', u'3', u'3', u'4', u'4', u'4',
       u'5', u'5', u'5', u'new_site'],
      dtype='object', name=u'site_name')

In [14]:
# get all sites belonging to a particular location RECORD (i.e., what used to be a result)
# (NOT all sites with the same location name)
name = loc_df.ix[1].name
loc_record = loc_df.ix[name].ix[1]
site_names = loc_record['site_names']
print loc_record['site_names']
site_names = site_names.split(":")
# fancy indexing
site_container.df.ix[site_names]



1:2:3:4:5


Unnamed: 0_level_0,bed_dip,bed_dip_direction,citations,conglomerate_test,description,dir_dec,dir_inc,dir_k,dir_n_samples,dir_polarity,...,lat,lithologies,location_name,lon,method_codes,result_type,site_name,vgp_lat,vgp_lon,vgp_n_samples
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,10,135.0,This study,,,293.1,34.5,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,10,135.0,This study,,,289.8,43.6,517.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,1,,,
1,other,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,,...,48.626429,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.894142,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,1,31.65155202,185.5745062,1.0
2,12,140.0,This study,,,290.6,31.9,243.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,2,,,
2,12,140.0,This study,,,285.7,42.0,243.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,2,,,
2,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 2,285.7,42.0,,1,,...,48.637185,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.913846,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,2,28.11008553,187.514758,1.0
3,14,146.0,This study,,,310.0,24.6,2485.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,3,,,
3,14,146.0,This study,,,307.4,37.9,2485.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,3,,,
3,,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 3,307.4,37.9,,1,,...,48.640811,,"Osler Volcanics, Nipigon Strait, Upper Normal",271.928039,DE-DI:FS-LOC-GOOGLE:LP-DC2,i,3,40.26060376,167.8862917,1.0
4,12,135.0,This study,,,303.9,18.2,331.0,5,n,...,,Basalt,"Osler Volcanics, Nipigon Strait, Upper Normal",,DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM,,4,,,


In [15]:
# Get names if all sites with a particular method code

import re
meth_code = 'DE-K'
pattern = re.compile('{}(?=:|\s|\Z)'.format(meth_code))
# use regex to see if the pattern shows up in the method codes col
# (must use fillna to replace np.nan with False for indexing)
cond = site_container.df['method_codes'].str.contains(pattern).fillna(False)
# print all site records with that method code: 
print site_container.df[cond]['method_codes'].index
# and all WITHOUT that method code:
print site_container.df[~cond]['method_codes'].index


Index([u'2', u'3', u'3', u'4', u'4', u'5', u'5', u'6', u'6', u'7', u'7', u'8',
       u'8', u'9', u'9', u'10', u'10', u'11', u'11', u'12', u'12', u'13',
       u'13', u'14', u'14', u'15', u'15', u'16', u'16', u'17', u'17', u'18',
       u'18', u'19', u'19', u'20', u'20', u'21', u'21', u'22', u'22', u'23',
       u'23', u'24', u'24', u'25', u'25', u'26', u'26', u'27', u'27', u'28',
       u'28', u'29', u'29', u'30', u'30'],
      dtype='object', name=u'site_name')
Index([u'1', u'1', u'1', u'2', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9',
       u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19',
       u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28', u'29',
       u'30', u'blank_site', u'new_site'],
      dtype='object', name=u'site_name')


In [16]:
# Rename a site

# first, rename the index on the site_container.df
site_name = '1'
new_name = '111'
site_container.df.rename(index={site_name: new_name}, inplace=True)
# now, we need to fix the name in the location + sample tables

#loc_df.site_names.isin(['1:2:3:4:5'])

# next, define some helper methods:
def split_if_str(x):
    if isinstance(x, str):
        return x.split(':')
    else:
        return x
    
def put_together_if_str(x):
    try:
        return ":".join(x)
    except TypeError:
        return x
    
def replace_colon_delimited_value(df, col_name, old_value, new_value):
    """
    Col must contain list
    """
    for index, row in df[df[col_name].notnull()].iterrows():
        names_list = row[col_name]
        try:
            ind = names_list.index(old_value)
        except ValueError:
            continue
        names_list[ind] = new_value

# now, split colon-delimited string into list, make new column of site_names as list, not string
loc_df['site_names_list'] = loc_df['site_names'].apply(split_if_str)         

# replace old value with new wherever it occurs in site_names_list
replace_colon_delimited_value(loc_df, "site_names_list", site_name, new_name)

# put list back into string form and update site_names
loc_df['site_names'] = loc_df['site_names_list'].apply(put_together_if_str)

# get rid of temporary column site_names_list
del loc_df['site_names_list']

# ta-da
loc_df

# would also need to change names in sample table, age table, and image table

Unnamed: 0_level_0,citations,conglomerate_test,contact_test,continent_ocean,country,description,dir_alpha95,dir_dec,dir_inc,dir_k,...,pole_comp_name,pole_lat,pole_lon,pole_n_sites,region,result_type,reversal_test,rock_magnetic_test,site_names,tectonic_settings
location_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Osler Volcanics, Nipigon Strait, Lower Reversed",This study,,,North America,Canada,,,,,,...,,,,,Lake Superior,,,,,Continental Rift
"Osler Volcanics, Nipigon Strait, Upper Normal",This study,,,North America,Canada,,,,,,...,,,,,Lake Superior,,,,,Continental Rift
"Osler Volcanics, Nipigon Strait, Lower Reversed",Cox & Doell 1960:Fisher 1953:McElhinny & McFad...,ND,ND,,,All Reversed Flows,4.624479368,114.967445,-57.57295621,40.16441995,...,Characteristic,33.85573654,178.2632514,5.0,,a,ND,ND,6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22...,
"Osler Volcanics, Nipigon Strait, Upper Normal",Cox & Doell 1960:Fisher 1953:McElhinny & McFad...,ND,ND,,,All Normal Flows,8.47928537,296.4616437,39.46409514,82.38571031,...,Characteristic,43.6574194,196.3384635,25.0,,a,ND,ND,111:2:3:4:5,


In [17]:
# merge location and site DataFrames
# basically will be site_container.df but with all of the relevant location
pd.merge(site_container.df, loc_df, 'inner', right_index=True, left_on=['location_name'])

Unnamed: 0_level_0,location_name,bed_dip,bed_dip_direction,citations_x,conglomerate_test_x,description_x,dir_dec_x,dir_inc_x,dir_k_x,dir_n_samples,...,pole_comp_name,pole_lat,pole_lon,pole_n_sites,region,result_type_y,reversal_test,rock_magnetic_test,site_names,tectonic_settings
site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
111,"Osler Volcanics, Nipigon Strait, Upper Normal",10,135,This study,,,293.1,34.5,517,5,...,,,,,Lake Superior,,,,,Continental Rift
111,"Osler Volcanics, Nipigon Strait, Upper Normal",10,135,This study,,,293.1,34.5,517,5,...,Characteristic,43.6574194,196.3384635,25,,a,ND,ND,111:2:3:4:5,
111,"Osler Volcanics, Nipigon Strait, Upper Normal",10,135,This study,,,289.8,43.6,517,5,...,,,,,Lake Superior,,,,,Continental Rift
111,"Osler Volcanics, Nipigon Strait, Upper Normal",10,135,This study,,,289.8,43.6,517,5,...,Characteristic,43.6574194,196.3384635,25,,a,ND,ND,111:2:3:4:5,
111,"Osler Volcanics, Nipigon Strait, Upper Normal",other,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,...,,,,,Lake Superior,,,,,Continental Rift
111,"Osler Volcanics, Nipigon Strait, Upper Normal",other,,Cox & Doell 1960:McElhinny & McFadden 2000,ND,VGP:Site 1,289.8,43.6,,1,...,Characteristic,43.6574194,196.3384635,25,,a,ND,ND,111:2:3:4:5,
2,"Osler Volcanics, Nipigon Strait, Upper Normal",12,140,This study,,,290.6,31.9,243,5,...,,,,,Lake Superior,,,,,Continental Rift
2,"Osler Volcanics, Nipigon Strait, Upper Normal",12,140,This study,,,290.6,31.9,243,5,...,Characteristic,43.6574194,196.3384635,25,,a,ND,ND,111:2:3:4:5,
2,"Osler Volcanics, Nipigon Strait, Upper Normal",12,140,This study,,,285.7,42,243,5,...,,,,,Lake Superior,,,,,Continental Rift
2,"Osler Volcanics, Nipigon Strait, Upper Normal",12,140,This study,,,285.7,42,243,5,...,Characteristic,43.6574194,196.3384635,25,,a,ND,ND,111:2:3:4:5,


## Gotchas

1.  If you reassign site_container.df to it's own variable....


## Pandas indexing

In [18]:
# first site
print site_container.df.ix[0][:5]
print '-'
# find site by index value
print site_container.df.ix['new_site'][:5]
print '-'
# return all sites' values for a col
print site_container.df['bed_dip'][:5]

bed_dip                      10
bed_dip_direction           135
citations            This study
conglomerate_test           NaN
description                 NaN
Name: 111, dtype: object
-
bed_dip                                                   other
bed_dip_direction                                           NaN
citations            Cox & Doell 1960:McElhinny & McFadden 2000
conglomerate_test                                            ND
description                                          VGP:Site 1
Name: new_site, dtype: object
-
site_name
111       10
111       10
111    other
2         12
2         12
Name: bed_dip, dtype: object


# Object oriented way

In [19]:
# get 3.0. data_model locally as it is not yet available on earthref
def get_data_model():
    model_file = os.path.join('3_0', 'MagIC Data Model v3.0 - unpublished.json')
    f = open(model_file, 'r')
    string = '\n'.join(f.readlines())
    raw = json.loads(unicode(string, errors='ignore'))
    full = DataFrame(raw)
    return full

In [20]:
dmodel = get_data_model()
# create builder object to contain all data from a contribution
b = builder.ErMagicBuilder(working_dir, dmodel)
# run method for reading in all available data to builder object
b.get_all_magic_info()

-I- No magic_measurements.txt file
-I- Getting specimen info
-I- Attempting to read /Users/nebula/Python/PmagPy/3_0/specimens.txt
-W- Could not find /Users/nebula/Python/PmagPy/3_0/specimens.txt
-I- Getting sample info
-I- Attempting to read /Users/nebula/Python/PmagPy/3_0/samples.txt
-W- Could not find /Users/nebula/Python/PmagPy/3_0/samples.txt
-I- Getting site info
-I- Attempting to read /Users/nebula/Python/PmagPy/3_0/sites.txt
-I- Getting location info
-I- Attempting to read /Users/nebula/Python/PmagPy/3_0/locations.txt
-W- Could not find /Users/nebula/Python/PmagPy/3_0/ages.txt


In [21]:
# builder object includes a list of all sites
b.sites[:5]

[site: 24, site: 25, site: 26, site: 27, site: 20]

In [22]:
# data for an individual site
b.sites[0].data

{'bed_dip': '',
 'bed_dip_direction': '',
 'citations': 'Cox & Doell 1960:McElhinny & McFadden 2000',
 'conglomerate_test': 'ND',
 'description': 'VGP:Site 24',
 'dir_dec': 116.8,
 'dir_inc': '-40.4',
 'dir_k': '',
 'dir_n_samples': '1',
 'dir_polarity': '',
 'dir_tilt_correction': '100',
 'geologic_classes': '',
 'geologic_types': '',
 'lat': '48.678789',
 'lithologies': '',
 'location_name': 'Osler Volcanics, Nipigon Strait, Lower Reversed',
 'lon': '271.942681',
 'method_codes': 'DE-DI:FS-LOC-GOOGLE:LP-DC2',
 'result_type': 'i',
 'site_name': '24',
 'vgp_lat': '34.61096611',
 'vgp_lon': 178.2474147,
 'vgp_n_samples': '1'}

In [23]:
# an example method for data manipulation -- this one changes all site data to include a given key, value pair
# it could also be used to update an existing key, value pair

def set_key(key, value, lst):
    for i in b.sites:
        i.data[key] = value
        
set_key('new_key', 'new_value', b.sites)

In [24]:
b.sites[0].data['new_key']

'new_value'

In [25]:
# adding a new site to the builder object

b.add_site('new_site', None, data=b.sites[0].data)
b.sites[-5:]

[site: 16, site: 19, site: 18, site: 30, site: new_site]