This notebook explores the remote database and using native pandas to represent time.

In [2]:
import json
import requests
import pandas as pd
import numpy as np
import io

## Querying for retrograde data

This query return information about age. Let's first use 'years BP' or 'BP' to look for data

In [3]:
url = 'https://linkedearth.graphdb.mint.isi.edu/repositories/LiPDVerse'

query = """
PREFIX le: <http://linked.earth/ontology#>
select ?val ?timeval ?varunits ?timeunits ?dsname ?varname ?timevarname where { 
    ?ds le:name ?dsname .
    ?ds le:includesPaleoData ?data .   
    ?data le:foundInMeasurementTable ?table .
    ?table le:includesVariable ?var .
    ?var le:name ?varname .
    FILTER (?varname != "age")
    FILTER (?varname != "year")
    ?var le:hasVariableID ?varID .
    ?var le:hasValues ?val .
        OPTIONAL{?var le:hasUnits ?varunits .}
    ?table le:includesVariable ?timevar .
    ?timevar le:name ?timevarname .
        VALUES ?timevarname {"age"} .
    ?timevar le:hasValues ?timeval .
    ?timevar le:hasUnits ?timeunits .
        VALUES ?timeunits {"yr BP" "BP"}
}
LIMIT 100
"""
response = requests.post(url, data = {'query': query})

data = io.StringIO(response.text)
df = pd.read_csv(data, sep=",")

# Make list from the values string
df['val']=df['val'].apply(lambda row : np.fromstring(row.strip("[]"), sep=','))
df['timeval']=df['timeval'].apply(lambda row : np.fromstring(row.strip("[]"), sep=','))

df.head()

  df['val']=df['val'].apply(lambda row : np.fromstring(row.strip("[]"), sep=','))


Unnamed: 0,val,timeval,varunits,timeunits,dsname,varname,timevarname
0,"[20.4, 20.4, 20.7, 20.6, 20.5, 20.2, 19.3, 19....","[0.0, 13.0, 26.0, 39.0, 52.0, 64.0, 77.0, 90.0...",degC,BP,NAm-DarkLake.Gajewski.1988,temperature,age
1,"[18.4, 18.4, 19.1, 18.4, 18.9, 18.7, 18.6, 19....","[-10.0, 24.0, 58.0, 93.0, 127.0, 191.0, 248.0,...",degC,BP,NAm-ClearPond.Gajewski.1988,temperature,age
2,[],"[-44.0, -34.0, -13.0, 7.0, 18.0, 34.0, 45.0, 5...",,BP,Eur-CentralandEasternPyrenees.Pla.2004,sampleID,age
3,"[0.0, 0.09114, -0.19458, 0.07387, -0.42006, -0...","[-44.0, -34.0, -13.0, 7.0, 18.0, 34.0, 45.0, 5...",degC,BP,Eur-CentralandEasternPyrenees.Pla.2004,temperature,age
4,"[0.13984, 0.15345, 0.16085, 0.13493, 0.14066, ...","[-44.0, -34.0, -13.0, 7.0, 18.0, 34.0, 45.0, 5...",degC,BP,Eur-CentralandEasternPyrenees.Pla.2004,uncertainty_temperature,age


In [7]:
arr2 = np.array(df['timeval'].iloc[0], dtype="M8[s]").view("M8[Y]")

arr2

array(['1970', '1983', '1996', '2009', '2022', '2034', '2047', '2060',
       '2073', '2086', '2092', '2099', '2105', '2111', '2128', '2145',
       '2159', '2171', '2184', '2193', '2211', '2265', '2315', '2359',
       '2403', '2440', '2482', '2515', '2546', '2575', '2602', '2630',
       '2657', '2688', '2720', '2756', '2794', '2834', '2876'],
      dtype='datetime64[Y]')