# The SQL database for Stellab

SQL code to create the Stellab database. The table structure is shown below.

Tables:
* refs: Table with literature references:
    * refid: unique reference id (PK)
    * fauthor: first author
    * year
    * nasads: link to the NASA ADS page

* galaxies
    * galid: unique galaxy id (PK)
    * name: galaxy name

* solarnorms
    * normid: unique reference id (PK)
    * H, He... abundance entries
    * refid: id of the corresponding paper in refs
* abu_table_reg
    * abu_reg_id: unique reference id (PK)
    * refid: to look up the literature reference
    * abutable: table name to look up the abundance table
    * normid: to look up the normalization used

* multiple abundance tables, all with
    * abuid: unique reference id
    * element ratio1, element ratio2...
    * error1, error2 ...

Tips:
* Use not null
* fixed-length type arrays are faster

In [1]:
#We chose sqlite3 because it does not require installing MySQL or running a server.
import sqlite3
import pandas as pd
import stellab as st

SQLite is a C library that provides a lightweight disk-based database that doesn’t require a separate server process and allows accessing the database using a nonstandard variant of the SQL query language. 

Creating database and table refs

### Add references

In [2]:
# Split every stellab legend label of the form "Author (year)" into its author and year part.
st1=st.stellab()
#papers=st1.list_ref_papers()
legs=st1.leg

name=[]
year=[]
for label in legs:
    pieces = label.split('(')
    # text in front of the first '(' is the author
    name.append(pieces[0].strip())
    # text after the first '(' is the year; the last character (the closing bracket) is dropped
    year.append(pieces[1].strip()[:-1])

In [3]:
#show the solar normalizations known to stellab (names are printed below)
st1.list_solar_norm()

Anders_Grevesse_1989
Grevesse_Noels_1993
Grevesse_Sauval_1998
Asplund_et_al_2009
Asplund_et_al_2005
Lodders_et_al_2009


## I. Code to build database

In [4]:
#create table
!rm stellab.db
db=sqlite3.connect('stellab.db')
curser=db.cursor()
curser.execute('''CREATE TABLE IF NOT EXISTS refs (refid integer primary key autoincrement, fauthor text, year year, nasads text)''')
curser.execute('''CREATE TABLE IF NOT EXISTS galaxies (galid integer primary key autoincrement, name text)''')
curser.execute('''CREATE TABLE IF NOT EXISTS solarnorms (normid integer primary key autoincrement, H float, He float, refid integer)''')
curser.execute('''CREATE TABLE IF NOT EXISTS abu_table_reg (abu_reg_id integer primary key autoincrement, refid integer, normid integer,abutable text)''')
db.commit()

In [5]:
def get_column_names(table):
    '''
    Return the column names of table as a list, in the order reported by PRAGMA table_info.
    Only for internal use: table is inserted into the statement unescaped.
    '''
    names = []
    for info in db.execute("PRAGMA table_info(%s)" % table).fetchall():
        #the second field of every table_info row is the column name
        names.append(info[1])
    return names

In [19]:
def check_solar_normalization(name,year):
    '''
    Check if normalization data is available in table solarnorms. Only for internal use.

    name: first author of the normalization paper (compared with refs.fauthor)
    year: publication year of the normalization paper (compared with refs.year)

    Returns the normid of the solarnorms entry, or -1 (after printing a hint) if the paper
    or its normalization data is not found exactly once.
    '''
    #values are bound as parameters, so quotes inside name/year cannot break or alter the query
    papers = db.execute('SELECT refid FROM refs WHERE (fauthor = ? and year = ?)',
                        (name, year)).fetchall()
    if len(papers) != 1:
        print('Solar normalization data is not in database. Add the data first by using add_normalization().')
        return -1

    refid = papers[0][0]
    norms = db.execute('SELECT normid FROM solarnorms WHERE (refid = ?)', (refid,)).fetchall()
    if len(norms) != 1:
        print('normalization data is missing. Add the data first by using add_normalization().')
        return -1

    print('found corresponding normalization data.')
    return norms[0][0]

In [7]:
def add_normalization(norm_paper,norm_label,norm_data):
    '''
    Add data to normalization table solarnorms and corresponding paper to table refs.
    E.g.
    norm_paper=['Venn',2012,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V']
    norm_label=['H','He']
    norm_data = [-2.81,0.34]

    norm_paper: [first author, year, NASA ADS link] of the normalization paper
    norm_label: column names for the entries of norm_data; labels that are not yet a column
                of solarnorms are added as new float columns
    norm_data:  the values, in the same order as norm_label

    Nothing is inserted if exactly one solarnorms entry already exists for the paper.
    '''
    name=norm_paper[0]
    year=norm_paper[1]
    nasads=norm_paper[2]

    #check if paper for normalization data exists already; values are bound as parameters
    papers = db.execute('SELECT refid FROM refs WHERE (fauthor = ? and year = ?)',
                        (name, year)).fetchall()
    if len(papers)==0:
        print('add paper for solar normalization to table refs.')
        #add_paper_ref returns the refid of the paper it inserted
        refid = add_paper_ref(name,year,nasads)
    else:
        refid = papers[0][0]

    norms = db.execute('SELECT normid FROM solarnorms WHERE (refid = ?)', (refid,)).fetchall()
    if len(norms)==1:
        print('normalization data is already available.')
        return

    #Column names cannot be bound as SQL parameters, so the labels are inserted as given:
    #they have to be trusted, valid SQLite identifiers.
    columns = get_column_names('solarnorms')
    for label in norm_label:
        if label not in columns:
            db.execute('ALTER TABLE solarnorms ADD %s float' % label)

    #add normalization data; the numbers are bound as parameters and keep their full precision
    column_sql = ','.join(['refid'] + list(norm_label))
    values = [refid] + list(norm_data)
    placeholders = ','.join('?' * len(values))
    db.execute('INSERT INTO solarnorms (%s) VALUES (%s)' % (column_sql, placeholders), values)
    #make the new entry permanent; without a commit it would be lost when the connection is closed
    db.commit()


In [8]:
def add_paper_ref(name,year,nasads):
    '''
    Add a new paper to the table refs.
    e.g.
    name='Anders'
    year=1989
    nasads='http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G'

    A paper counts as already present if a row with the same fauthor and year exists;
    in that case nothing is inserted.

    Returns the refid of the (new or already existing) paper.
    '''
    #values are bound as parameters, so quotes inside name or nasads cannot break the statement
    matches = db.execute('SELECT refid FROM refs WHERE (fauthor = ? and year = ?)',
                         (name, year)).fetchall()
    if len(matches) != 0:
        print('abu data paper exists already. do nothing')
        return matches[0][0]

    print('add paper related to abundance to table refs.')
    inserted = db.execute('INSERT INTO refs (fauthor, year,nasads) VALUES (?,?,?)',
                          (name, year, nasads))
    db.commit()
    #refid is the integer primary key, so the row id of the insert is the new refid
    return inserted.lastrowid

In [25]:
def add_abundance_data(abu_paper,abu_norm,abu_label,abu_data):
    '''
    Adding new abundance data to database. Creates new table. Requires corresponding solar normalization to be available
    in database.
    e.g.
    abundance_paper=['Venn',2012,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V','Milky Way']
    abundance_norm=['Anders','1989']
    abundance_label=['[Fe/H]','err']
    abundance_data = [-2.81,0.34]

    abu_paper: [first author, year, NASA ADS link, ...]; only the first three entries are used here
    abu_norm:  [first author, year] of the solar normalization paper
    abu_label: column names of the new abundance table (all columns are created as float)
    abu_data:  one row of values, in the same order as abu_label

    Returns None. Nothing is added if the normalization is missing or if an abundance table
    for the same paper and normalization is already registered.
    '''

    ######check if solar normalization paper and data already exists in database
    norm_id = check_solar_normalization(abu_norm[0],abu_norm[1])
    if norm_id == -1:
        return

    ######check if abundance paper is in database, if it does not exist, add it.
    ref_id = add_paper_ref(abu_paper[0],abu_paper[1],abu_paper[2])

    ###### add abundance data
    #check if table already exists in registry table for abundance tables
    registered = db.execute('SELECT abu_reg_id FROM abu_table_reg WHERE (normid = ? and refid = ?)',
                            (norm_id, ref_id)).fetchall()
    if len(registered) != 0:
        print('abundance table with same paper reference and solar normalization exists already! Do nothing.')
        return

    #Number the new table after the highest registry id in use. Reading the first registry row
    #instead would reuse the name of an existing table as soon as two tables are registered.
    #MAX() gives None for an empty registry.
    last_id = db.execute('SELECT MAX(abu_reg_id) FROM abu_table_reg').fetchone()[0]
    abu_reg_id = (last_id or 0) + 1

    #name table according to numbering of abu_reg_id
    tablename = 'abu_table_%s' % abu_reg_id

    #create table. Column names cannot be bound as SQL parameters, so the labels are inserted
    #as given: they have to be trusted, valid SQLite identifiers.
    column_defs = ''.join(' , ' + label + ' float ' for label in abu_label)
    db.execute('CREATE TABLE IF NOT EXISTS %s (abuid integer primary key autoincrement %s )' %
               (tablename, column_defs))

    #### add abundance data: 1 entry line, values bound as parameters
    column_sql = ','.join(abu_label)
    placeholders = ','.join('?' * len(abu_data))
    db.execute('INSERT INTO %s (%s) VALUES (%s)' % (tablename, column_sql, placeholders),
               list(abu_data))

    #insert into table registry last, after abundance table was created successfully.
    db.execute('INSERT INTO abu_table_reg (refid, normid,abutable) VALUES (?,?,?)',
               (ref_id, norm_id, tablename))
    #make data row and registry entry permanent
    db.commit()

#### the following functions are for data retrieval and display (we use popular pandas package)

In [10]:
# style of pandas sheet to allow nasads link to be clickable
def make_clickable(val):
    '''
    Internal formatter for the pandas display: a value whose text contains 'http' is
    wrapped in an HTML link pointing to itself, any other value is returned unchanged.
    '''
    if 'http' not in str(val):
        return val
    return '<a href="{}">{}</a>'.format(val,val)

In [11]:
def get_solar_normalizations(norm_paper=None,data_x_y=False):
    '''
    Access either specific solar normalization data, specified through norm_paper or all solar normalization
    data when norm_paper is empty or not given.
    e.g. norm_paper=['Anders',1989]

    norm_paper: [first author, year] of the normalization paper; the year may be given
                as number or as string
    data_x_y:   if True return the tuple (columns, rows) instead of a styled pandas table

    Returns the string 'normalization table not found.' if norm_paper matches no entry.
    '''
    all_data= db.execute('''SELECT sn.*,r.fauthor,r.year,r.nasads FROM solarnorms sn INNER JOIN refs as r ON sn.refid = r.refid''').fetchall()
    columns = get_column_names('solarnorms') + ['fauthor','year','nasaads']

    if norm_paper:
        author, year = norm_paper[0], norm_paper[1]
        data = None
        for row in all_data:
            #the columns joined from refs come last: row[-3] is fauthor and row[-2] is year.
            #Comparing these two fields (instead of searching all values of the row) prevents
            #a year from matching an id or abundance value by accident.
            same_year = row[-2] == year or str(row[-2]) == str(year)
            if row[-3] == author and same_year:
                data = [row]
                break
        if data is None:
            return 'normalization table not found.'
    else:
        data = all_data

    if data_x_y:
        return columns,data
    return pd.DataFrame(data=data,columns=columns).style.format(make_clickable)

In [12]:
def get_paper_refs():
    '''
    Return all rows of table refs as a styled pandas table in which links are clickable.
    '''
    rows = db.execute('''SELECT * FROM refs''').fetchall()
    header = get_column_names('refs')
    frame = pd.DataFrame(data=rows,columns=header)
    return frame.style.format(make_clickable)

In [13]:
def get_overview_abundance_tables(data_x_y=False):
    '''
    List all registered abundance tables together with author, year and link of their paper.
    With data_x_y=True the tuple (columns, rows) is returned instead of a styled pandas table.
    '''
    query = '''SELECT a.*,r.fauthor,r.year,r.nasads FROM abu_table_reg a INNER JOIN refs r ON a.refid = r.refid'''
    rows = db.execute(query).fetchall()
    #registry columns first, followed by the three columns joined from refs
    header = get_column_names('abu_table_reg') + ['fauthor','year','nasaads']
    if not data_x_y:
        return pd.DataFrame(data=rows,columns=header).style.format(make_clickable)
    return header,rows

In [14]:
def get_abundance_data(abu_paper,data_x_y=False):
    '''
    Access abundance data from specific paper abu_paper
    e.g. abu_paper=['Venn',2012]

    abu_paper: [first author, year, ...]; only the first two entries are used and the year
               may be given as number or as string
    data_x_y:  if True return the tuple (columns, rows) instead of a pandas DataFrame

    Returns the string 'abundance table not found in database.' if no registered table
    belongs to the paper. If several tables match, the first one is used.
    '''
    #check for availability
    columns,all_data = get_overview_abundance_tables(data_x_y=True)
    author_col = columns.index('fauthor')
    year_col = columns.index('year')
    table_col = columns.index('abutable')

    author, year = abu_paper[0], abu_paper[1]
    tablename = None
    for row in all_data:
        #compare author and year with their own columns only, so that a year cannot
        #match an id value of the registry by accident
        same_year = row[year_col] == year or str(row[year_col]) == str(year)
        if row[author_col] == author and same_year:
            tablename = row[table_col]
            break
    if tablename is None:
        return 'abundance table not found in database.'

    #get the abundance data from specific table tablename
    data= db.execute('''SELECT * FROM %s ''' % tablename).fetchall()
    columns=get_column_names(tablename)
    if data_x_y:
        return columns,data
    return pd.DataFrame(data=data,columns=columns)

## II. Examples how to add and retrieve data.

#### Before we add abundance data we need to add the related solar normalization data and its (paper) reference. The solar normalization data goes in one table (solarnorms table) and the paper reference in another (refs table).

In [15]:
#add normalization data
#paper reference given as [first author, year, NASA ADS link]
#NOTE(review): the link contains a 1993 bibcode while the year is 1989 -- confirm the reference.
normalization_paper=['Anders',1989,'http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G']
#column labels and their values in the same order; 'Fe' is not part of the initial
#solarnorms table and is added as a new column by add_normalization()
normalization_label=['H','He','Fe']
normalization_data = [-2.81,0.34,0.01]
add_normalization(normalization_paper,normalization_label,normalization_data)

add paper for solar normalization to table refs.
add paper related to abundance to table refs.


check correct input:

In [16]:
#did we successfully create the database entry in the solar normalization table?
get_solar_normalizations()

Unnamed: 0,normid,H,He,refid,Fe,fauthor,year,nasaads
0,1,-2.81,0.34,1,0.01,Anders,1989,http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G


In [17]:
#let's see if there is an entry for the paper reference in the refs table
get_paper_refs()

Unnamed: 0,refid,fauthor,year,nasads
0,1,Anders,1989,http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G


#### Add one abundance data table. A new table with abundance data is created and an entry added to the abu_table_reg table. The latter table holds the names and info about all abundance data tables.

In [20]:
#add abundance data
#paper reference given as [first author, year, NASA ADS link, galaxy name]
abundance_paper=['Venn',2012,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V','Milky Way']
#corresponding normalization for abundance data. Must already exist in the database, else use add_normalization() above.
abundance_norm=['Anders','1989']
#plain data: column labels and one row of values in the same order
abundance_label=['[Fe/H]','err']
abundance_data = [-2.81,0.34]
add_abundance_data(abundance_paper,abundance_norm,abundance_label,abundance_data)

found corresponding normalization data.
abu data paper exists already. do nothing
abundance table with same paper reference and solar normalization exists already! Do nothing.


In [21]:
#check that the abundance table exists and contains the data added above
get_abundance_data(abu_paper=abundance_paper)

Unnamed: 0,abuid,Fe/H,err
0,1,-2.81,0.34


In [22]:
#check if the table exists in abu_table_reg, the registration table.
#refid is the id of the paper reference in the refs table, which holds all references.
get_overview_abundance_tables()

Unnamed: 0,abu_reg_id,refid,normid,abutable,fauthor,year,nasaads
0,1,2,1,abu_table_1,Venn,2012,http://adsabs.harvard.edu/abs/2012ApJ...751..102V


In [23]:
#let's look at the refs table. Indeed, the refid of 2 refers to the Venn et al. paper.
get_paper_refs()

Unnamed: 0,refid,fauthor,year,nasads
0,1,Anders,1989,http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G
1,2,Venn,2012,http://adsabs.harvard.edu/abs/2012ApJ...751..102V


#### Example to add a second abundance data table

In [26]:
#add abundance data of a second paper, using the same solar normalization as before
#NOTE(review): the ADS link is identical to the one of the Venn (2012) example -- confirm the reference.
abundance_paper=['BeersFun',2008,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V','Milky Way']
abundance_norm=['Anders','1989']
abundance_label=['[Fe/H]','err']
abundance_data = [-99,0.8]
add_abundance_data(abundance_paper,abundance_norm,abundance_label,abundance_data)

found corresponding normalization data.
abu data paper exists already. do nothing
abundance table with same paper reference and solar normalization exists already! Do nothing.


In [27]:
#as expected, the new table exists and holds the data of the second paper
get_abundance_data(abu_paper=abundance_paper)

Unnamed: 0,abuid,Fe/H,err
0,1,-99.0,0.8


In [28]:
#check if the table exists in abu_table_reg, the registration table. Yes, it exists as the second entry.
get_overview_abundance_tables()

Unnamed: 0,abu_reg_id,refid,normid,abutable,fauthor,year,nasaads
0,1,2,1,abu_table_1,Venn,2012,http://adsabs.harvard.edu/abs/2012ApJ...751..102V
1,2,3,1,abu_table_2,BeersFun,2008,http://adsabs.harvard.edu/abs/2012ApJ...751..102V


### Add galaxies

In [None]:
#open the database again and make sure the galaxies table exists
#(this rebinds the global db and curser that the functions above work on)
db=sqlite3.connect('stellab.db')
curser=db.cursor()
curser.execute('''CREATE TABLE IF NOT EXISTS galaxies (galid integer primary key autoincrement, name text)''')

In [15]:
#add data
#db.commit()

In [None]:
#create the solarnorms table if it does not exist yet (no rows are added here)
curser.execute('''CREATE TABLE IF NOT EXISTS solarnorms (normid integer primary key autoincrement, H float, He float, refid integer)''')
