# Building SQL database for Stellab

SQL code to create stellab database

Tables:
* refs: Table with literature references:
    * refid: unique reference id (PK)
    * fauthor: first author
    * year
    * nasaads: link to nasa ads page

* galaxies
    * gali: unique ref id
    * name: galaxy name

* solarnorms
    * normid: unique reference id (PK)
    * H, He... abundance entries
    * refids
* abu_table_reg
    * abu_reg_id uniqure reference
    * refid to look up literature
    * table name to look up abundance table
    * normid to look up normalization used

* multiple abundance tables, all with
    * abuid: unique reference id
    * element ratio1, element ratio2...
    * error1, errror2 ...

Tips:
* Use not null
* fixed lenght type arrays are faster

In [1]:
#pip install pymysql

In [2]:
#We choose sqlite3 because it does not require to install mysql or run a server.
import sqlite3
import stellab as st

SQLite is a C library that provides a lightweight disk-based database that doesn’t require a separate server process and allows accessing the database using a nonstandard variant of the SQL query language. 

Creating database and table refs

### Add references

In [38]:
st1=st.stellab()
#papers=st1.list_ref_papers()
legs=st1.leg

name=[]
year=[]
for i,labels in enumerate(legs):
    name.append(labels.split('(')[0].strip())
    year.append(labels.split('(')[1].strip()[:-1])

In [39]:
st1.list_solar_norm()

Anders_Grevesse_1989
Grevesse_Noels_1993
Grevesse_Sauval_1998
Asplund_et_al_2009
Asplund_et_al_2005
Lodders_et_al_2009


In [5]:
data=[['Venn',2012,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V']]

In [106]:
#create table
!rm stellab.db
db=sqlite3.connect('stellab.db')
curser=db.cursor()
curser.execute('''CREATE TABLE IF NOT EXISTS refs (refid integer primary key autoincrement, fauthor text, year year, nasads text)''')
curser.execute('''CREATE TABLE IF NOT EXISTS galaxies (galid integer primary key autoincrement, name text)''')
curser.execute('''CREATE TABLE IF NOT EXISTS solarnorms (normid integer primary key autoincrement, H float, He float, refid integer)''')
curser.execute('''CREATE TABLE IF NOT EXISTS abu_table_reg (abu_reg_id integer primary key autoincrement, refid integer, normid integer,abutable text)''')
db.commit()

In [107]:
#curser.execute('''INSERT INTO refs (fauthor,year,nasads) VALUES 
#        ('Anders','1989', 'http://adsabs.harvard.edu/abs/2012ApJ')''')

In [108]:
def get_column_names(table):
    tmp=db.execute("PRAGMA table_info(%s)" % table).fetchall()
    return [entry[1] for entry in tmp]

In [205]:
def check_normalization(name,year):
    '''
    Check if normalization data is available in database.
    '''        
    #check if paper for normalization data exists already.
    results = db.execute('''SELECT refid FROM refs WHERE (fauthor = '%s' and year = '%s' )''' % (name,year))  #% ('Anders','1989'))
    refid = results.fetchall()
    if len(refid)==1:
        print 'Normalization paper is available.'
        refid = refid[0][0]
        results = db.execute('''SELECT normid FROM solarnorms WHERE (refid = %s )''' %(refid))
        normid = results.fetchall()
        if len(normid)==1:
            print 'found corresponding normalization data.'
            return normid[0][0]
        else:
            #add normalization data
            print 'normalization data is missing. Add the data first by using add_normalization().'
            return -1
    else:
        print 'normalization data is missing. Add the data first by using add_normalization().'
        #print 'paper for normalization not available. Add paper reference first.'
        return -1

In [206]:
def add_normalization(norm_paper,norm_label,norm_data):
    '''
    Add data to normalization table solarnorms and corresponding paper to table refs.
    norm_paper=['Venn',2012,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V']
    norm_label=['H','He']
    norm_data = [-2.81,0.34,]

    '''
    #check if paper for normalization data exists already.
    print 'does paper entry exists alreay?'
    name=norm_paper[0]
    year=norm_paper[1]
    nasads=norm_paper[2]
    results = db.execute('''SELECT refid FROM refs WHERE (fauthor = '%s' and year = '%s' )''' % (name,year))  #% ('Anders','1989'))
    refid = results.fetchall()

    if len(refid)==0:
        print 'add paper for normalization.'
        add_paper_ref(name,year,nasads)
        results = db.execute('''SELECT refid FROM refs WHERE (fauthor = '%s' and year = '%s' )''' % (name,year))  #% ('Anders','1989'))
        refid = results.fetchall()        
        
    refid = refid[0][0]
    results = db.execute('''SELECT normid FROM solarnorms WHERE (refid = %s )''' %(refid))
    normid = results.fetchall()
    
    if len(normid)==1:
        print 'normalization data is already available.'
    else:
            
        #check if columns of solarnorms include matches to norm_label
        print 'what columns are available'
        columns= get_column_names('solarnorms') 
        print 'found columns: ',columns
        for k in range(len(norm_label)):
            if not norm_label[k] in columns:
                print norm_label[k],'missing in columns, add new column'
                db.execute('''ALTER TABLE solarnorms ADD %s float''' %norm_label[k])
                    
        #add normalization data
        str_tmp = 'refid, '
        print norm_label
        print norm_data
        for k in range(len(norm_label)):
                str_tmp+= (norm_label[k] + ',')
        str_tmp = str_tmp[:-1]
        sql_prepr = tuple([str_tmp])
        print 'test 1',sql_prepr
        str_tmp = str(refid)+', '
        for k in range(len(norm_data)):
                str_tmp+= str(norm_data[k]) + ','
        str_tmp = str_tmp[:-1]
        sql_prepr = sql_prepr + tuple([str_tmp])
        print sql_prepr
        db.execute('''INSERT INTO solarnorms (%s) VALUES (%s)''' % sql_prepr)


In [207]:
def add_paper_ref(name,year,nasads):
    results = db.execute('''SELECT refid FROM refs WHERE (fauthor = '%s' and year = '%s' )''' % 
                         (name,year))
    results = results.fetchall()
    if len(results)==0:
            print 'add abu data paper'
            db.execute('''INSERT INTO refs (fauthor, year,nasads) VALUES ('%s','%s','%s') ''' % (name,year,nasads))
            db.commit()
            #key=db.execute('''SELECT LAST_INSERT_ID()''')
            results=db.execute('''SELECT refid FROM refs WHERE (fauthor='%s' and year='%s') ''' % (name,year))
            ref_id=results.fetchall()[0][0]
    else:
        print 'abu data paper exists already. do nothing'
        ref_id = results[0][0]
    return ref_id

In [208]:
def add_abundance(name,year,url):
    
    results = db.execute('''SELECT refid FROM refs WHERE (fauthor = '%s' and year = '%s' )''' % 
                         (name,year))
    if len(results)==0:
            print 'add abu data paper'
            #some insert ...
    else:
        print 'abu data paper exists already'

In [209]:
check_normalization('Anders','1989')

Normalization paper is available.
found corresponding normalization data.


3

In [245]:
def add_abundance_data(abu_paper,abu_norm,abu_label,abu_data):
    '''
    Adding new abundance data to file
    where for example    
    '''
    
    #check if normalization paper and data already exists in database
    norm_id = check_normalization(abu_norm[0],abu_norm[1])
    if norm_id == -1: return
    
    #check if abundance paper into database, if it does not exist, add it.
    ref_id = add_paper_ref(abu_paper[0],abu_paper[1],abu_paper[2])

    # add abundance data
    print 'FOUND norm_id: ',norm_id,' ref_id ',ref_id
    
    table_name = ''
    #check if table already exists in registry table for abundance tables
    results = curser.execute('''SELECT abu_reg_id FROM abu_table_reg WHERE (normid = '%s' and refid = '%s')''' % 
                             (norm_id,ref_id))
    results = results.fetchall()
    print 'test'
    if len(results)==0:
        
        #get the latest key entry
        abu_reg_ids = curser.execute('''SELECT abu_reg_id FROM abu_table_reg''')
        abu_reg_ids = abu_reg_ids.fetchall()

        if len(abu_reg_ids)==0:
            abu_reg_id = 0
        else:
            abu_reg_id = abu_reg_ids[0][-1]
        #create new key entry
        abu_reg_id = abu_reg_id + 1
        
        #name table according to numbering of abu_reg_id
        tablename = 'abu_table_%s' % abu_reg_id
        #add new table
                             
        #create table
        str_tmp=''
        for k in range(len(abu_label)):
            str_tmp+=' , '+abu_label[k] +' float '
                             
        sql_prepr = tuple([tablename])+tuple([str_tmp])
                             
        #create database
        print 'create abu table entry'
        curser.execute('''CREATE TABLE IF NOT EXISTS %s (abuid integer primary key autoincrement %s )''' %
                            sql_prepr)
                             
        #### add abundance data: 1 entry line        
        str_tmp1=''
        for k in range(len(abu_label)):
                str_tmp1+=abu_label[k]
        str_tmp2=''
        for k in range(len(abu_data)):
                str_tmp2+=str(abu_label[k])                
        
        #create tuple for SQL input
        sql_prepr = tuple([tablename]) + tuple([str_tmp2]) + tuple([str_tmp2])
                   
        curser.execute('''INSERT INTO %s (%s) VALUES (%s)''' %sql_prepr)
        
        #insert into table registry last, after abundance table was created successfully.                     
        curser.execute('''INSERT INTO abu_reg_id (refid, normid,abutable) VALUES (%s,%s,%s) ''' %
                             (ref_id,normid,tablename))
        print 'Table ',sql_prepr,' , ',' created!'
    else:
        print 'abundance table with same paper reference and solar normalization exists already! Do nothing.'
    
    
#add data
#curser.executemany('''INSERT INTO refs (name,year,nasads) VALUES ( ?, ?, ?)''', data)
#db.commit()

### Add dataset

#### add normalization data and reference

In [246]:
#add normalization data
normalization_paper=['Anders',1989,'http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G']
normalization_label=['H','He','Fe']
normalization_data = [-2.81,0.34,0.01]
add_normalization(normalization_paper,normalization_label,normalization_data)

does paper entry exists alreay?
normalization data is already available.


check correct input:

In [247]:
get_column_names('solarnorms'),db.execute('''SELECT * FROM solarnorms''').fetchall()

([u'normid', u'H', u'He', u'refid', u'C'],
 [(1, -2.81, 0.34, None, None),
  (2, -2.81, 0.34, 1, None),
  (3, -2.81, 0.34, 2, None)])

In [248]:
get_column_names('refs'),db.execute('''SELECT * FROM refs''').fetchall()

([u'refid', u'fauthor', u'year', u'nasads'],
 [(1, u'Venn', 2012, u'http://adsabs.harvard.edu/abs/2012ApJ...751..102V'),
  (2,
   u'Anders',
   1989,
   u'http://ukads.nottingham.ac.uk/abs/1993A%26A...271..587G')])

#### add abundance data

In [249]:
#add abundance data
abundance_paper=['Venn',2012,'http://adsabs.harvard.edu/abs/2012ApJ...751..102V','Milky Way']
abundance_norm=['Anders','1989']
abundance_label=['[Fe/H]','err']
abundance_data = [-2.81,0.34]
add_abundance_data(abundance_paper,abundance_norm,abundance_label,abundance_data)


Normalization paper is available.
found corresponding normalization data.
abu data paper exists already. do nothing
FOUND norm_id:  3  ref_id  1
test
create abu table entry


OperationalError: near "err": syntax error

In [250]:
(3,4,5)+tuple([1,2,3])

(3, 4, 5, 1, 2, 3)

In [19]:
a = 'test %s' % 'a'
print a

test a


In [None]:
#test
results=db.execute('SELECT * FROM refs')
results.fetchall()[0]

### Add galaxies

In [23]:
type((1,2))

tuple

In [None]:
#create table
db=sqlite3.connect('stellab.db')
curser=db.cursor()
curser.execute('''CREATE TABLE IF NOT EXISTS galaxies (galid integer primary key autoincrement, name text)''')

In [15]:
#add data
#db.commit()

In [None]:
#add data
curser.execute('''CREATE TABLE IF NOT EXISTS solarnorms (normid integer primary key autoincrement, H float, He float, refid integer)''')
