In [2]:
# import function from .py file
import pprint
from SPASE_Scraper_Script import SPASE_Scraper

# test path : "../NASA/NumericalData/DE2/IDM/PT0.25S.xml" or "../NASA/DisplayData/ACE/MAG/Daily.xml"
# Ask for the SPASE record to scrape. The prompt is a raw string so the
# Windows-style example path is printed literally instead of relying on the
# invalid escape sequences \d and \s, which newer Python versions warn about.
print(r"Enter path of SPASE record ... such as C:\dir1\dir2\spaseRecord_x.xml")
path = input()

# SPASE_Scraper returns each extracted value together with the SPASE field it
# was obtained from; the order below must match the order it returns them in.
(RID, RIDField, author, authorField, pub, pubField, pubDate, pubDateField, dataset, datasetField,
 PI, PIField, AccessRights, licenseField, datalinkField) = SPASE_Scraper(path)

# keep only the first four characters of the publication date (the year)
pubYear = pubDate[0:4]

# `author` may be a single string or a collection of names; joining a plain
# string would insert ", " between every character, so only join collections.
author_text = author if isinstance(author, str) else ", ".join(author)

print("The ResourceID is " + RID + " which was obtained from " + RIDField)
print("The author(s) are " + author_text + " which was obtained from " + authorField)
print("The publication year is " + pubYear + " which was obtained from " + pubDateField)
print("The publisher is " + pub + " which was obtained from " + pubField)
print("The dataset is " + dataset + " which was obtained from " + datasetField)
print("The persistent identifier is " + PI + " which was obtained from " + PIField)
print("The URLs with their associated product keys obtained from " + datalinkField + """ and their 
      license(s) obtained from """ + licenseField + " are: ")
pprint.pprint(AccessRights)

# call .py file directly from notebook
#%run ./SPASE_test.py {path} in notebook
#import sys

#args = sys.argv
#path = args[1] in source file


Enter path of SPASE record ... such as C:\dir1\dir2\spaseRecord_x.xml


 ../NASA/NumericalData/DE2/IDM/PT0.25S.xml


You entered ../NASA/NumericalData/DE2/IDM/PT0.25S.xml
File size is: 0.00 GB
The ResourceID is spase://NASA/NumericalData/DE2/IDM/PT0.25S which was obtained from NumericalData/ResourceID
The author(s) are Heelis, Roderick, A.; Candey, Robert, M. which was obtained from NumericalData/PublicationInfo/Authors
The publication year is 2023-01-01T00:00:00 which was obtained from NumericalData/PublicationInfo/PublicationDate
The publisher is NASA Space Physics Data Facility which was obtained from NumericalData/PublicationInfo/PublishedBy
The dataset is DE 2 250ms IDM Ion Drift Velocities which was obtained from NumericalData/ResourceHeader/ResourceName
The persistent identifier is https://doi.org/10.48322/reef-jt02 which was obtained from NumericalData/DOI
The URLs with their associated product keys obtained from NumericalData/AccessInformation/AccessURL/URL and their 
      license(s) obtained from NumericalData/AccessInformation/AccessRights are: 
{'Open': {'ftps://spdf.gsfc.nasa.gov/pub/da

In [2]:
# Separate the license from the datalink/product keys held in AccessRights so
# that each can be stored in its own database column.
#
# Both names start as empty strings so later cells always find them defined,
# even when no AccessRights entry carries any URLs. Without the defaults,
# `datalink` would be undefined and `license` would fall through to Python's
# built-in `license` helper object, which sqlite3 cannot store.
license = ""
datalink = ""
for access_right, urls in AccessRights.items():
    if not urls:
        # nothing was recorded under this access right
        continue
    # NOTE(review): if several access rights have URLs, only the last one
    # visited is kept - confirm that a record never needs more than one.
    license = access_right
    print(license + " was assigned to license")
    # store the whole mapping found under this access right as its string form
    datalink = str(urls)
    print(datalink + " was assigned to datalink")
            

Open was assigned to license
{'https://cdaweb.gsfc.nasa.gov/cgi-bin/gif_walk?plot_type=ace_kp_plots': []} was assigned to datalink


In [5]:
# add table to existing database
import sqlite3

def create_tables():
    """Create the SPASE_Metadata table in SPASE_Data.db if it is missing.

    The database file is opened (and created if necessary) in the current
    working directory. Any sqlite3 error is printed rather than raised, and
    the connection is always closed before returning.
    """
    sql_statements = [ 
        """CREATE TABLE IF NOT EXISTS SPASE_Metadata (
                id INTEGER PRIMARY KEY, 
                SPASE_id TEXT NOT NULL UNIQUE, 
                author TEXT,
                author_source TEXT,
                publisher TEXT,
                publisher_source TEXT,
                publication_yr TEXT,
                publication_yr_source TEXT,
                dataset TEXT,
                dataset_source TEXT,
                license TEXT,
                license_source TEXT,
                datalink TEXT,
                datalink_source TEXT,
                description TEXT,
                PI TEXT,
                PI_source TEXT
        );"""]

    conn = None
    try:
        conn = sqlite3.connect('SPASE_Data.db')
        # `with conn` commits on success and rolls back on error, but it does
        # NOT close the connection - that is done in the `finally` below.
        with conn:
            cursor = conn.cursor()
            for statement in sql_statements:
                cursor.execute(statement)
    except sqlite3.Error as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()


if __name__ == '__main__':
    create_tables()

In [6]:
# insert entries into table
import sqlite3

def add_SPASE(conn, entry):
    """Insert one record into SPASE_Metadata and commit it.

    Args:
        conn: an open sqlite3 connection to the database holding the table.
        entry: sequence of 16 values, in the column order listed in the
            INSERT statement below (SPASE_id first, PI_source last).

    Returns:
        The row id sqlite assigned to the newly inserted record.
    """
    insert_stmt = '''INSERT INTO SPASE_Metadata(SPASE_id,author,author_source,publisher,publisher_source,
            publication_yr,publication_yr_source,dataset,dataset_source,license,license_source,
            datalink,datalink_source,description,PI,PI_source)
            VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) '''
    # Connection.execute creates a cursor, runs the statement and returns it
    inserted = conn.execute(insert_stmt, entry)
    conn.commit()
    return inserted.lastrowid


def main():
    """Insert the record scraped earlier in the notebook into SPASE_Data.db.

    Reads the notebook-level variables produced by the scraping and
    AccessRights cells (RID, author, pub, pubYear, dataset, license, datalink,
    PI and their *Field companions), stores them as one SPASE_Metadata row via
    add_SPASE, and prints the new row id. Any sqlite3 error is printed rather
    than raised, and the connection is always closed before returning.
    """
    # sqlite3 cannot bind a list/tuple parameter, so a collection of author
    # names is flattened to one comma-separated string; other values pass through.
    author_text = ", ".join(author) if isinstance(author, (list, tuple)) else author

    conn = None
    try:
        conn = sqlite3.connect('SPASE_Data.db')
        # `with conn` commits or rolls back the transaction but does not close
        # the connection; closing happens in the `finally` below.
        with conn:
            # add a new SPASE Record
            SPASE_Record = (RID, author_text, authorField, pub, pubField, pubYear, pubDateField, 
                            dataset, datasetField, license, licenseField, datalink, datalinkField,
                            "some description", PI, PIField)
            Record_id = add_SPASE(conn, SPASE_Record)
            print(f'Created a SPASE Record with the id {Record_id}')

    except sqlite3.Error as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()

if __name__ == '__main__':
    main()

Created a SPASE Record with the id 1


In [7]:
# select row(s) from table
import sqlite3

# Print every row currently stored in SPASE_Metadata.
conn = None
try:
    conn = sqlite3.connect("SPASE_Data.db")
    cur = conn.cursor()
    cur.execute('SELECT * FROM SPASE_Metadata')
    rows = cur.fetchall()
    for row in rows:
        print(row)
except sqlite3.Error as e:
    print(e)
finally:
    # the sqlite3 connection context manager never closes the connection,
    # so it is closed explicitly here
    if conn is not None:
        conn.close()

(1, 'spase://NASA/DisplayData/ACE/MAG/Daily', 'spase://SMWG/Person/Ruth.Skoug', 'DisplayData/ResourceHeader/Contact/PersonID', 'NASA/GSFC/SPDF', 'DisplayData/AccessInformation/RepositoryID', '', 'DisplayData/PublicationInfo/PublicationDate', 'ACE Daily Survey Plots', 'DisplayData/ResourceHeader/ResourceName', 'Open', 'DisplayData/AccessInformation/AccessRights', "{'https://cdaweb.gsfc.nasa.gov/cgi-bin/gif_walk?plot_type=ace_kp_plots': []}", 'DisplayData/AccessInformation/AccessURL/URL', 'some description', '', 'DisplayData/DOI')


In [None]:
# delete row from table
import sqlite3

# Value of the `id` column (the primary key) of the row to delete.
# An explicit variable is required here: the bare name `id` refers to Python's
# built-in id() function, which sqlite3 cannot bind as a query parameter.
row_id = 1

conn = None
try:
    conn = sqlite3.connect("SPASE_Data.db")
    # `with conn` commits the delete on success and rolls it back on error
    with conn:
        delete_stmt = 'DELETE FROM SPASE_Metadata WHERE id = ?'
        cur = conn.execute(delete_stmt, (row_id,))
    # rowcount is 0 when no row had that id, which makes a no-op visible
    print(f'Deleted {cur.rowcount} row(s) with id {row_id}')
except sqlite3.Error as e:
    print(e)
finally:
    # the connection context manager does not close the connection itself
    if conn is not None:
        conn.close()