# Database Management

## Notebook for creating and editing database tables/entries

In [7]:
import sqlite3
import warnings

import numpy as np

# sqlite3 cannot bind NumPy integer scalars as query parameters, so register
# adapters that convert them to plain Python ints before they reach SQLite.
sqlite3.register_adapter(np.int64, int)
sqlite3.register_adapter(np.int32, int)

# Location of the SQLite results database used throughout this notebook.
db_dir = '/nfs/proj-repo/AAARG-dissertation'
db_name = 'sumresults.db'
db_path = db_dir + '/' + db_name

In [2]:
def get_connection(path):
    """Open the SQLite database at ``path``.

    Args:
        path: Filesystem path passed straight to ``sqlite3.connect``.

    Returns:
        A ``(connection, cursor)`` tuple; the cursor belongs to the
        returned connection.
    """
    # sqlite3.connect creates the database file when it does not exist yet.
    connection = sqlite3.connect(path)
    return connection, connection.cursor()

In [3]:
def create_tables(conn, cursor, topic=True, techniques=True,
                  instances=True, nuggets=True, meta=True, updates=True):
    """Create the results-database tables that do not exist yet.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on
    an already-initialised database leaves existing tables untouched.
    Changes are committed before returning; the connection is left open and
    remains owned by the caller.

    Args:
        conn: Open ``sqlite3`` connection, used to commit.
        cursor: Cursor on ``conn``, used to execute the DDL.
        topic: Create the ``topics`` table.
        techniques: Create the ``techniques`` table.
        instances: Create the ``instances`` table.
        nuggets: Create the ``nuggets`` table and the ``nugget_instances``
            linking table.
        meta: Create the ``meta`` table.
        updates: Create the ``updates`` table.
    """
    if topic:
        print("creating topic table...")
        cursor.execute("""CREATE TABLE IF NOT EXISTS topics (
            id integer PRIMARY KEY,
            title text,
            description text,
            start integer,
            end integer,
            query text,
            type text
        )""")
    if techniques:
        print("creating techniques table...")
        cursor.execute("""CREATE TABLE IF NOT EXISTS techniques (
            name text PRIMARY KEY,
            description text
        )""")
    if instances:
        print("creating instances table...")
        cursor.execute("""CREATE TABLE IF NOT EXISTS instances (
            instance integer PRIMARY KEY,
            technique text,
            temporal boolean,
            start_exec datetime,
            end_exec datetime,
            FOREIGN KEY (technique) REFERENCES techniques (name)
        )""")
    if meta:
        print("creating meta table...")
        # One row per (instance, topic) pair; the many-to-many link to the
        # nuggets a summary contains lives in nugget_instances.
        cursor.execute("""CREATE TABLE IF NOT EXISTS meta (
            topic_id integer,
            instance integer,
            summary text,
            streamids text,
            epoch_start integer,
            epoch_end integer,
            importance_score integer,
            total_importance integer,
            r1_precision real,
            r1_recall real,
            r1_fmeasure real,
            has_updates boolean,
            FOREIGN KEY (topic_id) REFERENCES topics (id),
            FOREIGN KEY (instance) REFERENCES instances (instance),
            PRIMARY KEY (instance, topic_id)
        )""")
    if updates:
        print("creating updates table...")
        # Table constraints are comma-separated; SQLite tolerates missing
        # commas here, but that is non-standard SQL.
        cursor.execute("""CREATE TABLE IF NOT EXISTS updates (
            instance integer,
            topic_id integer,
            technique text,
            update_num integer,
            cur_text text,
            update_text text,
            epoch_start integer,
            epoch_end integer,
            importance_score integer,
            total_importance integer,
            r1_precision real,
            r1_recall real,
            r1_fmeasure real,
            FOREIGN KEY (instance) REFERENCES instances (instance),
            FOREIGN KEY (topic_id) REFERENCES topics (id),
            FOREIGN KEY (technique) REFERENCES techniques (name),
            PRIMARY KEY (instance, topic_id, update_num)
        )""")
    if nuggets:
        print("creating nuggets table...")
        # "PRIMARY KEY" must be two words: written as PRIMARY_KEY, SQLite
        # treats it as part of the column's type name and creates no key.
        cursor.execute("""CREATE TABLE IF NOT EXISTS nuggets (
            nugget_id text PRIMARY KEY,
            topic_id integer,
            importance integer,
            nugget_text text,
            FOREIGN KEY (topic_id) REFERENCES topics (id)
        )""")
        # Linking table for the many-to-many nugget <-> summary relationship.
        # update_num alone is not unique in updates, so the reference to
        # updates uses that table's full composite primary key.
        print("creating linking nuggets table")
        cursor.execute("""CREATE TABLE IF NOT EXISTS nugget_instances (
            nugget_id text,
            topic_id integer,
            instance integer,
            technique text,
            update_num integer,
            is_update boolean,
            is_complete_summary boolean,
            found boolean,
            FOREIGN KEY (nugget_id) REFERENCES nuggets (nugget_id),
            FOREIGN KEY (topic_id) REFERENCES topics (id),
            FOREIGN KEY (instance) REFERENCES instances (instance),
            FOREIGN KEY (technique) REFERENCES techniques (name),
            FOREIGN KEY (instance, topic_id, update_num)
                REFERENCES updates (instance, topic_id, update_num)
        )""")
    print("committing changes")
    conn.commit()
    print("committed")

# create_tables needs an open connection and cursor rather than a path.
conn, cursor = get_connection(db_path)
create_tables(conn, cursor)

creating topic table...
creating techniques table...
creating instances table...
creating meta table...
creating updates table...
creating nuggets table...
creating linking nuggets table
committing changes
committed


## Pre-Populating Database

In [6]:
def populate_topics(conn, cursor, topics_df):
    """Bulk-insert ``topics_df`` into the ``topics`` table when it is empty.

    If the table already holds rows, nothing is inserted and a warning is
    issued instead. In both cases the connection is committed and a
    confirmation line is printed.

    Args:
        conn: Open ``sqlite3`` connection, used to commit.
        cursor: Cursor on ``conn``.
        topics_df: DataFrame providing the columns ``id``, ``title``,
            ``description``, ``start``, ``end``, ``query`` and ``type``.
    """
    cursor.execute('SELECT COUNT(id) FROM topics')
    (existing,) = cursor.fetchone()
    if existing != 0:
        warnings.warn("The topics table already has entries")
    else:
        # Values are bound positionally, so this order must match the
        # column order of the topics table.
        columns = ['id', 'title', 'description', 'start', 'end', 'query', 'type']
        rows = [[record[name] for name in columns]
                for _, record in topics_df.iterrows()]
        cursor.executemany('insert into topics values (?,?,?,?,?,?,?)', rows)
    conn.commit()
    print("populated topics")

In [None]:
def populate_nuggets(conn, cursor, nugget_df):
    """Bulk-insert ``nugget_df`` into the ``nuggets`` table when it is empty.

    If the table already holds rows, nothing is inserted and a warning is
    issued instead. In both cases the connection is committed and a
    confirmation line is printed.

    Args:
        conn: Open ``sqlite3`` connection, used to commit.
        cursor: Cursor on ``conn``.
        nugget_df: DataFrame providing the columns ``nugget_id``,
            ``topic_id``, ``importance`` and ``nugget_text``.
    """
    cursor.execute('SELECT COUNT(nugget_id) FROM nuggets')
    (existing,) = cursor.fetchone()
    if existing != 0:
        warnings.warn("The nuggets table already has entries")
    else:
        # Values are bound positionally, so this order must match the
        # column order of the nuggets table.
        columns = ['nugget_id', 'topic_id', 'importance', 'nugget_text']
        rows = [[record[name] for name in columns]
                for _, record in nugget_df.iterrows()]
        cursor.executemany('insert into nuggets values (?,?,?,?)', rows)
    conn.commit()
    print("populated nuggets")

## Insertions

In [5]:
# def insert_