In [1]:
# First, check if the DB file for this notebook exists and remove it. 
import os

if os.path.exists('example.db'):
    os.remove('example.db')

In [2]:
import sqlite3
from sqlalchemy import create_engine

engine = create_engine('sqlite:///example.db', echo=True)

For example, use a "Declarative Base" object to use an "object-model" approach.  We can defined Tables by themselves using SQLAlchemy, but we're not going to do that right now.  

In [3]:
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

This uses the Declarative style to create a "records" object with the fields we want.  

Please note that in the following definition, by keeping only one "detector" and one "enabled" field, we remove the ability for unique indexing on the SeriesNumber and EventNumber fields.

In [4]:
from sqlalchemy import Column, Integer, BigInteger, String, DateTime, Float, Boolean
from sqlalchemy import Sequence

class RecordModel(Base):
    __tablename__ = 'records'
    
    recordID = Column(Integer, 
                      Sequence('record_id_seq'), 
                      primary_key=True)
    SeriesNumber = Column(BigInteger, 
                          index=True, 
                          #unique=True, 
                          nullable=False)
    EventNumber = Column(BigInteger,
                         index=True,
                         #unique=True,
                         nullable=False)
    DetNum = Column(Integer)
    DetType = Column(Integer) 
    Enabled = Column(Boolean)
    
    
    def __repr__(self):
        return "<Record(recordID='%s', SeriesNumber='%s', EventNumber=''%s')" % (
                    self.recordID, self.SeriesNumber, self.EventNumber)
    
        

Using the declarative style automatically creates the indexed "metadata schema" for the database, which will speed up queries and make the database easily sharable.  

In [5]:
RecordModel.__table__

Table('records', MetaData(bind=None), Column('recordID', Integer(), table=<records>, primary_key=True, nullable=False, default=Sequence('record_id_seq', metadata=MetaData(bind=None))), Column('SeriesNumber', BigInteger(), table=<records>, nullable=False), Column('EventNumber', BigInteger(), table=<records>, nullable=False), Column('DetNum', Integer(), table=<records>), Column('DetType', Integer(), table=<records>), Column('Enabled', Boolean(), table=<records>), schema=None)

Now let's actually create the SQLite database:

In [6]:
Base.metadata.create_all(engine)

2020-06-08 19:36:31,128 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-06-08 19:36:31,129 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 19:36:31,129 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2020-06-08 19:36:31,130 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 19:36:31,131 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("records")
2020-06-08 19:36:31,132 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 19:36:31,133 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("records")
2020-06-08 19:36:31,133 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 19:36:31,135 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE records (
	"recordID" INTEGER NOT NULL, 
	"SeriesNumber" BIGINT NOT NULL, 
	"EventNumber" BIGINT NOT NULL, 
	"DetNum" INTEGER, 
	"DetType" INTEGER, 
	"Enabled" BOOLEAN, 
	PRIMARY KEY ("recordID"), 
	CHECK ("Enabled" IN (0, 1))
)


2020-06-08 19:36:31,136 

Let's create some example entries, and insert them in the database.

In [7]:
from sqlalchemy.orm import sessionmaker

# Use a 'factory' for creating a connection to our database.
Session = sessionmaker(bind=engine)

# Create actual session object we will use to interact with DB
session = Session()

In [8]:
%%time
# Create some example records
import random
import numpy as np

before_commit = []

for i in range(0,8500):   
    for j in range(0, 35):
        _enable = random.choice([True, False])
        new_record = RecordModel(SeriesNumber=1005,
                                 EventNumber=500+i,
                                 DetNum=j,
                                 DetType=int(j%10),
                                 Enabled=_enable)
        before_commit.append(new_record)
    


CPU times: user 8.58 s, sys: 235 ms, total: 8.81 s
Wall time: 8.83 s


In [11]:
# how many objects?

print(len(before_commit))

297500


In [9]:
%%time
# Add them to the session and save session to DB, aka "Database Transaction"

session.bulk_save_objects(before_commit)
session.commit()

2020-06-08 19:36:40,334 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-06-08 19:36:42,004 INFO sqlalchemy.engine.base.Engine INSERT INTO records ("SeriesNumber", "EventNumber", "DetNum", "DetType", "Enabled") VALUES (?, ?, ?, ?, ?)
2020-06-08 19:36:42,004 INFO sqlalchemy.engine.base.Engine ((1005, 500, 0, 0, 1), (1005, 500, 1, 1, 1), (1005, 500, 2, 2, 0), (1005, 500, 3, 3, 1), (1005, 500, 4, 4, 0), (1005, 500, 5, 5, 0), (1005, 500, 6, 6, 0), (1005, 500, 7, 7, 0)  ... displaying 10 of 297500 total bound parameter sets ...  (1005, 8999, 33, 3, 0), (1005, 8999, 34, 4, 1))
2020-06-08 19:36:42,997 INFO sqlalchemy.engine.base.Engine COMMIT
CPU times: user 2.87 s, sys: 108 ms, total: 2.98 s
Wall time: 3.03 s


What's our resulting filesize? (n.b. Size is in "MebiBytes" [MiB]).

In [10]:
import os

bytecount = os.stat('example.db').st_size
print("DB Size is ", (bytecount * (9.537E-7)), " [MiB]")

DB Size is  12.437834956800002  [MiB]


Now, let's close the session and disconnect the engine.

Now, we will reconnect to the database file with a proper session, perform a query to load some records, and store in a pandas dataframe. 