In [1]:
# First, check if the DB file for this notebook exists and remove it. 
import os

if os.path.exists('example_enc-arr.db'):
    os.remove('example_enc-arr.db')

In [2]:
import sqlite3
from sqlalchemy import create_engine

engine = create_engine('sqlite:///example_enc-arr.db', echo=True)

For example, use a "Declarative Base" object to use an "object-model" approach.  We can defined Tables by themselves using SQLAlchemy, but we're not going to do that right now.  

In [3]:
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

This uses the Declarative style to create a "records" object with the fields we want.  

Alternative styles are available, I don't know at this time which is most relevant to cover or if an alternative style would be more useful. 

The Datatypes can be chosen by the SQLite "dialect" and its specification, as per [here](https://docs.sqlalchemy.org/en/13/dialects/sqlite.html#sqlite-data-types). (see commented import lines at top of cell).

In [4]:
from sqlalchemy import Column
from sqlalchemy.types import Integer, BigInteger, Boolean, BLOB

## If we wanted to be super specific on SQLite datatypes...
#from sqlalchemy.dialects.sqlite import INTEGER, BOOLEAN, SMALLINT, BLOB

class RecordModel(Base):
    __tablename__ = 'records'
    
    recordID = Column(Integer,
                      primary_key=True)
    
    SeriesNumber = Column(BigInteger, 
                          index=True, 
                          #unique=True, 
                          nullable=False)
    
    EventNumber = Column(BigInteger,
                         index=True,
                         unique=True,
                         nullable=False)
    
    NDetectors = Column(Integer,
                        nullable=False)
    
    DetNum = Column(BLOB)
    DetType = Column(BLOB)
    Enable = Column(BLOB)
    
    
    def __repr__(self):
        return "<Record(recordID='%s', SeriesNumber='%s', EventNumber=''%s')" % (
                    self.recordID, self.SeriesNumber, self.EventNumber)
    
        

Using the declarative style automatically creates the indexed "metadata schema" for the database, which will speed up queries and make the database easily sharable.  

In [5]:
RecordModel.__table__

Table('records', MetaData(bind=None), Column('recordID', Integer(), table=<records>, primary_key=True, nullable=False), Column('SeriesNumber', BigInteger(), table=<records>, nullable=False), Column('EventNumber', BigInteger(), table=<records>, nullable=False), Column('NDetectors', Integer(), table=<records>, nullable=False), Column('DetNum', BLOB(), table=<records>), Column('DetType', BLOB(), table=<records>), Column('Enable', BLOB(), table=<records>), schema=None)

Now let's actually create the SQLite database:

In [6]:
%%time
Base.metadata.create_all(engine)

2020-06-08 21:48:40,745 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-06-08 21:48:40,747 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 21:48:40,750 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2020-06-08 21:48:40,751 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 21:48:40,752 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("records")
2020-06-08 21:48:40,753 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 21:48:40,753 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("records")
2020-06-08 21:48:40,754 INFO sqlalchemy.engine.base.Engine ()
2020-06-08 21:48:40,756 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE records (
	"recordID" INTEGER NOT NULL, 
	"SeriesNumber" BIGINT NOT NULL, 
	"EventNumber" BIGINT NOT NULL, 
	"NDetectors" INTEGER NOT NULL, 
	"DetNum" BLOB, 
	"DetType" BLOB, 
	"Enable" BLOB, 
	PRIMARY KEY ("recordID")
)


2020-06-08 21:48:40,757 INFO sql

Let's create some example entries, and insert them in the database.

In [7]:
from sqlalchemy.orm import sessionmaker

# Use a 'factory' for creating a connection to our database.
Session = sessionmaker(bind=engine)

# Create actual session object we will use to interact with DB
session = Session()

In [8]:
%%time
# Create some example records
import random
import numpy as np

before_commit = []

inttype = np.int32
booltype = np.bool
_ndets = 35

for i in range(0,8500):
    # Random filler values
    _DetNum = np.array([x for x in range(1,_ndets)], dtype=inttype)
    _DetType = np.random.randint(low=1, high=9, size=_ndets, dtype=inttype)
    _Enable = np.random.choice(a=[False, True], size=_ndets).astype(booltype)
    
    # Check conversions to binary buffer
    bin1 = _DetNum.tobytes('C')
    #assert(all(_DetNum == np.frombuffer(bin1, dtype=inttype)))
    
    bin2 = _DetType.tobytes('C')
    #assert(all(_DetType == np.frombuffer(bin2, dtype=inttype)))
    
    bin3 = _Enable.tobytes('C')
    #assert(all(_Enable == np.frombuffer(bin3, dtype=booltype)))
    
    new_record = RecordModel(SeriesNumber=1005,
                             EventNumber=500+i,
                             NDetectors=_ndets,
                             DetNum=bin1,
                             DetType=bin2,
                             Enable=bin3)
    before_commit.append(new_record)



CPU times: user 868 ms, sys: 28.1 ms, total: 896 ms
Wall time: 889 ms


In [9]:
# how many objects?

print(len(before_commit))

8500


In [10]:
%%time
# Add them to the session and save session to DB, aka "Database Transaction"

session.bulk_save_objects(before_commit)
session.commit()

2020-06-08 21:48:41,743 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-06-08 21:48:41,914 INFO sqlalchemy.engine.base.Engine INSERT INTO records ("SeriesNumber", "EventNumber", "NDetectors", "DetNum", "DetType", "Enable") VALUES (?, ?, ?, ?, ?, ?)
2020-06-08 21:48:41,915 INFO sqlalchemy.engine.base.Engine ((1005, 500, 35, <memory at 0x7f84a123af40>, <memory at 0x7f84a0018340>, <memory at 0x7f84a0018280>), (1005, 501, 35, <memory at 0x7f84a0018100>, <memory at 0x7f84a00181c0>, <memory at 0x7f84a0018640>), (1005, 502, 35, <memory at 0x7f84a0018400>, <memory at 0x7f84a00184c0>, <memory at 0x7f84a0018580>), (1005, 503, 35, <memory at 0x7f84a0018700>, <memory at 0x7f84a00187c0>, <memory at 0x7f84a0018880>), (1005, 504, 35, <memory at 0x7f84a0018940>, <memory at 0x7f84a0018a00>, <memory at 0x7f84a0018ac0>), (1005, 505, 35, <memory at 0x7f84a0018b80>, <memory at 0x7f84a0018c40>, <memory at 0x7f84a0018d00>), (1005, 506, 35, <memory at 0x7f84a0018dc0>, <memory at 0x7f84a0018e80>, <mem

What's our resulting filesize? (n.b. Size is in "MebiBytes" [MiB]).

In [11]:
import os

bytecount = os.stat('example_enc-arr.db').st_size
print("DB Size is ", (bytecount * (9.537E-7)), " [MiB]")

DB Size is  2.9805490176  [MiB]


Now, let's close the session and disconnect the engine.

Now, we will reconnect to the database file with a proper session, perform a query to load some records, and store in a pandas dataframe. 