# Object Relational Mapping with sqlalchemy and sqlite databases
*Author: Marco Prenassi*   
*License: Creative Commons [insert type, cc-by?]*   

In [4]:
!pip install sqlalchemy



## ORM WITH SQLALCHEMY  
*Let's understand what is ORM*
*We are going to use the quickStart example on https://docs.sqlalchemy.org/en/20/*
*We use the declarative mapping, as a standard declaration*

In [4]:
# This is a type of data, used also in sql, that's a VARCHAR
from sqlalchemy import String
# These are imporant basic modules to declare tables in SQLAlchemy ORM
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm import Mapped
from sqlalchemy.orm import mapped_column

# We inherit DeclarativeBase to create a Base, why? Because we need to create a specific registry linked to the database.
# class normally do not hold attributes, but in this specific case they do, as it is used as a "legend" to navigate the SQL schema
class Base(DeclarativeBase):
    pass

# Let's see if there's something (you only see an "object" like: <sqlalchemy.orm.decl_api.registry object at ....>
print(Base.registry)

for row in Base.registry.mappers:
    print(row)

class Sample(Base):
    __tablename__ = "sample"
    # This "id: Mapped[int]" is a type hint, helps python to identify the right variable type, as mapped_column is type agnostic
    name: Mapped[str] = mapped_column(String[50], primary_key=True)
    
    # Type hint here is real 
    molecular_weight: Mapped[float]

<sqlalchemy.orm.decl_api.registry object at 0xffff866b04d0>


In [31]:
# Just to be sure, let's delete the database and..
!rm ./databases/orm_database.db

In [32]:
# Create a new sqlite database
from sqlalchemy import create_engine
engine = create_engine("sqlite:///databases/orm_database.db", echo=True)
# Create all the table, using the definition in Base (that's why we used it)
Base.metadata.create_all(engine)

2024-10-13 18:53:12,289 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 18:53:12,292 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sample")
2024-10-13 18:53:12,293 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 18:53:12,300 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("sample")
2024-10-13 18:53:12,302 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 18:53:12,315 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("reactions")
2024-10-13 18:53:12,319 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 18:53:12,321 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("reactions")
2024-10-13 18:53:12,321 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 18:53:12,325 INFO sqlalchemy.engine.Engine 
CREATE TABLE sample (
	name VARCHAR NOT NULL, 
	molecular_weight FLOAT NOT NULL, 
	PRIMARY KEY (name)
)


2024-10-13 18:53:12,326 INFO sqlalchemy.engine.Engine [no key 0.00095s] ()
2024-10-13 18:53:12,331 INFO sqlalchemy.engine.Engine 
CREATE TABLE rea

In [7]:
# Let's check the mappers
for row in Base.registry.mappers:
    print(row)

Mapper[Sample(sample)]


In [8]:
# And user inspect to verify other parameters
from sqlalchemy import inspect
for row in inspect(Sample).columns:
    print(row)

sample.name
sample.molecular_weight


In [9]:
# Let's populate our table with data!
from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError

# Create a session
with Session(engine) as session:
    try:
        # Let's insert the first row of data
        CO2 = Sample(
            name="Carbon Dioxide",
            molecular_weight=44.05,
        )
        # Let's insert the second raw of data
        C6H6 = Sample(name = 'Benzene', molecular_weight = 78.11)
        session.add_all([CO2, C6H6])
        session.commit()
    except IntegrityError as e:
        print(f"Error, created new data must have a unique primary key! \n{e}")

2024-10-13 13:29:58,006 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:29:58,013 INFO sqlalchemy.engine.Engine INSERT INTO sample (name, molecular_weight) VALUES (?, ?)
2024-10-13 13:29:58,013 INFO sqlalchemy.engine.Engine [generated in 0.00076s] [('Carbon Dioxide', 44.05), ('Benzene', 78.11)]
2024-10-13 13:29:58,017 INFO sqlalchemy.engine.Engine COMMIT


In [10]:
# Let's use a SELECT to retrieve data

from sqlalchemy import select
with Session(engine) as session:
    stmt = select(Sample)

    for sample in session.scalars(stmt):
        print(f"{sample.name} - {sample.molecular_weight}")

2024-10-13 13:30:00,756 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:00,783 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample
2024-10-13 13:30:00,784 INFO sqlalchemy.engine.Engine [generated in 0.00166s] ()
Carbon Dioxide - 44.05
Benzene - 78.11
2024-10-13 13:30:00,787 INFO sqlalchemy.engine.Engine ROLLBACK


In [11]:
# Let's use a SELECT WHERE to define better our row boundaries

from sqlalchemy import select
with Session(engine) as session:
    stmt = select(Sample).where(Sample.molecular_weight > 50.0 )
    
    for sample in session.scalars(stmt):
        print(f"{sample.name} - {sample.molecular_weight}")

2024-10-13 13:30:01,327 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:01,332 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample 
WHERE sample.molecular_weight > ?
2024-10-13 13:30:01,333 INFO sqlalchemy.engine.Engine [generated in 0.00133s] (50.0,)
Benzene - 78.11
2024-10-13 13:30:01,335 INFO sqlalchemy.engine.Engine ROLLBACK


In [12]:
# Let's change a value!
with Session(engine) as session:
    stmt = select(Sample).where(Sample.name == "Carbon Dioxide")
    one_row_object = session.scalars(stmt).one()
    one_row_object.molecular_weight = 44.01    
    session.commit()
    stmt = select(Sample)
    
    for sample in session.scalars(stmt):
        print(f"{sample.name} - {sample.molecular_weight}")

2024-10-13 13:30:03,431 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:03,438 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample 
WHERE sample.name = ?
2024-10-13 13:30:03,440 INFO sqlalchemy.engine.Engine [generated in 0.00199s] ('Carbon Dioxide',)
2024-10-13 13:30:03,465 INFO sqlalchemy.engine.Engine UPDATE sample SET molecular_weight=? WHERE sample.name = ?
2024-10-13 13:30:03,468 INFO sqlalchemy.engine.Engine [generated in 0.00348s] (44.01, 'Carbon Dioxide')
2024-10-13 13:30:03,470 INFO sqlalchemy.engine.Engine COMMIT
2024-10-13 13:30:03,474 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:03,477 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample
2024-10-13 13:30:03,477 INFO sqlalchemy.engine.Engine [cached since 2.695s ago] ()
Carbon Dioxide - 44.01
Benzene - 78.11
2024-10-13 13:30:03,480 INFO sqlalchemy.engine.Engine ROLLBACK


#### SELECT with JOIN

In [13]:
# Let's create another table
from sqlalchemy.exc import InvalidRequestError
# Just to be sure let's drop the table
try:
    Reactions.__table__.drop(engine)
except:
    pass # THIS IS A CAPITAL SIN. DO NOT DO THIS.

class Reactions(Base):
    __tablename__ = 'reactions'
    reaction_product : Mapped[str] = mapped_column(String[80], primary_key=True)
    component_1 : Mapped[str] = mapped_column(String[80])
    component_2 : Mapped[str] = mapped_column(String[80])
    energy : Mapped[float] = mapped_column(nullable=True)
    # this is useful if you are modifing the table dynamically (or you need to show it multiple times to the students!)
    __table_args__ = {'extend_existing':True}

    # The parameters are starting to get out of hands, and I am lazy, so we define a string rappresentation of our data
    # To not compose a print statement every time (is comparable to a ToString() method on other languages
    def __repr__(self):
        return (f"Reactions(reaction_product={self.reaction_product!r},component_1={self.component_1!r},component_2={self.component_2!r}),energy={self.energy!r})")

Base.metadata.create_all(engine)

2024-10-13 13:30:06,276 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:06,287 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sample")
2024-10-13 13:30:06,291 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 13:30:06,294 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("reactions")
2024-10-13 13:30:06,296 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 13:30:06,297 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("reactions")
2024-10-13 13:30:06,298 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-13 13:30:06,301 INFO sqlalchemy.engine.Engine 
CREATE TABLE reactions (
	reaction_product VARCHAR NOT NULL, 
	component_1 VARCHAR NOT NULL, 
	component_2 VARCHAR NOT NULL, 
	energy FLOAT, 
	PRIMARY KEY (reaction_product)
)


2024-10-13 13:30:06,301 INFO sqlalchemy.engine.Engine [no key 0.00069s] ()
2024-10-13 13:30:06,346 INFO sqlalchemy.engine.Engine COMMIT


In [14]:
from sqlalchemy.exc import IntegrityError
with Session(engine) as session:
    try:
        Friz = Reactions(reaction_product = 'Carbonic Acid', component_1 = 'Carbon Dioxide', component_2 = 'Water', energy=None)
        Etha = Reactions(reaction_product = 'Ethanol', component_1 = 'Ethilene', component_2 ='Water', energy = 1000)
        session.add_all([Friz, Etha])
        session.commit()
    except IntegrityError as e:
        print(e)
    

2024-10-13 13:30:09,280 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:09,302 INFO sqlalchemy.engine.Engine INSERT INTO reactions (reaction_product, component_1, component_2, energy) VALUES (?, ?, ?, ?)
2024-10-13 13:30:09,304 INFO sqlalchemy.engine.Engine [generated in 0.00232s] [('Carbonic Acid', 'Carbon Dioxide', 'Water', None), ('Ethanol', 'Ethilene', 'Water', 1000.0)]
2024-10-13 13:30:09,312 INFO sqlalchemy.engine.Engine COMMIT


In [15]:
# Let's check our table

from sqlalchemy import select
with Session(engine) as session:
    stmt = select(Reactions)
    print("REACTION TABLE:")
    for reaction in session.scalars(stmt):
        print(reaction)
    stmt = select(Sample)
    print("SAMPLE TABLE:")
    for sample in session.scalars(stmt):
        print(f"{sample.name} - {sample.molecular_weight}")


REACTION TABLE:
2024-10-13 13:30:11,114 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:11,120 INFO sqlalchemy.engine.Engine SELECT reactions.reaction_product, reactions.component_1, reactions.component_2, reactions.energy 
FROM reactions
2024-10-13 13:30:11,123 INFO sqlalchemy.engine.Engine [generated in 0.00359s] ()
Reactions(reaction_product='Carbonic Acid',component_1='Carbon Dioxide',component_2='Water'),energy=None)
Reactions(reaction_product='Ethanol',component_1='Ethilene',component_2='Water'),energy=1000.0)
SAMPLE TABLE:
2024-10-13 13:30:11,129 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample
2024-10-13 13:30:11,130 INFO sqlalchemy.engine.Engine [cached since 10.35s ago] ()
Carbon Dioxide - 44.01
Benzene - 78.11
2024-10-13 13:30:11,131 INFO sqlalchemy.engine.Engine ROLLBACK


In [16]:
# And now we delete something
with Session(engine) as session:
    C6H6 = session.get(Sample, 'Benzene')
    print(C6H6.name + " " + str(C6H6.molecular_weight))
    session.delete(C6H6)
    session.commit()

2024-10-13 13:30:13,663 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:13,672 INFO sqlalchemy.engine.Engine SELECT sample.name AS sample_name, sample.molecular_weight AS sample_molecular_weight 
FROM sample 
WHERE sample.name = ?
2024-10-13 13:30:13,674 INFO sqlalchemy.engine.Engine [generated in 0.00359s] ('Benzene',)
Benzene 78.11
2024-10-13 13:30:13,679 INFO sqlalchemy.engine.Engine DELETE FROM sample WHERE sample.name = ?
2024-10-13 13:30:13,684 INFO sqlalchemy.engine.Engine [generated in 0.00485s] ('Benzene',)
2024-10-13 13:30:13,691 INFO sqlalchemy.engine.Engine COMMIT


In [36]:
# Let's recreate our Benzene with JSON
import json
raw_json_list = '{"C6H6" : {"name" : "Benzene", "molecular_weight" : "78.11"}}'
print(json.loads(raw_json_list)['C6H6'])

class Base(DeclarativeBase):
    pass

class Sample(Base):
    __tablename__ = "sample"
    # This "id: Mapped[int]" is a type hint, helps python to identify the right variable type, as mapped_column is type agnostic
    name: Mapped[str] = mapped_column(String[50], primary_key=True)
    
    # Type hint here is real 
    molecular_weight: Mapped[float]

    def from_json(self, in_json):
        self.name = in_json['name']
        self.molecular_weight = in_json['molecular_weight']

with Session(engine) as session:
    C6H6 = Sample()
    C6H6.from_json(json.loads(raw_json_list)['C6H6'])
    session.add(C6H6)
    session.commit()

{'name': 'Benzene', 'molecular_weight': '78.11'}
2024-10-13 18:55:05,882 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 18:55:05,901 INFO sqlalchemy.engine.Engine INSERT INTO sample (name, molecular_weight) VALUES (?, ?)
2024-10-13 18:55:05,902 INFO sqlalchemy.engine.Engine [generated in 0.00214s] ('Benzene', 78.11)
2024-10-13 18:55:05,908 INFO sqlalchemy.engine.Engine COMMIT


In [18]:
# Let's use a Join in ORM in a "naive" way
from sqlalchemy import select
with Session(engine) as session:
    stmt = session.query(Reactions).join(Sample,Reactions.component_1==Sample.name)
    for result in session.scalars(stmt):
        print(result)

2024-10-13 13:30:21,103 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-13 13:30:21,125 INFO sqlalchemy.engine.Engine SELECT reactions.reaction_product AS reactions_reaction_product, reactions.component_1 AS reactions_component_1, reactions.component_2 AS reactions_component_2, reactions.energy AS reactions_energy 
FROM reactions JOIN sample ON reactions.component_1 = sample.name
2024-10-13 13:30:21,128 INFO sqlalchemy.engine.Engine [generated in 0.00451s] ()
Reactions(reaction_product='Carbonic Acid',component_1='Carbon Dioxide',component_2='Water'),energy=None)
2024-10-13 13:30:21,131 INFO sqlalchemy.engine.Engine ROLLBACK


  for result in session.scalars(stmt):


*It works but we got scolded!*
*Now let's return to the slide, and let's learn about relations and foreign keys!*

### A LITTLE DIVE INTO THE MAPPING LIMITATIONS AND PITFALLS
*These examples are made to explore how to **NOT** use ORM, remember that works better when the link between the SQL and OOP are solid*    
<span style="color:red">Beware: THIS ARE EXAMPLES TO TEACH HOW DO NOT USE ORM IN GENERAL, EXPECT PROBLEMS!!! </span>

In [178]:
# Let's use inheritance and abstraction to add a method to our "table":
class BetterSample(Sample):
    def moles(self,weight):
        return weight/self.molecular_weight
# You see that you get a warning, the table, as declared, already exists, the class is usable and mapped but it is not ideal
# Inheritance is still usable (a lot, we will see in the next session), but in this case is better to just add the method on
# the original class

  class BetterSample(Sample):


In [179]:
# So now we panic and we want to delete everything!
# Quiz Time, what we delete with this:
# BetterSample.__table__.drop(engine)?

from sqlalchemy.exc import OperationalError
BetterSample.__table__.drop(engine)
with Session(engine) as session:
    stmt = select(Sample).where(Sample.molecular_weight > 50.0 )
    try:
        session.scalars(stmt)
    except OperationalError as e:
        print(f"\n ERROR: \n {e}")

2024-10-08 09:00:04,959 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-08 09:00:04,980 INFO sqlalchemy.engine.Engine 
DROP TABLE sample
2024-10-08 09:00:04,981 INFO sqlalchemy.engine.Engine [no key 0.00164s] ()
2024-10-08 09:00:04,995 INFO sqlalchemy.engine.Engine COMMIT
2024-10-08 09:00:04,999 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-08 09:00:05,000 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample 
WHERE sample.molecular_weight > ?
2024-10-08 09:00:05,001 INFO sqlalchemy.engine.Engine [cached since 2943s ago] (50.0,)

 ERROR: 
 (sqlite3.OperationalError) no such table: sample
[SQL: SELECT sample.name, sample.molecular_weight 
FROM sample 
WHERE sample.molecular_weight > ?]
[parameters: (50.0,)]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
2024-10-08 09:00:05,013 INFO sqlalchemy.engine.Engine ROLLBACK


In [180]:
# That's bad! Let's recreate our table schema!
# recreate the schema
Base.metadata.create_all(engine)
# NOTE THAT WE DO NOT HAVE TO CREATE THE CLASSES! WE HAVE TO REDO THE DATABASE SCHEMA CREATION!!!
# Remake our CO2
with Session(engine) as session:
    # Let's insert the first row of data
    CO2 = Sample(
        name="Carbon Dioxide",
        molecular_weight=44.01,
    )
    # Let's insert the second raw of data
    C6H6 = Sample(name = 'Benzene', molecular_weight = 78.11)
    session.add_all([CO2, C6H6])
    session.commit()

2024-10-08 09:00:21,545 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-08 09:00:21,550 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("sample")
2024-10-08 09:00:21,554 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-08 09:00:21,563 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("sample")
2024-10-08 09:00:21,564 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-10-08 09:00:21,569 INFO sqlalchemy.engine.Engine 
CREATE TABLE sample (
	name VARCHAR NOT NULL, 
	molecular_weight FLOAT NOT NULL, 
	PRIMARY KEY (name)
)


2024-10-08 09:00:21,570 INFO sqlalchemy.engine.Engine [no key 0.00029s] ()
2024-10-08 09:00:21,578 INFO sqlalchemy.engine.Engine COMMIT
2024-10-08 09:00:21,593 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-08 09:00:21,598 INFO sqlalchemy.engine.Engine INSERT INTO sample (name, molecular_weight) VALUES (?, ?)
2024-10-08 09:00:21,599 INFO sqlalchemy.engine.Engine [generated in 0.00106s] [('Carbon Dioxide', 44.01), ('Benzene', 78.11)]
2024-10-08 09:0

In [149]:
# Everything is back to normal!
with Session(engine) as session:
    stmt = select(Sample).where(Sample.molecular_weight > 50.0 )
    for sample in session.scalars(stmt):
        print(f"{sample.name} - {sample.molecular_weight}")


2024-10-08 08:14:12,625 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-08 08:14:12,650 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample 
WHERE sample.molecular_weight > ?
2024-10-08 08:14:12,653 INFO sqlalchemy.engine.Engine [cached since 191.1s ago] (50.0,)
Benzene - 78.11
2024-10-08 08:14:12,657 INFO sqlalchemy.engine.Engine ROLLBACK


In [155]:
# Multiple Inheritance done bad: extend mapped classes just to add methods or not mapped components is possible but
# not advisable, wrapping (passing the class also as an attribute) is not advisable either, especially if we work with multiple
# sessions:
class SampleWrapper:
    sample = None
    def __init__(self, sample):
        if(type(sample).__name__ == 'Sample'):
            self.sample = sample
        else:
            raise ValueError("Error: parameter is not a Sample")

    def moles(self, weight):
        try:
            return weight/self.sample.molecular_weight
        except ValueError as e:
            print(e)

# QUIZ:
# CO2 is working?
from sqlalchemy.orm.exc import DetachedInstanceError
with Session(engine) as session:
    try:
        CO2c = SampleWrapper(CO2)
        print(CO2c.moles(10))
    except DetachedInstanceError as e:
        print(e)

Instance <Sample at 0xffff8752a2d0> is not bound to a Session; attribute refresh operation cannot proceed (Background on this error at: https://sqlalche.me/e/20/bhk3)


In [186]:
# so... does this works?
with Session(engine) as session:
    try:
        CO2c = SampleWrapper(select(Sample).where(Sample.name == 'Carbon Dioxide'))
        print(CO2c.moles(10))
    except ValueError as e:
        print(e)

Error: parameter is not a Sample


In [None]:
# No! a select is not a Sample

In [188]:
# This still do not works because... 
with Session(engine) as session:
    try:
        stmt = select(Sample).where(Sample.name == 'Carbon Dioxide')
        CO2c = SampleWrapper(session.scalars(stmt).first())
        print(CO2c.moles(10))
    except ValueError as e:
        print(e)
# This seems that is working, and, as a pure OOP it is, but...
from sqlalchemy.orm.exc import UnmappedInstanceError
# while it has the value, it is not mapped to our Database:
try:
    session.add(CO2c)
    session.commit()
except UnmappedInstanceError as e:
    print(e)

2024-10-08 09:32:32,654 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-10-08 09:32:32,658 INFO sqlalchemy.engine.Engine SELECT sample.name, sample.molecular_weight 
FROM sample 
WHERE sample.name = ?
2024-10-08 09:32:32,659 INFO sqlalchemy.engine.Engine [cached since 2070s ago] ('Carbon Dioxide',)
0.22722108611679165
2024-10-08 09:32:32,663 INFO sqlalchemy.engine.Engine ROLLBACK
Class '__main__.SampleWrapper' is not mapped
