## Interact with a SQL database from Python

1. You have data in a db: how do you bring that data into Python?
2. You have a single csv file, or several unrelated csv or json files, and have to push them to a db.
3. You have several related csv or json files that have to be pushed into the db.

In [2]:
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine

In [3]:
import sqlalchemy

In [4]:
## Show which SQLAlchemy version is installed
sqlalchemy.__version__

'1.4.22'

In [6]:
## create_engine() takes a database URL; the same call is used for sqlite, mysql, oracle, postgres
## Build the engine for the SQLite file ./data/music.db
engine = create_engine('sqlite:///./data/music.db') ## the URL gives the location of the db file

In [7]:
## Reflect the schema that already exists in the db, so that the original
## tables are kept and each one gets a mapped class on Base.classes.
## `autoload_with=engine` replaces `Base.prepare(engine, reflect=True)`,
## which SQLAlchemy 1.4 marks as deprecated.
Base = automap_base()
Base.prepare(autoload_with=engine)

In [8]:
### List the names of the classes that automap generated from the db tables
## NOTE(review): this lists mapped classes, not raw tables; a table automap could not map
## would be missing here. Base.metadata.tables should hold every reflected table (confirm).
Base.classes.keys()

['Album',
 'Artist',
 'Customer',
 'Employee',
 'Genre',
 'Invoice',
 'InvoiceLine',
 'Track',
 'MediaType',
 'Playlist']

In [9]:
## Query the Artist table and fetch the first 5 rows.
## Engine.execute() and plain-string SQL are deprecated in SQLAlchemy 1.4 and
## removed in 2.0, so the statement is wrapped in text() and run on an explicit
## connection that the `with` block closes again.
from sqlalchemy import text

with engine.connect() as conn:
    artist_rows = conn.execute(text("Select * from Artist limit 5;")).fetchall()
artist_rows

[(1, 'AC/DC'),
 (2, 'Accept'),
 (3, 'Aerosmith'),
 (4, 'Alanis Morissette'),
 (5, 'Alice In Chains')]

In [10]:
## Query the Employee table and fetch the first 5 rows.
## Engine.execute() and plain-string SQL are deprecated in SQLAlchemy 1.4 and
## removed in 2.0, so the statement is wrapped in text() and run on an explicit
## connection that the `with` block closes again.
from sqlalchemy import text

with engine.connect() as conn:
    employee_rows = conn.execute(text("Select * from Employee limit 5;")).fetchall()
employee_rows

[(1, 'Adams', 'Andrew', 'General Manager', None, '1962-02-18 00:00:00', '2002-08-14 00:00:00', '11120 Jasper Ave NW', 'Edmonton', 'AB', 'Canada', 'T5K 2N1', '+1 (780) 428-9482', '+1 (780) 428-3457', 'andrew@chinookcorp.com'),
 (2, 'Edwards', 'Nancy', 'Sales Manager', 1, '1958-12-08 00:00:00', '2002-05-01 00:00:00', '825 8 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 2T3', '+1 (403) 262-3443', '+1 (403) 262-3322', 'nancy@chinookcorp.com'),
 (3, 'Peacock', 'Jane', 'Sales Support Agent', 2, '1973-08-29 00:00:00', '2002-04-01 00:00:00', '1111 6 Ave SW', 'Calgary', 'AB', 'Canada', 'T2P 5M5', '+1 (403) 262-3443', '+1 (403) 262-6712', 'jane@chinookcorp.com'),
 (4, 'Park', 'Margaret', 'Sales Support Agent', 2, '1947-09-19 00:00:00', '2003-05-03 00:00:00', '683 10 Street SW', 'Calgary', 'AB', 'Canada', 'T2P 5G3', '+1 (403) 263-4423', '+1 (403) 263-4289', 'margaret@chinookcorp.com'),
 (5, 'Johnson', 'Steve', 'Sales Support Agent', 2, '1965-03-03 00:00:00', '2003-10-17 00:00:00', '7727B 41 Ave', 'Cal

In [11]:
## Pick the class that automap generated for the Artist table
Artist = Base.classes.Artist

In [12]:
## Column names of the table behind the Artist class
Artist.__table__.columns.keys()

['ArtistId', 'Name']

In [13]:
## Pick the class that automap generated for the Employee table
Emp = Base.classes.Employee

In [14]:
## Column names of the table behind the Employee class
Emp.__table__.columns.keys()

['EmployeeId',
 'LastName',
 'FirstName',
 'Title',
 'ReportsTo',
 'BirthDate',
 'HireDate',
 'Address',
 'City',
 'State',
 'Country',
 'PostalCode',
 'Phone',
 'Fax',
 'Email']

In [15]:
import pandas as pd

In [16]:
## Run the query through the engine and load the whole Artist table into a DataFrame
df = pd.read_sql("Select * from Artist;",engine)

In [17]:
## Preview the first rows of the DataFrame
df.head()

Unnamed: 0,ArtistId,Name
0,1,AC/DC
1,2,Accept
2,3,Aerosmith
3,4,Alanis Morissette
4,5,Alice In Chains


## Insert data into a db: a single csv file or several csv files that are unrelated to each other

In [18]:
from sqlalchemy.ext.declarative import declarative_base ## creating new tables and new db
from sqlalchemy import create_engine ## creating connection
from sqlalchemy.orm import Session ## inserting data

In [75]:
## Fresh declarative base: this rebinds Base, and table classes that subclass it
## are collected on Base.metadata
Base = declarative_base()

In [20]:
### Write the schema of the table as a Python class
## The class states the names of the columns of the table
## and the datatype of each column ---> the column types are imported from sqlalchemy
from sqlalchemy import Column, Integer, Numeric, String, DateTime

In [76]:
### Write my table as a python class
class Sales(Base):
    """ORM mapping for the ``sales`` table.

    An auto-incrementing integer primary key plus five data columns,
    all of which accept NULL.
    """

    __tablename__ = "sales"

    ## Surrogate primary key
    id = Column(Integer, primary_key=True, autoincrement=True)

    ## Data columns, every one nullable
    sales_month = Column(DateTime, nullable=True)
    naics_code = Column(String, nullable=True)
    ## NOTE(review): spelled `kind_of_busines` (single "s"); kept unchanged because
    ## the name is both the column name in the db and the keyword callers pass.
    kind_of_busines = Column(String, nullable=True)
    reason_for_null = Column(String, nullable=True)
    sales = Column(Numeric, nullable=True)

In [77]:
## Rebind engine to a different SQLite file, ./data/example.db
engine = create_engine("sqlite:///./data/example.db")

In [78]:
## Create the tables declared on Base in the db behind engine; they start out with no rows
Base.metadata.create_all(engine)

In [79]:
## Insert data: open an ORM session on the engine; rows are added and committed through it
session = Session(engine)

In [80]:
import csv
import datetime

from sqlalchemy.exc import SQLAlchemyError
from tqdm import tqdm

## Load ./data/retail_sales_us.csv into the sales table, one Sales object per csv row.
##
## encoding="utf-8-sig" strips the byte-order mark at the start of the file, so the
## first header is read as plain "sales_month" instead of "\ufeffsales_month"
## (files without a BOM are read the same way).
## newline="" is what the csv module asks for when it is given a file object.
failed_rows = []  ## (csv line number, error) for every row the db rejected
with open("./data/retail_sales_us.csv", "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.DictReader(f, delimiter=",")
    for row in tqdm(reader):
        ## Empty csv cells become None so they are stored as NULL rather than ''.
        month_text = row["sales_month"]
        db_row = Sales(
            sales_month=datetime.datetime.fromisoformat(month_text) if month_text else None,
            naics_code=row["naics_code"] or None,
            kind_of_busines=row["kind_of_business"] or None,
            reason_for_null=row["reason_for_null"] or None,
            sales=row["sales"] or None,
        )
        session.add(db_row)
        ## Commit row by row: a rejected row is rolled back on its own and the rows
        ## committed before it stay in the db. Only database errors are caught, so
        ## Ctrl-C and programming errors are no longer swallowed.
        try:
            session.commit()
        except SQLAlchemyError as err:
            session.rollback()
            failed_rows.append((reader.line_num, err))

## Report rejected rows instead of dropping them silently (first few in detail).
if failed_rows:
    print(f"{len(failed_rows)} row(s) could not be inserted.")
    for line_num, err in failed_rows[:5]:
        print(f"  csv line {line_num}: {err}")

22620it [00:15, 1488.38it/s]


In [96]:
## Scenario 3: When we have related tables.
from sqlalchemy.ext.declarative import declarative_base ## creating new tables and new db
from sqlalchemy import create_engine ## creating connection
from sqlalchemy.orm import Session
from sqlalchemy import Column, Integer, Numeric, String, DateTime, ForeignKey

In [97]:
## New declarative base for the related tables; the classes below register on its metadata
Base = declarative_base()
class Candidate(Base):
    """ORM mapping for the ``candidates`` table.

    ``candidate_id`` is the primary key; first name, last name and party are
    required, the middle name is optional.
    """

    __tablename__ = "candidates"

    candidate_id = Column(Integer, primary_key=True)

    ## Required columns
    first_name = Column(String, nullable=False)
    last_name = Column(String, nullable=False)
    ## Optional column
    middle_name = Column(String)
    ## Required column
    party = Column(String, nullable=False)
    
class Contributions(Base):
    """ORM mapping for the ``contributions`` table.

    Each row carries the contributor's name and address, the amount and date,
    and a foreign key to ``candidates.candidate_id``.
    """

    __tablename__ = "contributions"

    ## Auto-incrementing surrogate primary key
    i_d = Column(Integer, primary_key=True, autoincrement=True)

    ## Contributor name: last and first name are required
    last_name = Column(String, nullable=False)
    first_name = Column(String, nullable=False)
    middle_name = Column(String)

    ## Contributor address
    street_1 = Column(String)
    street_2 = Column(String)
    city = Column(String)
    state = Column(String)
    ## NOTE(review): an Integer column cannot keep leading zeros of ZIP codes
    ## such as "02139" -- consider String; left unchanged to keep the schema.
    zip_code = Column(Integer)

    ## The contribution itself
    amount = Column(Numeric)
    date = Column(DateTime)

    ## Link to the candidate that received the contribution
    candidate_id = Column(Integer, ForeignKey("candidates.candidate_id"))
    
## Point engine at ./data/relationship.db and create the tables declared on Base in it
engine = create_engine("sqlite:///./data/relationship.db")
Base.metadata.create_all(engine) 

In [98]:
from sqlalchemy.exc import SQLAlchemyError

## Load ../Week1/data/candidates.csv into the candidates table, one Candidate per csv row.
## newline="" is what the csv module asks for when it is given a file object.
session = Session(engine)
failed_rows = []  ## (csv line number, error) for every row the db rejected
with open("../Week1/data/candidates.csv", "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f, delimiter=",")
    for row in tqdm(reader):
        ## Empty csv cells become None so they are stored as NULL rather than ''.
        db_row = Candidate(
            candidate_id=row["id"] or None,
            first_name=row["first_name"] or None,
            last_name=row["last_name"] or None,
            middle_name=row["middle_name"] or None,
            party=row["party"] or None,
        )
        session.add(db_row)
        ## Commit row by row: a rejected row (for example one that leaves a
        ## nullable=False column empty) is rolled back on its own and the rows
        ## committed before it stay in the db. Only database errors are caught,
        ## so Ctrl-C and programming errors are no longer swallowed.
        try:
            session.commit()
        except SQLAlchemyError as err:
            session.rollback()
            failed_rows.append((reader.line_num, err))

## Report rejected rows instead of dropping them silently (first few in detail).
if failed_rows:
    print(f"{len(failed_rows)} row(s) could not be inserted.")
    for line_num, err in failed_rows[:5]:
        print(f"  csv line {line_num}: {err}")
        

17it [00:00, 766.00it/s]


In [99]:
from sqlalchemy.exc import SQLAlchemyError

## Load ../Week1/data/contributions.csv into the contributions table,
## one Contributions object per csv row.
## newline="" is what the csv module asks for when it is given a file object.
failed_rows = []  ## (csv line number, error) for every row the db rejected
with open("../Week1/data/contributions.csv", "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f, delimiter=",")
    for row in tqdm(reader):
        ## Empty csv cells become None so they are stored as NULL rather than ''.
        date_text = row["date"]
        db_row = Contributions(
            last_name=row["last_name"] or None,
            first_name=row["first_name"] or None,
            middle_name=row["middle_name"] or None,
            street_1=row["street_1"] or None,
            street_2=row["street_2"] or None,
            city=row["city"] or None,
            state=row["state"] or None,
            zip_code=row["zip"] or None,
            amount=row["amount"] or None,
            candidate_id=row["candidate_id"] or None,
            date=datetime.datetime.fromisoformat(date_text) if date_text else None,
        )
        session.add(db_row)
        ## Commit row by row: a rejected row is rolled back on its own and the rows
        ## committed before it stay in the db. Only database errors are caught, so
        ## Ctrl-C and programming errors are no longer swallowed.
        try:
            session.commit()
        except SQLAlchemyError as err:
            session.rollback()
            failed_rows.append((reader.line_num, err))

## Report rejected rows instead of dropping them silently (first few in detail).
if failed_rows:
    print(f"{len(failed_rows)} row(s) could not be inserted.")
    for line_num, err in failed_rows[:5]:
        print(f"  csv line {line_num}: {err}")

175it [00:00, 1265.76it/s]
