# Workflow: Loading data from .csv files into SQLite Database

## Load all necessary packages

In [1]:
import numpy as np
import pandas as pd
import sqlite3
from sqlite3 import Error

## Load csv tables into Pandas 

In [52]:
def _read_semicolon_csv(filename):
    """Load one semicolon-delimited CSV file into a DataFrame."""
    return pd.read_csv(filename, sep=';')


# One DataFrame per source file; the variable names are reused by the
# schema-inspection and loading cells further down.
accounts = _read_semicolon_csv('twm_accounts.csv')
checking_acct = _read_semicolon_csv('twm_checking_acct.csv')
checking_tran = _read_semicolon_csv('twm_checking_tran.csv')
credit_acct = _read_semicolon_csv('twm_credit_acct.csv')
credit_tran = _read_semicolon_csv('twm_credit_tran.csv')
customer = _read_semicolon_csv('twm_customer.csv')
savings_acct = _read_semicolon_csv('twm_savings_acct.csv')
savings_tran = _read_semicolon_csv('twm_savings_tran.csv')
transactions = _read_semicolon_csv('twm_transactions.csv')

In [57]:
# stripping leading and trailing spaces
#df_obj = accounts.select_dtypes(['object'])
#accounts[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())


acct_nbr              13628063
cust_id                1362806
acct_type                   SV
account_active               Y
acct_start_date     10.12.1995
acct_end_date              NaN
starting_balance       1430.22
ending_balance          284.58
Name: 0, dtype: object

## Create the database. If the database file already exists, it is opened and reused (it is not overwritten).

In [3]:
def create_connection(path):
    """Open a SQLite connection to the database at ``path``.

    Prints a status line either way. Returns the ``sqlite3.Connection`` on
    success, or ``None`` when ``sqlite3.connect`` raises ``sqlite3.Error``.
    """
    try:
        db = sqlite3.connect(path)
    except Error as exc:
        # Best-effort helper: report the problem and hand back None.
        print(f"The error '{exc}' occurred")
        return None

    print("Connection to SQLite DB successful")
    return db

In [4]:
# Open the SQLite database file "twm.db" in the working directory.
# create_connection returns None if the connection attempt fails, so every
# later cell that uses `connection` relies on this call having succeeded.
connection = create_connection("twm.db")

Connection to SQLite DB successful


## Executing the queries

In [5]:
def execute_query(connection, query):
    """Execute a single SQL statement and commit it.

    Parameters
    ----------
    connection : sqlite3.Connection
        Open connection the statement is run against.
    query : str
        One SQL statement (``cursor.execute`` does not accept several).

    Returns
    -------
    None
        The outcome is reported on stdout. ``sqlite3.Error`` is caught and
        printed rather than propagated.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Query executed successfully")
    except Error as e:
        # A failed DML statement leaves the implicitly opened transaction
        # pending; roll it back so the connection is clean for the next call.
        connection.rollback()
        print(f"The error '{e}' occurred")
    finally:
        # Release the cursor on both the success and the failure path.
        cursor.close()

## Creating 'accounts' table

**This section can be skipped. I was struggling to load the data and tried converting the object-dtype columns to strings, but that conversion did not actually work for the 'twm_accounts' table.**

In [6]:
#accounts = pd.read_csv('twm_accounts.csv', sep=";")
#accounts['acct_type'] = accounts['acct_type'].astype('str')
#accounts['acct_type'] = accounts['acct_type'].astype('str') 
#accounts['account_active'] = accounts['account_active'].astype('str') 
#accounts['acct_start_date'] = accounts['acct_start_date'].astype('str') 
#accounts['acct_end_date'] = accounts['acct_end_date'].astype('str')

In [7]:
# Preview the first rows of the accounts DataFrame.
accounts.head()

Unnamed: 0,acct_nbr,cust_id,acct_type,account_active,acct_start_date,acct_end_date,starting_balance,ending_balance
0,13628063,1362806,SV,Y,10.12.1995,,1430.22,284.58
1,4561143213627090,1362709,CC,Y,15.3.1993,,266.34,496.15
2,4561143213628360,1362836,CC,Y,18.3.1992,,55.9,1000.0
3,13633112,1363311,CK,Y,6.7.1995,,11017.13,968.46
4,4561143213633610,1363361,CC,Y,17.6.1994,,849.37,462.28


In [8]:
# Show the dtype pandas inferred for each accounts column.
accounts.dtypes

acct_nbr              int64
cust_id               int64
acct_type            object
account_active       object
acct_start_date      object
acct_end_date        object
starting_balance    float64
ending_balance      float64
dtype: object

In [9]:
# DDL for the twm_accounts table. IF NOT EXISTS makes the statement a no-op
# when the table is already present. acct_nbr is the primary key; the date
# columns are declared TEXT and the balances REAL.
create_table = """
CREATE TABLE  IF NOT EXISTS twm_accounts(
  cust_id INTEGER,
  acct_nbr BIGINT PRIMARY KEY,
  acct_type TEXT,
  account_active TEXT,
  acct_start_date TEXT,
  acct_end_date TEXT,
  starting_balance REAL,
  ending_balance REAL  
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)

Query executed successfully


## Creating 'customer' table

In [10]:
# Preview the first rows of the customer DataFrame.
customer.head()

Unnamed: 0,cust_id,income,age,years_with_bank,nbr_children,gender,marital_status,name_prefix,first_name,last_name,street_nbr,street_name,postal_code,city_name,state_code
0,1362691,26150,46,5,1,M,2,,Donald ...,Marek ...,8298,Second ...,89194,Las Vegas,NV
1,1362487,6605,71,1,0,M,2,,ChingDyi ...,Moussavi ...,10603,Daffodil ...,90159,Los Angeles,CA
2,1363160,18548,38,8,0,F,1,,Rosa ...,Johnston ...,8817,Figueroa ...,90024,Los Angeles,CA
3,1362752,47668,54,3,0,F,1,,Lisa ...,Martin ...,676,Humble ...,90172,Los Angeles,CA
4,1362548,44554,59,9,2,F,4,,Barbara ...,O'Malley ...,6578,C ...,10138,New York City,NY


In [11]:
# Show the dtype pandas inferred for each customer column.
customer.dtypes

cust_id             int64
income              int64
age                 int64
years_with_bank     int64
nbr_children        int64
gender             object
marital_status      int64
name_prefix        object
first_name         object
last_name          object
street_nbr          int64
street_name        object
postal_code         int64
city_name          object
state_code         object
dtype: object

In [12]:
# DDL for the twm_customer table. IF NOT EXISTS makes the statement a no-op
# when the table is already present. cust_id is the primary key.

create_table = """
CREATE TABLE  IF NOT EXISTS twm_customer(
    cust_id             INTEGER PRIMARY KEY,
    income              INTEGER,
    age                 INTEGER,
    years_with_bank     INTEGER,
    nbr_children        INTEGER,
    gender              TEXT,
    marital_status      INTEGER,
    name_prefix         TEXT,
    first_name          TEXT,
    last_name           TEXT,
    street_nbr          INTEGER,
    street_name         TEXT,
    postal_code         INTEGER,
    city_name           TEXT,
    state_code          TEXT
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Creating 'transactions' table

In [13]:
# Preview the first rows of the transactions DataFrame.
transactions.head()

Unnamed: 0,tran_id,acct_nbr,tran_amt,principal_amt,interest_amt,new_balance,tran_date,tran_time,channel,tran_code
0,27,13625623,0.0,0.0,0.0,3753.34,21.10.1995,121656,A,IQ
1,97,13628392,0.0,0.0,0.0,254.49,5.2.1995,153053,V,IQ
2,21,13630842,-97.57,-97.57,0.0,3819.56,23.7.1995,0,P,WD
3,44,13631412,-0.15,-0.15,0.0,224.05,30.1.1995,0,,FK
4,31,13625722,0.0,0.0,0.0,240.55,25.1.1995,204521,B,IQ


In [14]:
# Show the dtype pandas inferred for each transactions column.
transactions.dtypes

tran_id            int64
acct_nbr           int64
tran_amt         float64
principal_amt    float64
interest_amt     float64
new_balance      float64
tran_date         object
tran_time          int64
channel           object
tran_code         object
dtype: object

In [15]:
# DDL for the twm_transactions table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. The primary key is the composite
# (tran_id, acct_nbr).
# NOTE(review): tran_time is declared TEXT here although the recorded dtypes
# output shows it as int64 in the DataFrame -- confirm this is intended.
create_table = """
CREATE TABLE  IF NOT EXISTS twm_transactions(
    tran_id            INTEGER,
    acct_nbr           BIGINT,
    tran_amt           REAL,
    principal_amt      REAL,
    interest_amt       REAL,
    new_balance        REAL,
    tran_date          TEXT,
    tran_time          TEXT,
    channel            TEXT,
    tran_code          TEXT,
    PRIMARY KEY (tran_id, acct_nbr)
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## 'Checking_acct' Table

In [16]:
# Preview the first rows of the checking_acct DataFrame.
checking_acct.head()

Unnamed: 0,cust_id,acct_nbr,minimum_balance,per_check_fee,account_active,acct_start_date,acct_end_date,starting_balance,ending_balance
0,1362548,13625482,3000,0.0,Y,11.11.1986,,6004.34,569.65
1,1362487,13624872,3000,0.0,Y,12.1.1995,,2781.07,1401.21
2,1363017,13630172,3000,0.0,Y,19.3.1994,,2694.91,147.15
3,1362752,13627522,100,0.15,Y,8.1.1994,,487.69,3.76
4,1363282,13632822,100,0.15,Y,5.3.1988,,133.9,84.18


In [17]:
# Show the dtype pandas inferred for each checking_acct column.
checking_acct.dtypes

cust_id               int64
acct_nbr              int64
minimum_balance       int64
per_check_fee       float64
account_active       object
acct_start_date      object
acct_end_date        object
starting_balance    float64
ending_balance      float64
dtype: object

In [18]:
# DDL for the twm_checking_acct table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. acct_nbr is the primary key.
create_table = """
CREATE TABLE  IF NOT EXISTS twm_checking_acct(
    cust_id               INTEGER,
    acct_nbr              BIGINT PRIMARY KEY,
    minimum_balance       INTEGER,
    per_check_fee         REAL,
    account_active        TEXT,
    acct_start_date       TEXT,
    acct_end_date         TEXT,
    starting_balance      REAL,
    ending_balance        REAL   
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Checking Transactions

In [19]:
# Preview the first rows of the checking_tran DataFrame.
checking_tran.head()

Unnamed: 0,cust_id,tran_id,tran_amt,principal_amt,interest_amt,new_balance,tran_date,tran_time,channel,tran_code
0,1363481,26,-0.15,-0.15,0.0,58.99,29.1.1995,,,FK
1,1362784,90,-200.0,-200.0,0.0,1380.69,2.7.1995,53313.0,A,WD
2,1363088,46,0.0,0.0,0.0,128.35,26.3.1995,834.0,A,IQ
3,1363306,32,-40.32,-40.32,0.0,433.06,29.1.1995,,P,WD
4,1363251,11,-87.88,-87.88,0.0,1919.55,8.2.1995,,P,WD


In [20]:
# Show the dtype pandas inferred for each checking_tran column.
checking_tran.dtypes

cust_id            int64
tran_id            int64
tran_amt         float64
principal_amt    float64
interest_amt     float64
new_balance      float64
tran_date         object
tran_time         object
channel           object
tran_code         object
dtype: object

In [21]:
# DDL for the twm_checking_tran table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. The primary key is the composite
# (cust_id, tran_id).
create_table = """
CREATE TABLE  IF NOT EXISTS twm_checking_tran(
    cust_id            INTEGER,
    tran_id            INTEGER,
    tran_amt           REAL,
    principal_amt      REAL,
    interest_amt       REAL,
    new_balance        REAL,
    tran_date          TEXT,
    tran_time          TEXT,
    channel            TEXT,
    tran_code          TEXT,
    PRIMARY KEY (cust_id, tran_id)
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Credit Accounts

In [22]:
# Preview the first rows of the credit_acct DataFrame.
credit_acct.head()

Unnamed: 0,cust_id,acct_nbr,credit_limit,credit_rating,account_active,acct_start_date,acct_end_date,starting_balance,ending_balance
0,1363160,4561143213631600,1000,0,Y,12.10.1988,,657.46,286.69
1,1362487,4561143213624870,3000,0,Y,8.8.1995,,0.0,1548.23
2,1362548,4561143213625480,6700,0,Y,11.11.1986,,6965.25,68.68
3,1362752,4561143213627520,2400,0,N,9.10.1994,2.4.1995,2302.14,0.0
4,1363017,4561143213630170,1000,0,Y,8.2.1995,,0.0,1000.0


In [23]:
# Show the dtype pandas inferred for each credit_acct column.
credit_acct.dtypes

cust_id               int64
acct_nbr              int64
credit_limit          int64
credit_rating         int64
account_active       object
acct_start_date      object
acct_end_date        object
starting_balance    float64
ending_balance      float64
dtype: object

In [24]:
# DDL for the twm_credit_acct table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. acct_nbr is the primary key.
create_table = """
CREATE TABLE  IF NOT EXISTS twm_credit_acct(
    cust_id               INTEGER,
    acct_nbr              BIGINT PRIMARY KEY,
    credit_limit          INTEGER,
    credit_rating         INTEGER,
    account_active        TEXT,
    acct_start_date       TEXT,
    acct_end_date         TEXT,
    starting_balance      REAL,
    ending_balance        REAL
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Credit Transactions

In [25]:
# Preview the first rows of the credit_tran DataFrame.
credit_tran.head()

Unnamed: 0,cust_id,tran_id,tran_amt,principal_amt,interest_amt,new_balance,tran_date,tran_time,channel,tran_code
0,1363088,46,-121.49,-121.49,0.0,-141.0,20.10.1995,101144,E,CG
1,1363306,32,-220.64,-220.64,0.0,-520.13,13.10.1995,83115,E,CG
2,1362959,10,0.0,0.0,0.0,-3900.0,26.2.1995,84129,K,IQ
3,1363429,13,-195.27,-195.27,0.0,-1739.74,3.1.1995,191815,E,CG
4,1363393,14,-37.32,-37.32,0.0,-198.98,4.7.1995,100811,E,CG


In [26]:
# Show the dtype pandas inferred for each credit_tran column.
credit_tran.dtypes

cust_id            int64
tran_id            int64
tran_amt         float64
principal_amt    float64
interest_amt     float64
new_balance      float64
tran_date         object
tran_time         object
channel           object
tran_code         object
dtype: object

In [27]:
# DDL for the twm_credit_tran table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. The primary key is the composite
# (cust_id, tran_id).
create_table = """
CREATE TABLE  IF NOT EXISTS twm_credit_tran(
    cust_id            INTEGER,
    tran_id            INTEGER,
    tran_amt           REAL,
    principal_amt      REAL,
    interest_amt       REAL,
    new_balance        REAL,
    tran_date          TEXT,
    tran_time          TEXT,
    channel            TEXT,
    tran_code          TEXT,
    PRIMARY KEY (cust_id, tran_id)
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Savings Accounts

In [28]:
# Preview the first rows of the savings_acct DataFrame.
savings_acct.head()

Unnamed: 0,cust_id,acct_nbr,minimum_balance,acct_type,account_active,acct_start_date,acct_end_date,starting_balance,ending_balance
0,1363160,13631603,100,BS,Y,9.5.1988,,113.04,122.54
1,1362487,13624873,500,MM,N,27.6.1994,25.8.1995,651.73,12.89
2,1362548,13625483,100,BS,Y,11.11.1986,,250.24,254.02
3,1362752,13627523,100,BS,Y,20.2.1995,,600.26,609.33
4,1363017,13630173,100,BS,N,27.4.1994,31.10.1995,2011.02,2901.35


In [29]:
# Show the dtype pandas inferred for each savings_acct column.
savings_acct.dtypes

cust_id               int64
acct_nbr              int64
minimum_balance       int64
acct_type            object
account_active       object
acct_start_date      object
acct_end_date        object
starting_balance    float64
ending_balance      float64
dtype: object

In [30]:
# DDL for the twm_savings_acct table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. acct_nbr is the primary key.
create_table = """
CREATE TABLE  IF NOT EXISTS twm_savings_acct(
    cust_id               INTEGER,
    acct_nbr              BIGINT PRIMARY KEY,
    minimum_balance       INTEGER,
    acct_type             TEXT,
    account_active        TEXT,
    acct_start_date       TEXT,
    acct_end_date         TEXT,
    starting_balance      REAL,
    ending_balance        REAL
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Savings Transactions

In [31]:
# Preview the first rows of the savings_tran DataFrame.
savings_tran.head()

Unnamed: 0,cust_id,tran_id,tran_amt,principal_amt,interest_amt,new_balance,tran_date,tran_time,channel,tran_code
0,1363481,26,136.03,136.03,0.0,1521.57,26.10.1995,84111,A,DP
1,1362746,1,2.83,0.0,2.83,2265.9,31.1.1995,235959,,IN
2,1363251,11,13.56,0.0,13.56,5438.9,30.4.1995,235959,,IN
3,1362542,4,0.76,0.0,0.76,610.92,30.4.1995,235959,,IN
4,1363387,11,148.69,148.69,0.0,470.05,21.5.1995,144736,E,DP


In [32]:
# Show the dtype pandas inferred for each savings_tran column.
savings_tran.dtypes

cust_id            int64
tran_id            int64
tran_amt         float64
principal_amt    float64
interest_amt     float64
new_balance      float64
tran_date         object
tran_time         object
channel           object
tran_code         object
dtype: object

In [33]:
# DDL for the twm_savings_tran table. IF NOT EXISTS makes the statement a
# no-op when the table is already present. The primary key is the composite
# (cust_id, tran_id).
create_table = """
CREATE TABLE IF NOT EXISTS twm_savings_tran(
    cust_id            INTEGER,
    tran_id            INTEGER,
    tran_amt           REAL,
    principal_amt      REAL,
    interest_amt       REAL,
    new_balance        REAL,
    tran_date          TEXT,
    tran_time          TEXT,
    channel            TEXT,
    tran_code          TEXT,
    PRIMARY KEY (cust_id, tran_id)
);
"""
# Run the DDL and commit; execute_query prints the outcome.
execute_query(connection, create_table)  

Query executed successfully


## Loading Data from .csv files

In [34]:
#from sqlalchemy import create_engine
#engine = create_engine('sqlite://', echo=False)

In [35]:
# Pair every target table with the DataFrame that feeds it, in load order.
tables_to_load = [
    ('twm_accounts', accounts),
    ('twm_customer', customer),
    ('twm_transactions', transactions),
    ('twm_checking_acct', checking_acct),
    ('twm_checking_tran', checking_tran),
    ('twm_credit_acct', credit_acct),
    ('twm_credit_tran', credit_tran),
    ('twm_savings_acct', savings_acct),
    ('twm_savings_tran', savings_tran),
]

for table_name, frame in tables_to_load:
    # The tables are created with IF NOT EXISTS and filled with
    # if_exists='append', so re-running this cell (or the whole notebook
    # against an existing twm.db) would insert the same rows again and fail
    # on the primary keys. Empty an existing table first so the load is
    # repeatable.
    table_exists = connection.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
        (table_name,),
    ).fetchone()
    if table_exists:
        # table_name comes from the hard-coded list above, never from user
        # input, so interpolating it into the statement is safe.
        with connection:  # commits on success, rolls back on error
            connection.execute(f'DELETE FROM {table_name}')

    # 'append' keeps the schema (types, primary keys) from the CREATE TABLE
    # cells; index=False leaves the DataFrame index out of the table.
    frame.to_sql(table_name, con=connection, if_exists='append', index=False)


In [36]:
#accounts.to_sql('twm_accounts', con = engine)

## Read from Table

In [37]:
def execute_read_query(connection, query):
    """Run a read-only SQL statement and return all of its rows.

    Parameters
    ----------
    connection : sqlite3.Connection
        Open connection the statement is run against.
    query : str
        One SQL statement, typically a SELECT.

    Returns
    -------
    list of tuple or None
        Every row produced by the statement (an empty list when there are
        none), or ``None`` when ``sqlite3.Error`` is raised; the error is
        printed rather than propagated.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        return cursor.fetchall()
    except Error as e:
        print(f"The error '{e}' occurred")
        return None
    finally:
        # Release the cursor on both the success and the failure path.
        cursor.close()

In [38]:
#select_twm_accounts = "SELECT * from twm_accounts"
#users = execute_read_query(connection, select_twm_accounts)


In [39]:
# Read the whole twm_accounts table back into a DataFrame as a check on the load.
pd.read_sql('SELECT * FROM twm_accounts', con = connection)

Unnamed: 0,cust_id,acct_nbr,acct_type,account_active,acct_start_date,acct_end_date,starting_balance,ending_balance
0,1362806,13628063,SV,Y,10.12.1995,,1430.22,284.58
1,1362709,4561143213627090,CC,Y,15.3.1993,,266.34,496.15
2,1362836,4561143213628360,CC,Y,18.3.1992,,55.90,1000.00
3,1363311,13633112,CK,Y,6.7.1995,,11017.13,968.46
4,1363361,4561143213633610,CC,Y,17.6.1994,,849.37,462.28
...,...,...,...,...,...,...,...,...
1404,1362556,4561143213625560,CC,N,20.1.1992,19.8.1995,1433.94,20.00
1405,1362624,4561143213626240,CC,Y,29.11.1990,,632.75,1400.00
1406,1362875,4561143213628750,CC,Y,24.2.1989,,2810.91,3000.00
1407,1362654,13626543,SV,Y,17.10.1992,,784.53,622.46
