# Sql

In [177]:
# Import libraries
import pandas as pd
import sqlite3

In [301]:
# Sqlite Setting
def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

conn = sqlite3.connect("../data/sql.db")
conn.row_factory = dict_factory
iris_df = pd.read_csv("../data/iris.csv")
iris_df.columns = [colName.replace(".", "_").lower() for colName in iris_df.columns]
iris_df.to_sql("iris", conn, if_exists="replace", index=False)
cursor = conn.cursor()

In [323]:
# Define excute sql function
def execute_sql(sql, msg=None, showNum=5):
    if msg is not None:
        print("{} query result, show {}>= rows".format(msg, showNum))
    cursor.execute(sql)
    sql_df = pd.DataFrame.from_dict(cursor.fetchmany(showNum))
    return sql_df

In [324]:
# TEST
sql = """
            SELECT
                *
            FROM
                iris
      """
execute_sql(sql, "TEST")

TEST query result, show 5>= rows


Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,1.4,0.2,5.1,3.5,setosa
1,1.4,0.2,4.9,3.0,setosa
2,1.3,0.2,4.7,3.2,setosa
3,1.5,0.2,4.6,3.1,setosa
4,1.4,0.2,5.0,3.6,setosa


In [325]:
# CASE
sql = """
            SELECT
                CASE
                  WHEN species = "setosa"
                      THEN "True"
                      ELSE "False"
                END AS is_setosa,
                sepal_length,
                sepal_width
            FROM
                iris
      """
execute_sql(sql, "CASE")

CASE query result, show 5>= rows


Unnamed: 0,is_setosa,sepal_length,sepal_width
0,True,5.1,3.5
1,True,4.9,3.0
2,True,4.7,3.2
3,True,4.6,3.1
4,True,5.0,3.6


In [326]:
# DISTINCT
sql = """
            SELECT DISTINCT
                species
            FROM
                iris
      """
execute_sql(sql, "DISTINCT")

DISTINCT query result, show 5>= rows


Unnamed: 0,species
0,setosa
1,versicolor
2,virginica


In [372]:
# OFFSET
sql = """
            SELECT
                *
            FROM
                iris
            LIMIT 3 OFFSET 100
      """
execute_sql(sql, "OFFSET")

OFFSET query result, show 5>= rows


Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,6.0,2.5,6.3,3.3,virginica
1,5.1,1.9,5.8,2.7,virginica
2,5.9,2.1,7.1,3.0,virginica


In [446]:
# UNION
# UNION: Remove duplicate
# UNION ALL: Do not remove duplicate
sql = """
            SELECT
                *
            FROM
                iris
            WHERE
                petal_length > 6.5
            UNION
            SELECT
                *
            FROM
                iris
            WHERE
                sepal_width > 3.6
      """
execute_sql(sql, "UNION")

UNION query result, show 5>= rows


Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,1.5,0.4,5.1,3.7,setosa
1,1.5,0.3,5.1,3.8,setosa
2,1.6,0.2,5.1,3.8,setosa
3,1.9,0.4,5.1,3.8,setosa
4,1.5,0.1,5.2,4.1,setosa


In [374]:
# EXCEPT
sql = """
            SELECT
                *
            FROM
                iris
            WHERE
                petal_length > 6.5
            EXCEPT
            SELECT
                *
            FROM
                iris
            WHERE
                sepal_width > 3.6
      """
execute_sql(sql, "EXCEPT")

EXCEPT query result, show 5>= rows


Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,6.6,2.1,7.6,3.0,virginica
1,6.9,2.3,7.7,2.6,virginica
2,6.7,2.0,7.7,2.8,virginica


In [375]:
# INTERSECT
sql = """
            SELECT
                *
            FROM
                iris
            WHERE
                petal_length > 6.5
            INTERSECT
            SELECT
                *
            FROM
                iris
            WHERE
                sepal_width > 3.6
      """
execute_sql(sql, "INTERSECT")

INTERSECT query result, show 5>= rows


Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width,species
0,6.7,2.2,7.7,3.8,virginica


In [430]:
# RANK
# Sqlite does not support window function
# Window function: RANK() OVER (PARTITION BY species ORDER BY petal_length DESC) AS Rank  
sql = """
            SELECT DISTINCT
                petal_length,
                (SELECT
                    1 + count(*)
                FROM
                    iris as iris_sub
                WHERE
                    iris_sub.petal_length > iris.petal_length
                        AND
                    iris_sub.species = iris.species) AS rank,
                species
            FROM
                iris
            WHERE
                rank <= 3
            ORDER BY
                rank, species
      """
execute_sql(sql, "RANK", showNum=10)

RANK query result, show 10>= rows


Unnamed: 0,petal_length,rank,species
0,1.9,1,setosa
1,5.1,1,versicolor
2,6.9,1,virginica
3,5.0,2,versicolor
4,6.7,2,virginica
5,1.7,3,setosa
6,4.9,3,versicolor


In [437]:
# HAVING 
sql = """
            SELECT
                avg(petal_length) as petal_length,
                species
            FROM
                iris
            GROUP BY
                species
            HAVING
                avg(petal_length) > 4
      """
execute_sql(sql, "HAVING")

HAVING query result, show 5>= rows


Unnamed: 0,petal_length,species
0,4.26,versicolor
1,5.552,virginica


In [447]:
# Close
cursor.close()
conn.close()