In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file

    A failure to connect is reported on stdout and signalled by returning
    None, so callers must check the result before using it.

    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # `Error` lives in the sqlite3 namespace; a bare `Error` is undefined
        # here and would turn every connection failure into a NameError.
        print(e)

    return conn



In [3]:
def create_tables():
    '''Rebuild the stores, products and sales tables in problem25.db.

    Any existing copy of each table is dropped, the table is recreated and
    the hard-coded sample rows are inserted. The store_id column of the
    sample data is printed as a quick sanity check.

    Args:
        None
    Returns:
        None
    Raises:
        RuntimeError: if the database connection could not be opened.
    '''

    # NumPy coerces each mixed int/str array below to a string dtype, so every
    # value is bound as text. The INT columns still end up holding integers
    # because SQLite's integer column affinity converts numeric-looking text.

    #store_id, location
    store_stuff = np.array([
    [91110, 'New York'],
    [99525, 'Los Angeles'],
    [37340, 'Tokyo'],
    [32016, 'Detroit'],
    [57507, 'London']])

    print(store_stuff[:,0])

    #product_id product_name price_usd
    product_stuff = np.array([
    [31331, 'Apples', 2],
    [34611, 'Lettuce', 3],
    [49760, 'Chicken',5],
    [26583, 'Lemons', 1],
    [20267, 'Bread', 2]])

    #sale_id product_id store_id date
    # NOTE(review): sale_id 3 appears three times and the ids then resume at 4;
    # this looks like a data-entry slip (ids 1-9 intended?) -- confirm.
    sales_stuff = np.array([
    [1, 31331, 91110, '02/20/2020'],
    [2, 34611, 57507, '02/20/2020'],
    [3, 26583, 37340, '02/20/2020'],
    [3, 34611, 32016, '02/20/2020'],
    [3, 20267, 99525, '02/21/2020'],
    [4, 31331, 99525, '02/21/2020'],
    [5, 49760, 99525, '02/21/2020'],
    [6, 34611, 57507, '02/21/2020'],
    [7, 31331, 91110, '02/21/2020']])

    # One entry per table: (name, CREATE statement, INSERT statement, rows).
    table_specs = [
        ('stores',
         '''
        CREATE TABLE stores(
        store_id int,
        location varchar(25)
        )
         ''',
         'INSERT INTO stores(store_id, location) VALUES(?,?)',
         store_stuff),
        ('products',
         '''
        CREATE TABLE products(
        product_id int,
        product_name varchar(25),
        price_usd int
        )
         ''',
         'INSERT INTO products(product_id, product_name, price_usd) VALUES(?,?,?)',
         product_stuff),
        ('sales',
         '''
        CREATE TABLE sales(
        sale_id int,
        product_id int,
        store_id int,
        date varchar(25)
        )
         ''',
         'INSERT INTO sales(sale_id, product_id, store_id, date) VALUES(?,?,?,?)',
         sales_stuff),
    ]

    conn = create_connection('problem25.db')
    if conn is None:
        raise RuntimeError('could not open database problem25.db')

    try:
        cur = conn.cursor()
        for table_name, create_sql, insert_sql, rows in table_specs:
            # Table names come from the constant list above, never from input.
            cur.execute('DROP TABLE IF EXISTS ' + table_name)
            cur.execute(create_sql)
            # tolist() yields plain Python strings that sqlite3 can bind.
            cur.executemany(insert_sql, rows.tolist())
        # A single commit replaces the original commit-per-row.
        conn.commit()
    finally:
        # Always release the file handle, even if a statement fails.
        conn.close()
    



In [4]:
# Recreate the three tables in problem25.db; the printed array is the store_id column.
create_tables()

['91110' '99525' '37340' '32016' '57507']


In [5]:
def select_stores():
    '''Print the full contents of the stores, products and sales tables.

    Each table in problem25.db is read into a DataFrame and printed, in the
    order stores, products, sales.

    Args:
        None
    Returns:
        None
    Raises:
        RuntimeError: if the database connection could not be opened.
    '''
    conn = create_connection('problem25.db')
    if conn is None:
        raise RuntimeError('could not open database problem25.db')

    try:
        stores_df = pd.read_sql('SELECT * FROM stores', conn)
        products_df = pd.read_sql('SELECT * FROM products', conn)
        sales_df = pd.read_sql('SELECT * FROM sales', conn)
    finally:
        # The original never closed the connection; release it once the reads
        # are done (or have failed).
        conn.close()

    print(stores_df)
    print(products_df)
    print(sales_df)

In [6]:

# Print all three tables to check the inserted rows.
select_stores()

   store_id     location
0     91110     New York
1     99525  Los Angeles
2     37340        Tokyo
3     32016      Detroit
4     57507       London
   product_id product_name  price_usd
0       31331       Apples          2
1       34611      Lettuce          3
2       49760      Chicken          5
3       26583       Lemons          1
4       20267        Bread          2
   sale_id  product_id  store_id        date
0        1       31331     91110  02/20/2020
1        2       34611     57507  02/20/2020
2        3       26583     37340  02/20/2020
3        3       34611     32016  02/20/2020
4        3       20267     99525  02/21/2020
5        4       31331     99525  02/21/2020
6        5       49760     99525  02/21/2020
7        6       34611     57507  02/21/2020
8        7       31331     91110  02/21/2020


In [None]:
# Expected result columns: location, number_sales, avg_sale_price

In [15]:
def get_results():
    '''Print the number of sales and average sale price for each location.

    Every store is joined to its sales and each sale to its product, then the
    rows are grouped by location. The printed frame has the columns
    location, number_sales and avg_sale_price.

    Args:
        None
    Returns:
        None
    Raises:
        RuntimeError: if the database connection could not be opened.
    '''
    conn = create_connection('problem25.db')
    if conn is None:
        raise RuntimeError('could not open database problem25.db')

    # COUNT(sl.sale_id) rather than COUNT(*): the LEFT JOIN keeps a store with
    # no sales as one all-NULL row, which COUNT(*) would report as one sale.
    query = '''
        SELECT
            st.location,
            COUNT(sl.sale_id) AS number_sales,
            AVG(p.price_usd) AS avg_sale_price
        FROM
            stores AS st LEFT JOIN sales AS sl ON st.store_id = sl.store_id
            LEFT JOIN products AS p ON sl.product_id = p.product_id
        GROUP BY
            st.location
    '''

    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection whether or not the query succeeded.
        conn.close()

    print(df)
    
    

In [16]:
# Print the per-location sales summary.
get_results()

      location  number_sales  AVG(price_usd)
0      Detroit             1             3.0
1       London             2             3.0
2  Los Angeles             3             3.0
3     New York             2             2.0
4        Tokyo             1             1.0
