Build Summary Tables  
    
      
      
      
1. Sales by date by hour:

In [1]:
from google.cloud import bigquery
import pandas as pd

# --- Summary 1: sales by calendar date and hour -----------------------------
# Location of the source tables; interpolated into the wildcard table name below.
project_id = 'wedgeproject-rileyororke'
dataset_id = 'transaction_tables'

# BigQuery client built with default settings (no explicit project/credentials).
client = bigquery.Client()

# One output row per (calendar_date, hour) across every table matching
# `transArchive_*`, restricted to rows whose card_no is not 3.
#   total_spend      - SUM(total) for the group
#   num_transactions - COUNT(datetime), i.e. the number of rows in the group
#                      with a non-null datetime
#   num_items        - rows whose trans_status is exactly ' ' or ''
# NOTE(review): COUNT(datetime) counts rows, not distinct transactions —
# confirm that this is the intended definition of num_transactions.
# NOTE(review): a NULL trans_status falls into the ELSE branch and is not
# counted in num_items — confirm whether NULL should be treated as blank.
query_sales_by_hour = f"""
    SELECT
        DATE(datetime) AS calendar_date,
        EXTRACT(HOUR FROM datetime) AS hour,
        SUM(total) AS total_spend,
        COUNT(datetime) AS num_transactions,
        SUM(CASE WHEN trans_status = ' ' OR trans_status = '' THEN 1 ELSE 0 END) AS num_items
    FROM `{project_id}.{dataset_id}.transArchive_*`
    WHERE CAST(card_no AS INT64) != 3  -- Exclude non-owners
    GROUP BY calendar_date, hour
    ORDER BY calendar_date, hour
"""

# Submit the query, then materialise the result set as a pandas DataFrame.
sales_by_hour_job = client.query(query_sales_by_hour)
df_sales_by_hour = sales_by_hour_job.to_dataframe()

# Preview the first rows of the summary.
print(df_sales_by_hour.head())







  calendar_date  hour   total_spend  num_transactions  num_items
0    2010-01-01     9 -7.105427e-15               254        175
1    2010-01-01    10  3.213000e+02              1019        771
2    2010-01-01    11 -2.258700e+02              1076        784
3    2010-01-01    12 -6.736000e+01              1109        779
4    2010-01-01    13  2.016000e+01              1562       1101


2. Sales by owner by year by month:

In [2]:
from google.cloud import bigquery
import pandas as pd

# --- Summary 2: sales by owner (card_no), year and month --------------------
# Location of the source tables; interpolated into the wildcard table name below.
project_id = 'wedgeproject-rileyororke'
dataset_id = 'transaction_tables'

# BigQuery client built with default settings (no explicit project/credentials).
client = bigquery.Client()

# One output row per (card_no, year, month) across every table matching
# `transArchive_*`, restricted to rows whose card_no is not 3.
#   total_sales      - SUM(total) for the group
#   num_transactions - COUNT(datetime), i.e. the number of rows in the group
#                      with a non-null datetime
#   num_items        - rows whose trans_status is exactly ' ' or ''
# NOTE(review): COUNT(datetime) counts rows, not distinct transactions —
# confirm that this is the intended definition of num_transactions.
# NOTE(review): a NULL trans_status falls into the ELSE branch and is not
# counted in num_items, which can yield num_items = 0 for groups that still
# have rows and sales — confirm whether NULL should be treated as blank.
query_sales_by_owner = f"""
    SELECT
        CAST(card_no AS INT64) AS card_no,  -- Owner ID
        EXTRACT(YEAR FROM datetime) AS year,
        EXTRACT(MONTH FROM datetime) AS month,
        SUM(total) AS total_sales,
        COUNT(datetime) AS num_transactions,
        SUM(CASE WHEN trans_status = ' ' OR trans_status = '' THEN 1 ELSE 0 END) AS num_items
    FROM `{project_id}.{dataset_id}.transArchive_*`
    WHERE CAST(card_no AS INT64) != 3  -- Exclude non-owners
    GROUP BY card_no, year, month
    ORDER BY card_no, year, month
"""

# Submit the query, then materialise the result set as a pandas DataFrame.
sales_by_owner_job = client.query(query_sales_by_owner)
df_sales_by_owner = sales_by_owner_job.to_dataframe()

# Preview the first rows of the summary.
print(df_sales_by_owner.head())






   card_no  year  month  total_sales  num_transactions  num_items
0        1  2012      1        89.65                 5          0
1        1  2012      2        86.79                11          0
2        1  2012      3        44.21                 9          0
3        1  2012      4       160.66                14          0
4        1  2012      5        89.45                 5          0


3. Sales by product description by year by month:

In [4]:
from google.cloud import bigquery
import pandas as pd

# --- Summary 3: sales by product description, year and month ----------------
# Location of the source tables; interpolated into the table names below.
project_id = 'wedgeproject-rileyororke'
dataset_id = 'transaction_tables'

# BigQuery client built with default settings (no explicit project/credentials).
client = bigquery.Client()

# The `sales_data` CTE aggregates every table matching `transArchive_*`
# (rows whose card_no is not 3) to one row per
# (upc, description, department, year, month). The outer query then
# LEFT JOINs `department_lookup` on the department number to attach
# dept_name, so groups without a matching lookup row are kept with a NULL
# dept_name.
#   total_sales      - SUM(total) for the group
#   num_transactions - COUNT(datetime), i.e. the number of rows in the group
#                      with a non-null datetime
#   num_items        - rows whose trans_status is exactly ' ' or ''
# NOTE(review): COUNT(datetime) counts rows, not distinct transactions —
# confirm that this is the intended definition of num_transactions.
# NOTE(review): a NULL trans_status falls into the ELSE branch and is not
# counted in num_items — confirm whether NULL should be treated as blank.
query_sales_by_product = f"""
    WITH sales_data AS (
        SELECT
            upc,
            description,
            department,
            EXTRACT(YEAR FROM datetime) AS year,
            EXTRACT(MONTH FROM datetime) AS month,
            SUM(total) AS total_sales,
            COUNT(datetime) AS num_transactions,
            SUM(CASE WHEN trans_status = ' ' OR trans_status = '' THEN 1 ELSE 0 END) AS num_items
        FROM `{project_id}.{dataset_id}.transArchive_*`
        WHERE CAST(card_no AS INT64) != 3  -- Exclude non-owners
        GROUP BY upc, description, department, year, month
    )
    SELECT sd.*, dl.dept_name
    FROM sales_data sd
    LEFT JOIN `{project_id}.{dataset_id}.department_lookup` dl
    ON sd.department = dl.department
    ORDER BY year, month
"""

# Submit the query, then materialise the result set as a pandas DataFrame.
sales_by_product_job = client.query(query_sales_by_product)
df_sales_by_product = sales_by_product_job.to_dataframe()

# Preview the first rows of the summary.
print(df_sales_by_product.head())




             upc                 description  department  year  month  \
0        6.39DP9                   GEN MERCH         9.0  2010      1   
1  0000000013615   NUTS Almonds Raw Slivered         3.0  2010      1   
2  0000000018505       MIX Hot Cocoa Organic         3.0  2010      1   
3  0000000018605     BAKING Active Dry Yeast         3.0  2010      1   
4  0000000018622  BAKING Choc Chips Dark Org         3.0  2010      1   

   total_sales  num_transactions  num_items  dept_name  
0       102.24                14         14  GEN MERCH  
1       316.23               118        116       BULK  
2        73.70                22         22       BULK  
3       137.89               115        113       BULK  
4       618.48               104        104       BULK  


Create SQLite Database and Insert Data

In [5]:
import sqlite3  
import pandas as pd

# Persist the three summary DataFrames into a local SQLite database file.
#
# Each table is written with if_exists='replace': pandas drops any existing
# table of that name and recreates it from the DataFrame's own columns, so
# re-running this cell always leaves exactly the current summaries on disk.
# No hand-written CREATE TABLE statements are issued here, because 'replace'
# would discard them immediately; the stored schema is whatever the
# DataFrames contain (e.g. `calendar_date` for sales_by_hour and the joined
# `dept_name` column for sales_by_product).
summary_tables = {
    'sales_by_hour': df_sales_by_hour,
    'sales_by_owner': df_sales_by_owner,
    'sales_by_product': df_sales_by_product,
}

conn = sqlite3.connect('wedge_coop_summary.db')
try:
    for table_name, summary_df in summary_tables.items():
        summary_df.to_sql(table_name, conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Always release the database file handle, even if one of the writes fails.
    conn.close()

print("Summary tables created and stored in 'wedge_coop_summary.db'")


Summary tables created and stored in 'wedge_coop_summary.db'
