In [46]:
import csv
import os
import sqlite3 as lite
from datetime import datetime
from enum import Enum

class db:
    """Small convenience wrapper around one SQLite connection and its cursor.

    ``connect()`` must be called before any statement is executed, because
    that is where ``self.con`` and ``self.cur`` are created.
    """

    class Options(Enum):
        """Values accepted by the ``options`` argument of ``execute_sql``."""

        RETURN_RESULTS = 1  # fetch all rows and return them to the caller
        PRINT_RESULTS = 2   # declared, but execute_sql does not act on it

    def __init__(self, name):
        """Remember the database name/path; no connection is opened here."""
        self.name = f"{name}"

    def connect(self):
        """Open the SQLite database ``self.name`` and create a cursor on it."""
        connection = lite.connect(self.name)
        self.con = connection
        self.cur = connection.cursor()

    def build_table(self, name):
        """Drop table ``name`` if it exists, then recreate it.

        The CREATE statement is read from ``TABLE_DEFINITIONS[name]``, a
        module-level mapping that is not defined in this part of the file.
        ``name`` is interpolated directly into the DROP statement, so it has
        to be a trusted identifier.
        """
        self.execute_sql(f'DROP TABLE IF EXISTS {name}')
        create_statement = TABLE_DEFINITIONS[name]
        self.execute_sql(create_statement)

    def execute_sql(self, sql, options=0):
        """Execute ``sql`` on the cursor.

        Returns the list of all result rows when ``options`` is
        ``db.Options.RETURN_RESULTS``; for any other ``options`` value the
        statement is executed and ``None`` is returned.
        """
        wants_rows = (
            isinstance(options, db.Options)
            and options.value & db.Options.RETURN_RESULTS.value
        )
        cursor = self.cur.execute(sql)
        if wants_rows:
            return cursor.fetchall()
        return None

    def execute_sql_values(self, sql, values):
        """Execute ``sql`` with ``values`` bound to its placeholders."""
        self.cur.execute(sql, values)

    def commit(self):
        """Commit the connection's pending transaction."""
        self.con.commit()

    def close(self):
        """Commit any pending work, then close the connection."""
        connection = self.con
        connection.commit()
        connection.close()
    # (Placeholder note: the db class above stands in for the DB class implementation provided in the original question.)



  

In [116]:
def create_combined_schema(target_db):
    """Make sure the ``combined_sales`` table exists in ``target_db``.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on a
    database that already has the table leaves the table and its rows as
    they are.  ``target_db`` only needs an ``execute_sql(sql)`` method.
    """
    ddl = '''CREATE TABLE IF NOT EXISTS combined_sales(
                            date TEXT, 
                            customer_number INT, 
                            sku INT, 
                            sales_price REAL, 
                            items_left INT, 
                            cases_ordered INT)'''
    target_db.execute_sql(ddl)

def transform_team8(row):
    """Map one Team 8 row onto the ``combined_sales`` column order.

    The first three fields (date, customer number, sku) are passed through
    unchanged; the price is coerced to ``float`` and the two counts to
    ``int``.  Any fields after the sixth are ignored.  The date is expected
    to already be a YYYY-MM-DD string; it is not validated here.
    """
    date, customer_number, sku = row[0], row[1], row[2]
    sales_price = float(row[3])    # salesPrice in the source
    items_left = int(row[4])
    cases_ordered = int(row[5])
    return (date, customer_number, sku, sales_price, items_left, cases_ordered)

def transform_team9(row):
    """Map one Team 9 row onto the ``combined_sales`` column order.

    Fields are read by position.  According to the original notes the source
    columns are date, customerID, sku, salePrice, itemsLeft and co; the
    source schema itself is not visible here.
    """
    # (target column, converter) in combined_sales order; None = pass through.
    layout = (
        ("date", None),             # date, expected as YYYY-MM-DD
        ("customer_number", None),  # customerID
        ("sku", None),
        ("sales_price", float),     # salePrice
        ("items_left", int),        # itemsLeft
        ("cases_ordered", int),     # co
    )
    return tuple(
        row[position] if convert is None else convert(row[position])
        for position, (_column, convert) in enumerate(layout)
    )

def transform_team10(row):
    """Map one Team 10 row onto the ``combined_sales`` column order.

    Fields are read by position.  The original notes name the source columns
    date1, customerID, sku, salePrice, itemsLeft and co.
    NOTE(review): the extraction query in ``main`` filters on ``date``, not
    ``date1`` -- confirm the real column name against the Team 10 schema.
    """
    fields = [row[0], row[1], row[2]]  # date, customer_number, sku: as-is
    fields.append(float(row[3]))       # sales_price
    fields.append(int(row[4]))         # items_left
    fields.append(int(row[5]))         # cases_ordered
    return tuple(fields)



In [131]:
def combine_data(source_db, query, transform_func, target_db):
    """Copy the rows returned by ``query`` on ``source_db`` into ``combined_sales``.

    Every source row is passed through ``transform_func`` and the result is
    inserted into ``combined_sales`` on ``target_db``.  The load is
    best-effort: a row whose transform or insert raises is reported on stdout
    and skipped, and the remaining rows are still processed.  A single commit
    is issued on ``target_db`` after the last row.

    Args:
        source_db: connected ``db`` wrapper to read from.
        query: SELECT statement to run on ``source_db``.
        transform_func: callable mapping one source row to the sequence of
            values bound to the six ``?`` placeholders of the INSERT.
        target_db: connected ``db`` wrapper that holds ``combined_sales``.

    Returns:
        A ``(inserted, skipped)`` tuple of row counts, so a caller can detect
        dropped rows without scanning the printed output.
    """
    # Loop-invariant statement text, built once instead of once per row.
    insert_sql = '''INSERT INTO combined_sales 
                   VALUES (?, ?, ?, ?, ?, ?)'''

    results = source_db.execute_sql(query, options=db.Options.RETURN_RESULTS)

    inserted = 0
    skipped = 0
    for row in results:
        try:
            transformed = transform_func(row)
            target_db.execute_sql_values(insert_sql, transformed)
        except Exception as e:
            # Deliberately broad: one bad row must not abort the whole load.
            skipped += 1
            print(f"Error processing row: {row}")
            print(f"Error: {str(e)}")
        else:
            inserted += 1
    target_db.commit()
    return inserted, skipped



def generate_top25_report(target_db, output_file="top_25_products.csv"):
    """Write the top-25 SKUs by summed ``sales_price`` to a CSV file.

    Only ``combined_sales`` rows with ``date >= '2024-12-01'`` are counted.
    SKUs are ranked with ``RANK()``, which gives tied totals the same rank, so
    a tie at the cut-off can produce more than 25 data rows.

    Args:
        target_db: connected ``db`` wrapper that holds ``combined_sales``.
        output_file: path of the CSV to create or overwrite.  It receives a
            ``SKU,Total Sales,Rank`` header followed by one line per SKU.
    """
    sql_combined = '''
    WITH ranked_sales AS (
        SELECT 
            sku, 
            SUM(sales_price) AS total_sales, 
            RANK() OVER (ORDER BY SUM(sales_price) DESC) AS rank
        FROM combined_sales
        WHERE date >= '2024-12-01'
        GROUP BY sku
    )
    SELECT sku, total_sales, rank
    FROM ranked_sales
    WHERE rank <= 25
    ORDER BY rank;
    '''

    # Execute the query and fetch results
    results = target_db.execute_sql(sql_combined, options=db.Options.RETURN_RESULTS)

    # Let the csv module do the formatting so values are quoted when needed.
    # newline='' hands line endings to the writer; lineterminator='\n' keeps
    # the LF-terminated lines the report has always had, and the explicit
    # encoding makes the file independent of the platform default.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(["SKU", "Total Sales", "Rank"])  # CSV header
        writer.writerows(results)  # (sku, total_sales, rank) per row

    print(f"Report generated: {output_file}")


In [133]:
def main(source_dir='/Users/nikitabrahmbhatt/C/group',
         staging_db='grocery_etl_staging.db',
         output_file='top_25_products.csv'):
    """Run the ETL: load the three team databases, then write the report.

    Steps: open the staging database, make sure ``combined_sales`` exists and
    is empty, copy the rows dated 2024-12-01 or later from each team database
    into it, and generate the top-25 CSV.  Every connection that was opened is
    closed again, even when a step raises.

    Args:
        source_dir: directory holding ``grocery_team_8.db``,
            ``grocery_team_9.db`` and ``grocery_team_10.db``.  The default is
            a machine-specific absolute path; pass your own location when
            running elsewhere.
        staging_db: path of the SQLite file used for the combined table.
        output_file: path of the CSV report to write.

    Raises:
        FileNotFoundError: if one of the team database files does not exist.
    """
    # (file name, extraction query, row transformer) for each team database.
    # Team 9 keeps its rows in `transactions`; the other two use
    # `sales_transactions`.  All three queries filter on a `date` column.
    sources = [
        ('grocery_team_8.db',
         "SELECT * FROM sales_transactions WHERE date >= '2024-12-01'",
         transform_team8),
        ('grocery_team_9.db',
         "SELECT * FROM transactions WHERE date >= '2024-12-01'",
         transform_team9),
        ('grocery_team_10.db',
         "SELECT * FROM sales_transactions WHERE date >= '2024-12-01'",
         transform_team10),
    ]

    opened = []  # connections to close in the finally block
    try:
        # Initialize combined database
        combined_db = db(staging_db)
        combined_db.connect()
        opened.append(combined_db)
        create_combined_schema(combined_db)
        # The staging file persists between runs and the table is created
        # with IF NOT EXISTS, so without this every re-run would append the
        # same rows again and inflate the report totals.
        combined_db.execute_sql('DELETE FROM combined_sales')

        # Combine data from all sources
        for file_name, query, transform in sources:
            source_path = os.path.join(source_dir, file_name)
            # sqlite3.connect() silently creates an empty database for a
            # missing file; fail early with a clear message instead.
            if not os.path.isfile(source_path):
                raise FileNotFoundError(
                    f"Source database not found: {source_path}")
            source = db(source_path)
            source.connect()
            opened.append(source)
            combine_data(source, query, transform, combined_db)

        # Generate report for top 25 products
        generate_top25_report(combined_db, output_file)
    finally:
        # Cleanup
        for db_conn in opened:
            db_conn.close()

# Run the pipeline when this code is executed directly. In a notebook the
# cell's module name is "__main__" as well, so executing this cell calls main().
if __name__ == "__main__":
    main()


Report generated: top_25_products.csv
