In [1]:
import sqlite3
print(sqlite3.version_info)  # Should show version info

(2, 6, 0)


  print(sqlite3.version_info)  # Should show version info


In [2]:
# task1_sqlite.py
import sqlite3
from pathlib import Path

def create_database_schema():
    # Create or connect to database file
    db_path = Path('retail_dw.db')
    if db_path.exists():
        db_path.unlink()  # Remove if exists (for testing)
    
    conn = sqlite3.connect('retail_dw.db')
    cursor = conn.cursor()
    
    # Create Dimension Tables
    cursor.execute('''
    CREATE TABLE CustomerDim (
        customer_id INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        location TEXT,
        country TEXT,
        demographic_segment TEXT,
        registration_date DATE
    );
    ''')
    
    cursor.execute('''
    CREATE TABLE ProductDim (
        product_id INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        category TEXT NOT NULL,
        subcategory TEXT,
        supplier TEXT,
        current_price REAL
    );
    ''')
    
    cursor.execute('''
    CREATE TABLE TimeDim (
        time_id INTEGER PRIMARY KEY,
        date DATE NOT NULL,
        day INTEGER NOT NULL,
        month INTEGER NOT NULL,
        quarter INTEGER NOT NULL,
        year INTEGER NOT NULL,
        is_weekend BOOLEAN NOT NULL
    );
    ''')
    
    # Create Fact Table
    cursor.execute('''
    CREATE TABLE SalesFact (
        fact_sales_id INTEGER PRIMARY KEY AUTOINCREMENT,
        customer_id INTEGER NOT NULL,
        product_id INTEGER NOT NULL,
        time_id INTEGER NOT NULL,
        quantity INTEGER NOT NULL,
        unit_price REAL NOT NULL,
        total_sales REAL NOT NULL,
        FOREIGN KEY (customer_id) REFERENCES CustomerDim (customer_id),
        FOREIGN KEY (product_id) REFERENCES ProductDim (product_id),
        FOREIGN KEY (time_id) REFERENCES TimeDim (time_id)
    );
    ''')
    
    # Create indexes for performance
    cursor.execute('CREATE INDEX idx_sales_customer ON SalesFact(customer_id);')
    cursor.execute('CREATE INDEX idx_sales_product ON SalesFact(product_id);')
    cursor.execute('CREATE INDEX idx_sales_time ON SalesFact(time_id);')
    
    # Commit changes and close connection
    conn.commit()
    conn.close()
    print("Database schema created successfully!")

def insert_sample_data():
    """Insert some sample data to test the schema"""
    conn = sqlite3.connect('retail_dw.db')
    cursor = conn.cursor()
    
    # Insert sample customers
    customers = [
        (1, 'John Doe', 'New York', 'USA', 'Adult', '2023-01-15'),
        (2, 'Jane Smith', 'London', 'UK', 'Young', '2023-02-20'),
        (3, 'Bob Johnson', 'Berlin', 'Germany', 'Senior', '2023-03-10')
    ]
    cursor.executemany('''
    INSERT INTO CustomerDim VALUES (?, ?, ?, ?, ?, ?)
    ''', customers)
    
    # Insert sample products
    products = [
        (101, 'Smartphone X', 'Electronics', 'Smartphones', 'TechCorp', 799.99),
        (102, 'Cotton T-Shirt', 'Clothing', 'Men', 'FashionCo', 24.99),
        (103, 'Coffee Table', 'Home', 'Furniture', 'HomeGoods', 149.99)
    ]
    cursor.executemany('''
    INSERT INTO ProductDim VALUES (?, ?, ?, ?, ?, ?)
    ''', products)
    
    # Insert sample time data
    time_data = [
        (20230115, '2023-01-15', 15, 1, 1, 2023, 0),
        (20230220, '2023-02-20', 20, 2, 1, 2023, 0),
        (20230310, '2023-03-10', 10, 3, 1, 2023, 1)
    ]
    cursor.executemany('''
    INSERT INTO TimeDim VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', time_data)
    
    # Insert sample sales facts
    sales = [
        (1, 101, 20230115, 2, 799.99, 1599.98),
        (2, 102, 20230220, 3, 24.99, 74.97),
        (3, 103, 20230310, 1, 149.99, 149.99)
    ]
    cursor.executemany('''
    INSERT INTO SalesFact (customer_id, product_id, time_id, quantity, unit_price, total_sales)
    VALUES (?, ?, ?, ?, ?, ?)
    ''', sales)
    
    conn.commit()
    conn.close()
    print("Sample data inserted successfully!")

def test_queries():
    """Test some basic queries"""
    conn = sqlite3.connect('retail_dw.db')
    cursor = conn.cursor()
    
    print("\nTotal sales by customer:")
    cursor.execute('''
    SELECT c.name, SUM(f.total_sales) as total
    FROM SalesFact f
    JOIN CustomerDim c ON f.customer_id = c.customer_id
    GROUP BY c.name
    ''')
    for row in cursor.fetchall():
        print(f"{row[0]}: ${row[1]:.2f}")
    
    print("\nSales by product category:")
    cursor.execute('''
    SELECT p.category, SUM(f.total_sales) as total
    FROM SalesFact f
    JOIN ProductDim p ON f.product_id = p.product_id
    GROUP BY p.category
    ''')
    for row in cursor.fetchall():
        print(f"{row[0]}: ${row[1]:.2f}")
    
    conn.close()

if __name__ == "__main__":
    create_database_schema()
    insert_sample_data()
    test_queries()

Database schema created successfully!
Sample data inserted successfully!

Total sales by customer:
Bob Johnson: $149.99
Jane Smith: $74.97
John Doe: $1599.98

Sales by product category:
Clothing: $74.97
Electronics: $1599.98
Home: $149.99
