In [5]:
# ================================
# SALES & QUANTITY SUMMARY – SEQUENTIAL PERIODS (NO YoY)
# ================================
import os
from datetime import datetime, timedelta
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pyodbc
from sqlalchemy import create_engine

# ================================
# CONFIGURATION
# ================================
# Runtime settings for the whole pipeline. The connection credentials can be
# overridden through the MSSQL_USERNAME / MSSQL_PASSWORD environment variables
# so they do not have to live in source control.
# NOTE(security): the literal fallbacks are kept only so existing runs keep
# working; rotate this password and drop the defaults once the environment
# variables are provisioned.
CONFIG = {
    # --- MSSQL PULL ---
    # The column aliases below (order_date / sales_value / quantity) are the
    # default column names used when the *_column overrides further down
    # are left as None.
    'sql_query': """
        SELECT
            TranDate_DDMMYYYY AS order_date,
            NetSales AS sales_value,
            qty AS quantity
        FROM TEST1.dbo._Gift_from_Carpenters
        WHERE NetSales IS NOT NULL
    """,
    'server': '100.99.225.51',
    'database': 'TEST1',
    'username': os.environ.get('MSSQL_USERNAME', 'SA'),
    'password': os.environ.get('MSSQL_PASSWORD', 'a31536000'),
    'driver': '{ODBC Driver 17 for SQL Server}',
    'encrypt': 'yes',
    # NOTE(review): 'yes' presumably disables server certificate validation;
    # confirm this is acceptable outside a test environment.
    'trust_server_certificate': 'yes',
    # --- TIMEOUTS ---
    # Presumably seconds; confirm against the code that consumes them.
    'connection_timeout': 300,
    'query_timeout': 300,
    # --- DATA SETTINGS ---
    # None means "use the alias from sql_query".
    'date_column': None,
    'value_column': None,
    'quantity_column': None,
    # strptime-style format used to parse the date column.
    'date_format': '%d/%m/%Y',
    # --- SUMMARY TABLES ---
    'past_summary_table': 'past_timeframe_performance_summary',
    'recent_summary_table': 'recent_timeframe_performance_summary',
}

# ================================
# 1. LOAD FROM MSSQL
# ================================
def load_data(config):
    """Pull the raw rows from MSSQL and coerce them to clean dtypes.

    Args:
        config: Mapping with the connection keys ('driver', 'server',
            'database', 'username', 'password', 'encrypt',
            'trust_server_certificate'), the 'sql_query' to run, the
            'date_format' used to parse the date column, and the optional
            column overrides 'date_column', 'value_column',
            'quantity_column'. Optional 'connection_timeout' and
            'query_timeout' (seconds) are applied when present and non-zero.

    Returns:
        (df, date_col, value_col, quantity_col) on success, where rows whose
        date, value or quantity could not be parsed have been dropped.
        (None, None, None, None) when anything inside the load fails; the
        error is printed rather than raised, so callers must check for None.
    """
    conn_str = (
        f"DRIVER={config['driver']};"
        f"SERVER={config['server']};"
        f"DATABASE={config['database']};"
        f"UID={config['username']};"
        f"PWD={config['password']};"
        f"Encrypt={config['encrypt']};"
        f"TrustServerCertificate={config['trust_server_certificate']};"
    )
    try:
        print("Connecting to MSSQL...")
        # `timeout` on connect() is the login timeout; 0 keeps the driver default.
        conn = pyodbc.connect(conn_str, timeout=config.get('connection_timeout') or 0)
        try:
            query_timeout = config.get('query_timeout')
            if query_timeout:
                # Connection.timeout bounds each statement run on this connection.
                conn.timeout = query_timeout
            df = pd.read_sql(config['sql_query'], conn)
        finally:
            # Release the connection even when the query itself fails.
            conn.close()
        print(f"Loaded {len(df):,} rows from DB")

        # Fall back to the aliases used in the SQL when no override is set.
        date_col = config['date_column'] or 'order_date'
        value_col = config['value_column'] or 'sales_value'
        quantity_col = config['quantity_column'] or 'quantity'

        # errors='coerce' turns unparseable cells into NaT/NaN so that the
        # dropna below removes them instead of the conversion raising.
        df[date_col] = pd.to_datetime(df[date_col], format=config['date_format'], errors='coerce')
        df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
        df[quantity_col] = pd.to_numeric(df[quantity_col], errors='coerce')
        df = df.dropna(subset=[date_col, value_col, quantity_col])

        print(f"Cleaned data: {len(df):,} rows")
        return df, date_col, value_col, quantity_col
    except Exception as e:
        # Deliberate best-effort boundary: report and signal failure via None.
        print(f"DB Load Failed: {e}")
        return None, None, None, None

# ================================
# 2. CALCULATE SEQUENTIAL PERIOD SUMS
# ================================
def calculate_period_sums(df, date_col, value_col, quantity_col, most_recent_date, is_recent=True):
    """
    is_recent=True  → Current (partial) periods
    is_recent=False → Immediately prior (completed) periods
    """
    sums = {}

    if is_recent:
        # === RECENT (current) ===
        # Day: most recent day
        day_start = day_end = most_recent_date
        sums['day'] = {
            'total_sales': df[df[date_col] == day_end][value_col].sum(),
            'total_quantity': df[df[date_col] == day_end][quantity_col].sum()
        }

        # Week: last 7 days (incl. most recent)
        week_start = most_recent_date - timedelta(days=6)
        week_end = most_recent_date
        mask = (df[date_col] >= week_start) & (df[date_col] <= week_end)
        sums['week'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

        # Month: current month to date
        month_start = most_recent_date.replace(day=1)
        month_end = most_recent_date
        mask = (df[date_col] >= month_start) & (df[date_col] <= month_end)
        sums['month'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

        # Quarter: current quarter to date
        quarter = (most_recent_date.month - 1) // 3 + 1
        if quarter == 1:
            q_start = most_recent_date.replace(month=1, day=1)
        elif quarter == 2:
            q_start = most_recent_date.replace(month=4, day=1)
        elif quarter == 3:
            q_start = most_recent_date.replace(month=7, day=1)
        else:
            q_start = most_recent_date.replace(month=10, day=1)
        mask = (df[date_col] >= q_start) & (df[date_col] <= most_recent_date)
        sums['quarter'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

        # Year: current year to date
        year_start = most_recent_date.replace(month=1, day=1)
        year_end = most_recent_date
        mask = (df[date_col] >= year_start) & (df[date_col] <= year_end)
        sums['year'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

    else:
        # === PAST (prior completed) ===
        # Day: day before most recent
        day_end = most_recent_date - timedelta(days=1)
        sums['day'] = {
            'total_sales': df[df[date_col] == day_end][value_col].sum(),
            'total_quantity': df[df[date_col] == day_end][quantity_col].sum()
        }

        # Week: 7 days before recent week
        week_end = most_recent_date - timedelta(days=7)
        week_start = week_end - timedelta(days=6)
        mask = (df[date_col] >= week_start) & (df[date_col] <= week_end)
        sums['week'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

        # Month: full previous calendar month
        prev_month_end = most_recent_date.replace(day=1) - timedelta(days=1)
        prev_month_start = prev_month_end.replace(day=1)
        mask = (df[date_col] >= prev_month_start) & (df[date_col] <= prev_month_end)
        sums['month'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

        # Quarter: full previous calendar quarter
        current_q = (most_recent_date.month - 1) // 3 + 1
        prev_q = current_q - 1 if current_q > 1 else 4
        prev_q_year = most_recent_date.year if current_q > 1 else most_recent_date.year - 1
        if prev_q == 1:
            q_start = pd.Timestamp(year=prev_q_year, month=1, day=1)
            q_end = pd.Timestamp(year=prev_q_year, month=3, day=31)
        elif prev_q == 2:
            q_start = pd.Timestamp(year=prev_q_year, month=4, day=1)
            q_end = pd.Timestamp(year=prev_q_year, month=6, day=30)
        elif prev_q == 3:
            q_start = pd.Timestamp(year=prev_q_year, month=7, day=1)
            q_end = pd.Timestamp(year=prev_q_year, month=9, day=30)
        else:
            q_start = pd.Timestamp(year=prev_q_year, month=10, day=1)
            q_end = pd.Timestamp(year=prev_q_year, month=12, day=31)
        mask = (df[date_col] >= q_start) & (df[date_col] <= q_end)
        sums['quarter'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

        # Year: full previous calendar year
        prev_year_start = pd.Timestamp(year=most_recent_date.year - 1, month=1, day=1)
        prev_year_end = pd.Timestamp(year=most_recent_date.year - 1, month=12, day=31)
        mask = (df[date_col] >= prev_year_start) & (df[date_col] <= prev_year_end)
        sums['year'] = {
            'total_sales': df[mask][value_col].sum(),
            'total_quantity': df[mask][quantity_col].sum()
        }

    # Build DataFrame
    summary_df = pd.DataFrame({
        'period': list(sums.keys()),
        'total_sales': [v['total_sales'] for v in sums.values()],
        'total_quantity': [v['total_quantity'] for v in sums.values()],
        'calculated_date': most_recent_date.date()
    })
    return summary_df

# ================================
# 3. PUSH TO DB
# ================================
def push_summary_to_db(summary_df, config, table_name):
    """Replace the contents of *table_name* with the rows of *summary_df*.

    The table is created if it does not exist, truncated, and then refilled.
    Failures are printed, not raised, so a failed push never aborts the caller.

    Args:
        summary_df: Frame with the columns 'period', 'total_sales',
            'total_quantity', 'calculated_date'.
        config: Mapping with the connection keys ('driver', 'server',
            'database', 'username', 'password', 'encrypt',
            'trust_server_certificate').
        table_name: Target table. It is interpolated into the SQL text, so it
            must come from trusted configuration, never from user input.

    Returns:
        None.
    """
    conn_str = (
        f"DRIVER={config['driver']};"
        f"SERVER={config['server']};"
        f"DATABASE={config['database']};"
        f"UID={config['username']};"
        f"PWD={config['password']};"
        f"Encrypt={config['encrypt']};"
        f"TrustServerCertificate={config['trust_server_certificate']};"
    )
    try:
        conn = pyodbc.connect(conn_str)
        try:
            cursor = conn.cursor()
            cursor.execute(f"""
            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = '{table_name}')
            CREATE TABLE {table_name} (
                period VARCHAR(50),
                total_sales FLOAT,
                total_quantity FLOAT,
                calculated_date DATE
            )
        """)
            cursor.execute(f"TRUNCATE TABLE {table_name}")
            conn.commit()
        finally:
            # Always release the DDL connection, including on failure.
            conn.close()

        # NOTE: truncate and insert run on separate connections, so a failed
        # insert leaves the table empty until the next successful run.
        # quote_plus escapes every reserved character of the ODBC string
        # (braces, spaces, '+', '&', '%', ...), not just ';'.
        engine = create_engine(f"mssql+pyodbc:///?odbc_connect={quote_plus(conn_str)}")
        try:
            summary_df.to_sql(table_name, engine, if_exists='append', index=False, method='multi')
        finally:
            # Return the pooled connections held by the throwaway engine.
            engine.dispose()
        print(f"Pushed {len(summary_df)} rows to `{table_name}`")
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        print(f"Push failed for {table_name}: {e}")

# ================================
# MAIN
# ================================
def main(config):
    """Run the pipeline: load rows, summarise both timeframes, store them.

    Loads the data described by *config*, anchors all windows on the latest
    date found, prints the recent and the past summaries, and then writes
    each one to the table named under 'recent_summary_table' /
    'past_summary_table'. Returns early (after a message) when the load
    failed or produced no rows.
    """
    df, date_col, value_col, quantity_col = load_data(config)
    if df is None or df.empty:
        print("No data to process.")
        return

    most_recent_date = df[date_col].max()
    print(f"\nMost recent date in data: {most_recent_date.date()}")

    # (banner, is_recent flag, config key of the target table); recent
    # (current partial periods) first, then past (prior completed periods).
    plan = (
        ("\nRecent Timeframe Sums:", True, 'recent_summary_table'),
        ("\nPast Timeframe Sums:", False, 'past_summary_table'),
    )

    # Compute and show every summary before anything is written to the DB.
    pending = []
    for banner, recent_flag, table_key in plan:
        summary = calculate_period_sums(
            df, date_col, value_col, quantity_col, most_recent_date, is_recent=recent_flag
        )
        print(banner)
        print(summary)
        pending.append((summary, table_key))

    # Push to DB
    for summary, table_key in pending:
        push_summary_to_db(summary, config, config[table_key])

# Script entry point: run the whole pipeline with the module-level CONFIG
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    main(CONFIG)

Connecting to MSSQL...


  df = pd.read_sql(config['sql_query'], conn)


Loaded 27,174 rows from DB
Cleaned data: 27,174 rows

Most recent date in data: 2022-01-07

Recent Timeframe Sums:
    period   total_sales  total_quantity calculated_date
0      day  1.659017e+06    5.547116e+05      2022-01-07
1     week  8.490459e+06    2.759774e+06      2022-01-07
2    month  8.490459e+06    2.759774e+06      2022-01-07
3  quarter  8.490459e+06    2.759774e+06      2022-01-07
4     year  8.490459e+06    2.759774e+06      2022-01-07

Past Timeframe Sums:
    period   total_sales  total_quantity calculated_date
0      day  1.370359e+06   459571.598932      2022-01-07
1     week  0.000000e+00        0.000000      2022-01-07
2    month  0.000000e+00        0.000000      2022-01-07
3  quarter  0.000000e+00        0.000000      2022-01-07
4     year  0.000000e+00        0.000000      2022-01-07
Pushed 5 rows to `recent_timeframe_performance_summary`
Pushed 5 rows to `past_timeframe_performance_summary`
