In [1]:
import duckdb
import os

# Read the Wasabi (S3-compatible) connection settings from the environment.
# Only the endpoint has a fallback value; the access key, secret key and
# bucket name must be provided by the environment.
wasabi_endpoint = os.getenv('WASABI_ENDPOINT', 's3.us-east-2.wasabisys.com')
wasabi_access_key = os.getenv('WASABI_ACCESS_KEY')
wasabi_secret_key = os.getenv('WASABI_SECRET_KEY')
bucket_name = os.getenv('WASABI_BUCKET_NAME')

# Fail fast on missing settings: os.getenv returns None for an unset variable,
# and None would otherwise be formatted into the SET statements below as the
# literal text 'None' (and into any s3:// path built from bucket_name).
_required_settings = {
    'WASABI_ACCESS_KEY': wasabi_access_key,
    'WASABI_SECRET_KEY': wasabi_secret_key,
    'WASABI_BUCKET_NAME': bucket_name,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )


def _sql_quote(value):
    """Return ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so the value cannot terminate the
    literal early when it is spliced into the SET statements.
    """
    return "'" + str(value).replace("'", "''") + "'"


# Create DuckDB connection (no database path is passed)
con = duckdb.connect()

# Configure S3 settings: endpoint and credentials come from the environment;
# the remaining options are fixed for this notebook.
con.execute(f"""
    SET s3_endpoint={_sql_quote(wasabi_endpoint)};
    SET s3_access_key_id={_sql_quote(wasabi_access_key)};
    SET s3_secret_access_key={_sql_quote(wasabi_secret_key)};
    SET s3_url_style='path';
    SET preserve_insertion_order = false;
    SET enable_progress_bar = true;
""")

# Only non-secret settings are echoed; the keys are deliberately not printed.
print("✓ DuckDB configured with S3 credentials")
print(f"Endpoint: {wasabi_endpoint}")
print('Bucket Name: ', bucket_name, '\n\n')


✓ DuckDB configured with S3 credentials
Endpoint: s3.us-east-2.wasabisys.com
Bucket Name:  dfscrunch-data-lake 




In [274]:
# --- Query parameters -------------------------------------------------------
sport = 'NFL'
slate_type = 'dk_single_game'
# `date` is substituted into the S3 paths below and may contain glob
# wildcards. Alternatives used previously: '*' (every date) or a single day
# such as '2025-09-07'.
date = '2025-*'
contests_path = f"s3://{bucket_name}/dds/{sport}/contests/{slate_type}/{date}/data.parquet"
user_lineups_path = f"s3://{bucket_name}/dds/{sport}/user_lineups/{slate_type}/{date}/data.parquet"
lineups_path = f"s3://{bucket_name}/dds/{sport}/lineups/{slate_type}/{date}/data.parquet"
players_path = f"s3://{bucket_name}/dds/{sport}/players/{slate_type}/{date}/data.parquet"
user_name = 'scout326'

# --- Per-contest usage summary for one user ----------------------------------
# CTE overview (in order of appearance):
#   contests                      contest rows with contest_size bucketed into five
#                                 labelled size categories (thresholds are hard-coded;
#                                 their origin is not shown here - TODO document).
#   user_lineups                  the user's entries joined to contests, restricted to
#                                 '5_Massive' contests with multi_entry_max >= 100; `date`
#                                 is the second-to-last segment of the parquet file path.
#   full_lineups_data             lineup rows matching those entries, one row per
#                                 (contest_id, lineup_hash).
#   flat_user_lineups             the six roster columns (pos_cpt1, pos_flex1..5)
#                                 unpivoted to one row per roster slot.
#   players                       player rows with `date` derived from the file path,
#                                 de-duplicated on (player_id, date, position).
#   flat_lineups_players          roster slots joined to one player row per (player_id, date).
#   players_usage*                per contest/player: lineups containing the player and
#                                 that count divided by the user's lineup count.
#   max_players_usage_by_contest  highest such share per contest (any slot).
#   cpt_usage* / max_cpt_usage_by_contest
#                                 the same figures restricted to the pos_cpt1 slot.
#   cpt_position_usage            captain share summed per position and pivoted to one
#                                 column per position.
#
# The pivot lists its positions explicitly. Without the IN list DuckDB creates
# columns only for positions that occur in the data, so the hard-coded
# QB/RB/WR/TE/K/D columns in the final select would fail to bind whenever one
# of them was never used as captain in the selected date range. The IN list
# must stay in sync with the columns referenced in the final select/group by.
df = con.execute(
    f"""
        with contests as (
          SELECT
              contest_id,
              multi_entry_max,
              CASE
                  WHEN contest_size <= 318 THEN '1_Tiny'
                  WHEN contest_size <= 4444 THEN '2_Small'
                  WHEN contest_size <= 16646 THEN '3_Medium'
                  WHEN contest_size <= 79270 THEN '4_Large'
                  ELSE '5_Massive'
              END as size_category
          FROM read_parquet('{contests_path}')
        ),
        user_lineups as (
          select
           *,
           list_element(string_split(filename, '/'), -2) as date
             from read_parquet('{user_lineups_path}', filename=true) ul
             join contests c on c.contest_id = ul.contest_id
                where size_category = any(['5_Massive']) and user_id = '{user_name}' and multi_entry_max >= 100
        ),
        full_lineups_data as (
        select distinct on (l.contest_id, l.lineup_hash) * from read_parquet('{lineups_path}') l
          join user_lineups ul on l.lineup_hash = ul.lineup_hash and l.contest_id = ul.contest_id
        ),
        flat_user_lineups as (
            select
                date,
                contest_id,
                lineup_hash,
                position_name,
                total_salary,
                total_own,
                player_id
            FROM full_lineups_data
            unpivot(player_id for position_name in(
            pos_cpt1,
            pos_flex1,
             pos_flex2,
             pos_flex3,
              pos_flex4,
               pos_flex5))
        ),
        players as (
            select
                distinct on (player_id, date, position)
                 *,
                list_element(string_split(filename, '/'), -2) as date,
             from read_parquet('{players_path}', filename=true)
        ),
        flat_lineups_players as (
            select * from flat_user_lineups ful
                join (select distinct on (player_id, date) * from players) p on p.player_id = ful.player_id and p.date = ful.date
        ),
        players_usage as (
        SELECT
            contest_id,
            player_id,
            position,
            count(player_id) as used_count
            from flat_lineups_players
            group by contest_id, player_id, position
        ),
        lineups_count_by_contest as (
            select
                contest_id,
                 count(lineup_hash) as lineups_count
                from user_lineups ul
                group by contest_id
            ),
        players_usage_percent as (
        select
            pu.contest_id,
            pu.player_id,
            pu.used_count,
            pu.used_count / lcb.lineups_count as used_count_percent,
            lcb.lineups_count
            from players_usage pu
                join lineups_count_by_contest lcb on lcb.contest_id = pu.contest_id
                ),
        max_players_usage_by_contest as (
        select
            contest_id,
            max(pu.used_count_percent) as max_used_count_percent, -- shows player usage on all positions
            from players_usage_percent pu
                group by contest_id
        ),
        cpt_usage as (
            select
                contest_id,
                player_id,
                position,
                count(player_id) as used_cpt_count
            from flat_lineups_players where position_name = 'pos_cpt1'
            group by contest_id, player_id, position
        ),
        players_usage_cpt_percent as (
            select
                cu.contest_id,
                cu.player_id,
                cu.position,
                cu.used_cpt_count,
                cu.used_cpt_count / lcb.lineups_count as used_cpt_count_percent,
                lcb.lineups_count
            from cpt_usage cu
                join lineups_count_by_contest lcb on lcb.contest_id = cu.contest_id
        ),
        max_cpt_usage_by_contest as ( -- max player usage on CPT positions
            select
              contest_id,
              max(used_cpt_count_percent) as max_cpt_usage_percent
            from players_usage_cpt_percent group by contest_id
        ),
        cpt_position_usage as (
            pivot (
            select
                contest_id,
                position,
                sum(used_cpt_count_percent) as position_usage_on_cpt_percent
             from players_usage_cpt_percent
                group by contest_id, position
                )
                on position in ('QB', 'RB', 'WR', 'TE', 'K', 'D')
                using
                    sum(position_usage_on_cpt_percent) as position_usage_on_cpt_percent
                    group by contest_id
        )
        select
            fld.contest_id,
            min(fld.total_salary) as min_salary,
            max(fld.total_salary) as max_salary,
            min(fld.total_own) as min_own,
            max(fld.total_own) as max_own,
            mup.max_used_count_percent,
            mcup.max_cpt_usage_percent,
            cpup.QB_position_usage_on_cpt_percent,
            cpup.RB_position_usage_on_cpt_percent,
            cpup.WR_position_usage_on_cpt_percent,
            cpup.TE_position_usage_on_cpt_percent,
            cpup.K_position_usage_on_cpt_percent,
            cpup.D_position_usage_on_cpt_percent,
            from full_lineups_data fld
                join max_players_usage_by_contest mup on mup.contest_id = fld.contest_id
                join max_cpt_usage_by_contest mcup on mcup.contest_id = fld.contest_id
                join cpt_position_usage cpup on cpup.contest_id = fld.contest_id
                group by fld.contest_id,
                         mup.max_used_count_percent,
                          mcup.max_cpt_usage_percent,
                          cpup.QB_position_usage_on_cpt_percent,
                          cpup.RB_position_usage_on_cpt_percent,
                          cpup.WR_position_usage_on_cpt_percent,
                          cpup.TE_position_usage_on_cpt_percent,
                          cpup.K_position_usage_on_cpt_percent,
                          cpup.D_position_usage_on_cpt_percent,
    """
).df()

In [275]:
# Preview up to the first 30 rows of the per-contest summary frame.
df.head(n=30)

Unnamed: 0,contest_id,min_salary,max_salary,min_own,max_own,max_used_count_percent,max_cpt_usage_percent,QB_position_usage_on_cpt_percent,RB_position_usage_on_cpt_percent,WR_position_usage_on_cpt_percent,TE_position_usage_on_cpt_percent,K_position_usage_on_cpt_percent,D_position_usage_on_cpt_percent
0,182932587,45600,49700,127.33,262.89,1.0,0.546667,0.633333,0.046667,0.28,0.02,0.013333,0.006667
1,182808315,43900,49100,118.52,271.31,0.68,0.386667,0.033333,0.533333,0.38,0.02,0.02,0.006667
2,183249742,45600,49300,135.11,231.61,0.993333,0.453333,0.486667,0.146667,0.366667,,,
3,181646666,44200,48900,122.17,235.56,0.666667,0.313333,0.606667,0.193333,0.166667,0.013333,0.006667,0.013333
4,181656895,44400,49600,114.9,272.16,0.886667,0.313333,0.6,0.18,0.14,0.06,0.02,
5,182613171,44500,49300,116.04,224.18,0.833333,0.28,0.2,0.453333,0.153333,0.166667,0.02,0.006667
6,183936992,44800,48300,119.18,247.51,0.673333,0.34,0.486667,0.106667,0.38,0.006667,,0.02
7,182493296,42000,48400,114.36,208.09,0.76,0.353333,0.3,0.206667,0.413333,0.08,,
8,183584865,46800,49200,98.22,196.42,0.786667,0.206667,0.193333,0.173333,0.52,0.106667,,0.006667
9,181856782,43700,49500,121.77,241.95,0.833333,0.493333,0.673333,0.246667,0.053333,0.026667,,
