In [2]:
import polars as pl
import plotly.express as px
#  Plot Rank by Year, and Rank by Career Year.
#  Figure out a good way to filter. Ideas are:
#       filter by league
#       Players with 5 or more years
#       Players who acheived top xx for yy or more years

#----- LOAD AND CLEAN THE DATASET
df_league_names = pl.DataFrame({
    'LEAGUE_ABBR': ['ATP', 'LPGA', 'MLB', 'NBA', 'NHL', 'PGA', 'WNBA', 'WTA'],
    'LEAGUE_NAME': [
        'Association of Tennis Professionals', 
        'Ladies Professional Golf Association', 
        'Major League Baseball', 
        'National Basketball Association', 
        'National Hockey League', 
        'Professional Golf Association', 
        'Womens National Basketball Association',  
        'Womens Tennis Association'
        ]
})
df  = (
    pl.read_csv('one-hit-wonders.csv')
    .rename(  # upper case all column names, replace spaces with underscores
        lambda c: 
            c.upper()            # column names to upper case
            .replace(' ', '_')   # blanks replaced with underscores
    )
    .filter(pl.col('DNP') == False)
    .select(
        NAME = pl.col('NAME').str.to_titlecase(),
        NAME_COUNT = pl.len().over('NAME').cast(pl.UInt8),
        YEAR = pl.col('YEAR').cast(pl.UInt16),
        CAREER_YEAR = (1 + pl.col('YEAR_INDEX')),
        PEAK_CAREER_YEAR = pl.col('PEAK_YEAR_INDEX'),
        SPORT = pl.col('SPORT_NAME').str.to_titlecase(),
        TEAM = pl.col('TEAM'),
        PLAYED_VAL = pl.col('PLAYED_VAL').cast(pl.UInt16),
        RANK = pl.col('RANK').cast(pl.UInt16),
        HIGHEST_RANK = pl.col('RANK').min().over('NAME').cast(pl.UInt8),
        FIRST_YEAR = pl.col('YEAR').min().over('NAME').cast(pl.UInt16),
        LEAGUE_ABBR = pl.col('LEAGUE').str.to_uppercase(),
    )
    .join(df_league_names, on = 'LEAGUE_ABBR', how='left')
    .filter(pl.col('HIGHEST_RANK') <= 10)
     # .sort('')
)
print(df.shape)
print(df.glimpse())
# # Convert Polars DataFrame to a dictionary for Plotly
# heatmap_data = df.to_dict(as_series=False)

# # Create the scatter
fig = px.line(
    df,
    x='YEAR',
    y='RANK',
    color='NAME',
    template='simple_white',
    markers=True,
    line_shape='spline'
    # x=heatmap_data['CUSTOMER'],
    # y=heatmap_data['LOCATION'],
    # z=heatmap_data['TOTAL'],
    # labels={"x": 'CUSTOMER', "y": 'LOCATION', "z": 'TOTAL SPEND'},
    # color_continuous_scale='greens',
    # text_auto='$,d'# text_auto=".2f",
    # # title='SALES BY CUSTOMER, LOCATION'
)

fig.update_yaxes(autorange='reversed')
# fig.update_layout(
#     title=dict(text='Sales by Cusomer, Location')
# )
# fig.update(layout_coloraxis_showscale=False)
fig.show()

(8737, 13)
Rows: 8737
Columns: 13
$ NAME             <str> 'Derrick Rose', 'Derrick Rose', 'Derrick Rose', 'Derrick Rose', 'Derrick Rose', 'Derrick Rose', 'Derrick Rose', 'Derrick Rose', 'Dana Barros', 'Dana Barros'
$ NAME_COUNT        <u8> 8, 8, 8, 8, 8, 8, 8, 8, 14, 14
$ YEAR             <u16> 2009, 2010, 2011, 2012, 2014, 2015, 2016, 2017, 1990, 1991
$ CAREER_YEAR      <i64> 1, 2, 3, 4, 6, 7, 8, 9, 1, 2
$ PEAK_CAREER_YEAR <i64> 2, 2, 2, 2, 2, 2, 2, 2, 5, 5
$ SPORT            <str> 'Basketball', 'Basketball', 'Basketball', 'Basketball', 'Basketball', 'Basketball', 'Basketball', 'Basketball', 'Basketball', 'Basketball'
$ TEAM             <str> 'CHI', 'CHI', 'CHI', 'CHI', 'CHI', 'CHI', 'CHI', 'NYK', 'SEA', 'SEA'
$ PLAYED_VAL       <u16> 81, 78, 81, 39, 10, 51, 66, 64, 81, 66
$ RANK             <u16> 113, 73, 3, 25, 392, 198, 482, 248, 191, 153
$ HIGHEST_RANK      <u8> 3, 3, 3, 3, 3, 3, 3, 3, 8, 8
$ FIRST_YEAR       <u16> 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 1990, 1990
$ LEAG

In [3]:
 # Create the scatter
fig = px.line(
    df,
    x='CAREER_YEAR',
    y='RANK',
    color='NAME',
    template='simple_white',
    markers=True,
    line_shape='spline'
    # x=heatmap_data['CUSTOMER'],
    # y=heatmap_data['LOCATION'],
    # z=heatmap_data['TOTAL'],
    # labels={"x": 'CUSTOMER', "y": 'LOCATION', "z": 'TOTAL SPEND'},
    # color_continuous_scale='greens',
    # text_auto='$,d'# text_auto=".2f",
    # # title='SALES BY CUSTOMER, LOCATION'
)

fig.update_yaxes(autorange='reversed')
# fig.update_layout(
#     title=dict(text='Sales by Cusomer, Location')
# )
# fig.update(layout_coloraxis_showscale=False)
fig.show()

In [4]:
import polars as pl
import plotly.express as px
#  Dataset has 10 unique customers & locations, 92 unique customer/locatio pairs

#----- LOAD AND CLEAN THE DATASET
df  = (
    pl.read_csv('one-hit-wonders.csv')
    .rename(  # upper case all column names, replace spaces with underscores
        lambda c: 
            c.upper()            # column names to upper case
            .replace(' ', '_')   # blanks replaced with underscores
    )
    # .filter(pl.col('DNP'))
    # .select(
    #     YEAR = pl.col('YEAR').cast(pl.UInt16),
    #     YEAR_INDEX = pl.col('CAREER_YEAR'),
    #     TOTAL_PLAYERS = pl.col('PLAYERS'),
    #     PEAK_YEAR_INDEX = pl.col('PEAK_CAREER_YEAR'),
    #     SPORT = pl.col('SPORT_NAME').str.to_titlecase(),
    #     RANK = pl.col('RANK').cast(pl.UInt8),
    #     LEAGUE = pl.col('LEAGUE').str.to_uppercase(),
    # )
)
df

ID,NAME,YEAR,YEAR_INDEX,TOTAL_PLAYERS,…,PLAYED_VAL,PLAYED_2_PROP,PLAYED_2_VAL,TEAM,POS
str,str,i64,i64,i64,…,i64,str,str,str,str
"""rosede01""","""Derrick Rose""",2009,0,445,…,81,,,"""CHI""",
"""rosede01""","""Derrick Rose""",2010,1,442,…,78,,,"""CHI""",
"""rosede01""","""Derrick Rose""",2011,2,452,…,81,,,"""CHI""",
"""rosede01""","""Derrick Rose""",2012,3,478,…,39,,,"""CHI""",
"""rosede01""","""Derrick Rose""",2013,4,469,…,,,,,
…,…,…,…,…,…,…,…,…,…,…
"""12916""","""Francisco Lindor""",2016,1,1353,…,684,"""IP""",,"""Indians""","""SS"""
"""12916""","""Francisco Lindor""",2017,2,1358,…,723,"""IP""",,"""Indians""","""SS"""
"""15429""","""Kris Bryant""",2015,0,1348,…,650,"""IP""",,"""Cubs""","""3B"""
"""15429""","""Kris Bryant""",2016,1,1353,…,699,"""IP""",,"""Cubs""","""3B/LF"""


In [5]:
import plotly.express as px
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2025/week-38/one-hit-wonders.csv")

fig = px.bar(df, x='name', y='played_val', hover_data='year', labels={'played_val': 'games played'})
fig.show()

In [7]:
import polars as pl
pl.Config().set_tbl_cols(10)
import plotly.express as px
import dash
from dash import Dash, dcc, html, Input, Output
import dash_mantine_components as dmc
import dash_ag_grid as dag
dash._dash_renderer._set_react_version('18.2.0')

#  Plot Rank by Year, and Rank by Career Year.
#  Figure out a good way to filter. Ideas are:
#       filter by league
#       Players with 5 or more years
#       Players who acheived top xx for yy or more years

#----- LOAD AND CLEAN THE DATASET
df_league_names = pl.DataFrame({
    'LEAGUE_ABBR': ['ATP', 'LPGA', 'MLB', 'NBA', 'NHL', 'PGA', 'WNBA', 'WTA'],
    'LEAGUE_NAME': [
        'Association of Tennis Professionals', 
        'Ladies Professional Golf Association', 
        'Major League Baseball', 
        'National Basketball Association', 
        'National Hockey League', 
        'Professional Golf Association', 
        'Womens National Basketball Association',  
        'Womens Tennis Association'
        ]
})
dict_league_abbr_name = dict(zip(
    df_league_names['LEAGUE_ABBR'], df_league_names['LEAGUE_NAME']))

df  = (
    pl.read_csv('one-hit-wonders.csv')
    .rename(  # upper case all column names, replace spaces with underscores
        lambda c: 
            c.upper()            # column names to upper case
            .replace(' ', '_')   # blanks replaced with underscores
    )
    .filter(~ pl.col('DNP'))
    .select(
        NAME = pl.col('NAME').str.to_titlecase(),
        NAME_COUNT = pl.len().over('NAME').cast(pl.UInt8),
        YEAR = pl.col('YEAR').cast(pl.UInt16),
        YEARS_TOT = pl.col('YEAR').cast(pl.UInt16).len().over('NAME'),
        CAREER_YEAR = (1 + pl.col('YEAR_INDEX')),
        PEAK_CAREER_YEAR = pl.col('PEAK_YEAR_INDEX'),
        SPORT = pl.col('SPORT_NAME').str.to_titlecase(),
        TEAM = pl.col('TEAM'),
        PLAYED_VAL = pl.col('PLAYED_VAL').cast(pl.UInt16),
        RANK = pl.col('RANK').cast(pl.UInt16),
        RANK_MED = pl.col('RANK').cast(pl.UInt16).median().over('NAME', 'LEAGUE'),
        RANK_MEAN = pl.col('RANK').cast(pl.UInt16).mean().over('NAME'),
        FIRST_YEAR = pl.col('YEAR').min().over('NAME').cast(pl.UInt16),
        LEAGUE_ABBR = pl.col('LEAGUE').str.to_uppercase(),
        NAME_LEAGUE = (
            pl.col('NAME').str.to_titlecase() + pl.lit(' (') +
            pl.col('LEAGUE').str.to_uppercase() + pl.lit(')')
        )
    )
    .filter(pl.col('YEARS_TOT') >= 5)
    .sort('LEAGUE_ABBR', 'RANK_MED', 'RANK_MEAN')
    .join(df_league_names, on = 'LEAGUE_ABBR', how='left')

    # .with_columns(LEAGUE_RANK=pl.col('LEAGUE_ABBR').cum_count())
)
df_league_rank = (
    df
    .unique('NAME_LEAGUE')    
    .sort('LEAGUE_ABBR', 'RANK_MED', 'RANK_MEAN')
    .with_columns(RANK_LEAGUE = pl.col('LEAGUE_ABBR').cum_count().over('LEAGUE_ABBR'))
    # .filter(pl.col('RANK_MED') == 1)
)
print('df_league_rank')
print(df_league_rank.shape)
print(df_league_rank.glimpse())
print(df_league_rank.select('NAME_LEAGUE', 'RANK_MED', 'RANK_MEAN', 'RANK_LEAGUE').head(10))


df_league_rank
(1075, 17)
Rows: 1075
Columns: 17
$ NAME             <str> 'Roger Federer', 'Rafael Nadal', 'Pete Sampras', 'Novak Djokovic', 'Andy Murray', 'Andy Roddick', 'Yevgeny Kafelnikov', 'Jo Wilfried Tsonga', 'David Ferrer', 'Tomas Berdych'
$ NAME_COUNT        <u8> 21, 17, 15, 15, 15, 13, 12, 17, 18, 17
$ YEAR             <u16> 1999, 2008, 1989, 2016, 2007, 2005, 2002, 2016, 2007, 2016
$ YEARS_TOT        <u32> 21, 17, 15, 15, 15, 13, 12, 17, 18, 17
$ CAREER_YEAR      <i64> 3, 8, 2, 14, 5, 6, 11, 16, 8, 16
$ PEAK_CAREER_YEAR <i64> 20, 16, 14, 14, 14, 11, 9, 16, 15, 16
$ SPORT            <str> 'Tennis', 'Tennis', 'Tennis', 'Tennis', 'Tennis', 'Tennis', 'Tennis', 'Tennis', 'Tennis', 'Tennis'
$ TEAM             <str> None, None, None, None, None, None, None, None, None, None
$ PLAYED_VAL       <u16> 22, 19, 0, 17, 19, 20, 25, 17, 23, 21
$ RANK             <u16> 64, 1, 81, 2, 11, 3, 27, 12, 5, 10
$ RANK_MED         <f64> 2.0, 2.0, 3.0, 3.0, 4.0, 8.0, 8.5, 13.0, 14.0, 14.0
$ RANK_MEAN