# II. VIZUALIZACE - PROD

## 1) - STATISTIKY závodníci a kluby obecně

- **multi-legend grafy** (udělám dva sloupce legend, M-medaile 1/2/3 a W-medaile 1/2/3, s maličko jinými barvami - celkem tedy 6 barev)
- případně přes **TABS** https://dash.plotly.com/dash-core-components/tab = 3 tabs na pohlaví
- **dynamic options** dropdownu - podle roku nabídni multi-dropdown eventy, nebo podle klubu jeho lidi
- **cross filtering** (vyberu člověka -> uvidím jeho statistiky, nebo klub a uvidím jeho lidi - nebo oboje ! https://dash.plotly.com/interactive-graphing


In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from dotenv import load_dotenv
import os

load_dotenv()

project_path = os.getenv("PROJECT_PATH")


def printbold(s):
    print("\033[1m" + s + "\033[0m")
    

## Data

In [3]:
df_tot = pd.read_excel(f"{project_path}/data/1_fencers.xlsx")

printbold(str(len(df_tot)))

df_tot_ranky = pd.read_excel(f"{project_path}/data/2_tournament_results.xlsx")
printbold(str(len(df_tot_ranky)))


[1m397[0m
[1m1464[0m


In [4]:
titles = {"W": "ŽENY", "M": "MUŽI"}
colors = {"W": "red", "M": "navy"}
medal_colors = {"1-W": "rgba(255, 215, 0, 1)",  # Gold (Women)
                "1-M": "rgba(255, 215, 0, 0.4)",  # Gold (Men)
                "2-W": "rgba(192, 192, 192, 1)",  # Silver (Women)
                "2-M": "rgba(192, 192, 192, 0.4)",  # Silver (Men)
                "3-W": "rgba(218, 165, 32, 1)",   # Bronze (Women)
                "3-M": "rgba(218, 165, 32, 0.4)",  # Bronze (Men)                    
                }

## 1.1 - Počty závodníků podle Klubu a Roku narození, chci dropdown na pohlaví

In [5]:
np.random.seed(0)

data = {
    'sex': np.random.choice(['M', 'W'], 2000),
    'birthyear': np.random.randint(1970, 2000, 2000),
    'club': np.random.choice(['Club A', 'Club B', 'Club C'], 2000)
}

df_tot = pd.DataFrame(data)

fig = make_subplots(rows=1, cols=1)

def update_plot(sex_filter):
    if sex_filter == 'ALL':
        print("ALL IS SELECTED")
        df_filtered = df_tot
    else:
        print(f"{sex_filter=} IS SELECTED")
        df_filtered = df_tot[df_tot['sex'] == sex_filter]

    df = pd.DataFrame({'count': df_filtered.groupby(['birthyear', 'club']).size()}).reset_index()

    clubs = df['club'].unique()
    years = sorted(df['birthyear'].unique())

    for club in clubs:
        club_data = df[df['club'] == club]
        counts = [club_data[club_data['birthyear'] == year]['count'].iloc[0] if year in club_data['birthyear'].values else 0 for year in years]
        try:
            fig.update_traces(x=years, y=counts, selector=dict(name=club))
        except:
            fig.add_trace(go.Bar(
                x=years,
                y=counts,
                name=club,
            ))

    fig.update_layout(title=f'All Athletes - Members by Birth Year and Club', barmode='stack')
    fig.update_xaxes(tickangle=270, title_text='Birth Year')
    fig.update_yaxes(title_text='Count')

# Add dropdown menu
fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=[{'visible': True}],
                    label='Men',
                    method='update',
                ),
                dict(
                    args=[{'visible': True}],
                    label='Women',
                    method='update',
                ),
                dict(
                    args=[{'visible': True}],
                    label='All',
                    method='update',
                )
            ],
            direction='down',
            showactive=True,
            x=0.1,
            xanchor='left',
            y=1.15,
            yanchor='top',
        ),
    ]
)

update_plot('Men')
fig.show()


sex_filter='Men' IS SELECTED


In [6]:
def update_plot(sex_filter):
    if sex_filter == 'ALL':
        df_filtered = df_tot
    else:
        df_filtered = df_tot[df_tot['sex'] == sex_filter]
        
    df = pd.DataFrame({'count': df_filtered.groupby(
        ['birthyear', "club"]).size()}).reset_index()

    fig = px.bar(df,
                 x= df["birthyear"],
                y= df["count"], 
                color = df["club"],
                width=950, height=700,
                labels = {"club": "Klub"},  
                category_orders={"club": sorted(df["club"].unique())}, 
                title = "Závodníci podle roku narození a klubu"
                )

    fig.update_layout(xaxis = dict(tickvals = df["birthyear"]))
    fig.update_xaxes(tickangle=270)
    fig.update_xaxes(title_text="Rok narození")
    fig.update_yaxes(title_text="Počet")
    fig.show()

fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                dict(
                    args=[{'visible': True}],
                    label='Men',
                    method='update',
                ),
                dict(
                    args=[{'visible': True}],
                    label='Women',
                    method='update',
                ),
                dict(
                    args=[{'visible': True}],
                    label='All',
                    method='update',
                )
            ],
            direction='down',
            showactive=True,
            x=0.1,
            xanchor='left',
            y=1.15,
            yanchor='top',
        ),
    ]
)

update_plot('All')


ValueError: Cannot accept list of column references or list of columns for both `x` and `y`.

In [8]:
for sex in ["W", "M"]:
    title_sex = titles[sex]

    df = pd.DataFrame({'count': df_tot[df_tot["sex"]==sex].groupby(
        ['birthyear', "club"]).size()}).reset_index()

    fig = px.bar(df, x= df["birthyear"],
                    y= df["count"], 
                    color = df["club"] ,
                    width=950, height=700,
                    labels = {"club": "Klub"},  
                    category_orders={"club": sorted(df["club"].unique())}, 
                    title = f"{title_sex} - Závodníci podle roku narození a klubu")

    fig.update_layout(xaxis = dict(tickvals = df["birthyear"]))
    fig.update_xaxes(tickangle=270)
    fig.update_xaxes(title_text="Rok narození")
    fig.update_yaxes(title_text="Počet")
    fig.show()
    
    #pio.write_html(fig, file = f"{project_path}/visualization/new version/V1_Year-Club_{sex}.html", auto_open=False)

        

## 1.2 - Počty závodníků podle roku narození, pohlaví mám navy + red

In [9]:
years = sorted(df_tot["birthyear"].unique())

fig = go.Figure()
for sex in ["W", "M"]:
    title_sex = titles[sex]
    color = colors[sex]
    
    df =  pd.DataFrame({'count' : df_tot[df_tot["sex"]==sex].groupby(
        ['birthyear', "club"] ).size()}).reset_index()
    fig.add_trace(go.Bar(x=df["birthyear"], y=df["count"], name=title_sex, marker=dict(color=color)))
    
fig.update_layout(barmode='stack')  
fig.update_layout(width=950, height=700,)
fig.update_layout(xaxis = dict(tickvals = years))        

fig.update_xaxes(tickangle=270)
fig.update_xaxes(title_text="Rok narození")
fig.update_yaxes(title_text="Počet")
fig.show()

#pio.write_html(fig, file = f"{project_path}/visualization/new version/V1_Year-Club_All.html", auto_open=False)   


## 1.3 - Počty závodníků podle klubů 
### chci dropdown na pohlaví, protože nemohu rozpadnout každou barvu na dvě. // nebo nemusím do legendy dávat kluby barevně, a pak ok, navy + red

In [10]:
for sex in ["W", "M"]:
    title_sex = titles[sex]

    df = pd.DataFrame({'count': df_tot[df_tot["sex"]==sex].groupby(
        ["club"]).size()}).reset_index()
    
    fig = px.bar(df, x= df["club"],
                    y= df["count"], 
                    color = df["club"] ,
                    text_auto='.0f',       
                    width=950, height=700,
                    labels = {"club": "Klub"}, 
                    title = f"{title_sex} - Kluby podle počtu závodníků")

    fig.update_layout(xaxis = dict(tickvals = df["club"]))
    fig.update_layout(xaxis={'categoryorder':'total descending'})   
    # trik z grafu od medailí, nemusím pak sortovat na začátku to groupby, funguje i pro víc traces, i na jejich součet

    fig.update_xaxes(tickangle=270)
    fig.update_xaxes(title_text="Klub")
    fig.update_yaxes(title_text="Počet")
    fig.show()

    #pio.write_html(fig, file = f"{project_path}/visualization/new version/V2_Club_{sex}.html", auto_open=False)
    

## 2.1 - Medaile podle roku narození

In [None]:
# lets drop DNFs
#df_tot_ranky = df_tot_ranky[df_w_ranky.Final_rank != "999"]

In [11]:
df_ranky = df_tot_ranky

medals = df_ranky.loc[df_ranky["final_rank"].isin([1, 2, 3]), :]
d2 = pd.DataFrame({'count' : medals.groupby(['birthyear', "final_rank", "sex"]).size()}).reset_index()
d2["final_rank"] = d2["final_rank"].astype(str)

fig = px.bar(d2, x=d2["birthyear"],
                y=d2["count"], 
                color=d2["final_rank"] + "-" + d2["sex"],
                color_discrete_map=medal_colors,
                text_auto='.0f',      
                width=950, height=700,
                labels={"final_rank": "Medailové umístění"},  
                category_orders={"color": ["3-M", "3-W", "2-M", "2-W","1-M","1-W"]},  
                title=f"{title_sex} - Medaile podle roku narození")

fig.update_layout(legend_traceorder="reversed")  
fig.update_layout(xaxis=dict(tickvals=d2["birthyear"]))
fig.update_xaxes(tickangle=270)
fig.update_xaxes(title_text="Rok narození")
fig.update_yaxes(title_text="Počet medailí")

fig.update_layout(legend=dict(
    title="Medals",
    orientation="v",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    bgcolor="rgba(255, 255, 255, 0.6)"
))

fig.show()


#pio.write_html(fig, file = f"{project_path}/visualization/new version/V3_Medals-Year.html", auto_open=False)
    

KeyError: 'birthyear'

## 2.2 - Medaile souhrn podle klubu

In [None]:
for sex in ["W", "M"]:
    title_sex = titles[sex]
    
    df_ranky = df_tot_ranky[df_tot_ranky.sex == sex]
    
    medals = df_ranky.loc[df_ranky["final_rank"].isin([1,2,3]), :]
    d2 = pd.DataFrame({'count' : medals.groupby( ['club', "final_rank"] ).size()}).reset_index()
    d2["final_rank"] = d2["final_rank"].astype(str)

    fig = px.bar(d2,
                    x= d2["club"],
                    y= d2["count"], 
                    color = d2["final_rank"] ,
                    color_discrete_map={"1": "gold", "2":"silver", "3":"goldenrod"}, 
                    text_auto='.0f',     
                    width=950, height=700,
                    labels = {"final_rank": "Medailové umístění"},  
                    category_orders={"final_rank": ["3","2","1"]},     
                    title = f"{title_sex} - Medaile podle klubů")

    fig.update_layout(xaxis = dict(tickvals = d2["club"]))
    fig.update_layout(xaxis={'categoryorder':'total descending'})
    # trik, nemusím pak sortovat na začátku to groupby, funguje i pro víc traces, i na jejich součet

    fig.update_layout(legend_traceorder="reversed") 
    fig.update_xaxes(tickangle=270)
    fig.update_xaxes(title_text="Klub")
    fig.update_yaxes(title_text="Počet medailí")
    fig.show()

    #pio.write_html(fig, file = f"{project_path}/visualization/new version/V4_Medals-Club_{sex}.html", auto_open=False)


In [None]:
clubs = sorted(df_ranky["club"].unique())

fig = go.Figure()
for sex in ["W", "M"]:
    title_sex = titles[sex]
    color = colors[sex]
    
    df =  pd.DataFrame({'count' : df_tot[df_tot["sex"]==sex].groupby(
        ["club"] ).size()}).reset_index()
    fig.add_trace(go.Bar(x=df["club"], y=df["count"], name=title_sex, marker=dict(color=color)))

fig.update_layout(barmode='stack')  
fig.update_layout( width=950, height=700,)
fig.update_layout(xaxis = dict(tickvals = clubs))        
fig.update_layout(xaxis={'categoryorder':'total descending'})
# trik z grafu od medailí, nemusím pak sortovat na začátku to groupby, funguje i pro víc traces, i na jejich součet

fig.update_xaxes(tickangle=270)
fig.update_xaxes(title_text="Klub")
fig.update_yaxes(title_text="Počet")
fig.show()

#pio.write_html(fig, file = f"{project_path}/visualization/new version/V2_Club_All.html", auto_open=False)   

## 5.4. untested options

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# Generate dummy data
np.random.seed(0)

# Generate dummy data for men
data_men = {
    'sex': np.repeat('M', 100),
    'birthyear': np.random.randint(1970, 2000, 100),
    'final_rank': np.random.choice([1, 2, 3, 4, 5], 100, p=[0.5, 0.1, 0.1, 0.15, 0.15])
}

# Generate dummy data for women
data_women = {
    'sex': np.repeat('W', 100),
    'birthyear': np.random.randint(1970, 2000, 100),
    'final_rank': np.random.choice([1, 2, 3, 4, 5], 100, p=[0.1, 0.2, 0.5, 0.15, 0.05])
}

# Combine data
df_men = pd.DataFrame(data_men)
df_women = pd.DataFrame(data_women)
df_tot_ranky = pd.concat([df_men, df_women])

display(df_tot_ranky.tail())


In [None]:
df_ranky = df_tot_ranky

medals = df_ranky.loc[df_ranky["final_rank"].isin([1, 2, 3]), :]
d2 = pd.DataFrame({'count' : medals.groupby(['birthyear', "final_rank", "sex"]).size()}).reset_index()
d2["final_rank"] = d2["final_rank"].astype(str)

fig = px.bar(d2, x=d2["birthyear"],
                y=d2["count"], 
                color=d2["final_rank"] + "-" + d2["sex"],
                color_discrete_map={
                    "1-W": "rgba(255, 215, 0, 1)",  # Gold (Women)
                    "1-M": "rgba(255, 215, 0, 0.4)",  # Gold (Men)
                    "2-W": "rgba(192, 192, 192, 1)",  # Silver (Women)
                    "2-M": "rgba(192, 192, 192, 0.4)",  # Silver (Men)
                    "3-W": "rgba(218, 165, 32, 1)",   # Bronze (Women)
                    "3-M": "rgba(218, 165, 32, 0.4)",  # Bronze (Men)                    
                },
                text_auto='.0f',      
                width=950, height=700,
                labels={"final_rank": "Medailové umístění"},  
                category_orders={"color": ["3-M", "3-W", "2-M", "2-W","1-M","1-W"]},  
                title=f"{title_sex} - Medaile podle roku narození")

fig.update_layout(legend_traceorder="reversed")  
fig.update_layout(xaxis=dict(tickvals=d2["birthyear"]))
fig.update_xaxes(tickangle=270)
fig.update_xaxes(title_text="Rok narození")
fig.update_yaxes(title_text="Počet medailí")

fig.update_layout(legend=dict(
    title="Medals",
    orientation="v",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01,
    bgcolor="rgba(255, 255, 255, 0.6)"
))

fig.show()
