# Exploring the Impact of the Lionesses' Success on the Women's Super League (WSL)
## data_visuals.ipynb — Exploratory Data Analysis
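This notebook explores season-by-season WSL data (attendance, points, goals and player nationality) and assembles the resulting figures into an interactive dashboard.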

#### Set-up, Import and Config

In [1]:
# Import
from pathlib import Path
import pandas as pd
import panel as pn
pn.extension('ipywidgets', sizing_mode='stretch_width')

import hvplot.pandas
import holoviews as hv
hv.extension('bokeh')

import sys
import warnings

# Working directory
# Walk upwards from the current working directory until a folder containing
# `.git` is found; that folder is used as the repository root and added to
# `sys.path` so the project-local imports below (`config`, `src`) can be made.
current = Path.cwd()
for parent in [current, *current.parents]:
    if (parent / '.git').exists():
        repo_root = parent
        break
else:
    # No `.git` marker anywhere above the working directory. Without this
    # branch `repo_root` would be undefined and the next statement would fail
    # with an unexplained NameError, so fall back to the working directory and
    # say why the project imports may not resolve.
    warnings.warn(
        f"No '.git' directory found in {current} or any of its parents; "
        "using the current working directory as the repository root. "
        "Project imports (config, src) may fail."
    )
    repo_root = current

# Re-running this cell in the same kernel must not keep appending duplicates.
if str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))

# Function Imports
from config.general_config import DATA_OUT
from src.wsl_plots import create_kpi_card, create_club_capacity_attendance_chart, create_wsl_visual, create_nationality_figure, create_distribution_visual
from src.wsl_dashboard import build_dashboard
from src.data_utils import load_csv_data

In [2]:
# Config
# File names of the CSV inputs. Each one is joined onto DATA_OUT and read with
# load_csv_data in the data-import cell.
wsl_data_file_name = 'combined_wsl_data.csv'  # loaded as df_wsl
nat_data_file_name = 'nationality_combined_data.csv'  # loaded as df_nationality
stadium_data_file_name = 'wsl_stadium_data.csv'  # loaded as df_stadiums

### Data Import

In [3]:
# Read the three CSV inputs from DATA_OUT, in the order:
# club/season data, nationality data, stadium data.
df_wsl, df_nationality, df_stadiums = (
    load_csv_data(DATA_OUT / file_name)
    for file_name in (wsl_data_file_name, nat_data_file_name, stadium_data_file_name)
)

In [4]:
# df_wsl.head(2)
# df_nationality.head()
# df_stadiums.head(2)

##### Specific DataFrame for capacity and attendance plot

In [5]:
# Keep only the columns the capacity/attendance chart needs from each frame
attendance_cols = ['Club', 'Season', 'Attendance']
stadium_cols = ['Team', 'Season', 'Ground', 'Capacity']
df_wsl_sel = df_wsl.loc[:, attendance_cols]
df_stadiums_sel = df_stadiums.loc[:, stadium_cols]

# Left join: every club/season row of the WSL data is kept, with the matching
# ground and capacity attached where the stadium data has one
# (`Club` on the left corresponds to `Team` on the right).
df_combined = df_wsl_sel.merge(
    df_stadiums_sel,
    how='left',
    left_on=['Club', 'Season'],
    right_on=['Team', 'Season'],
)

# `Team` only served as the join key, so it is dropped from the result
wsl_cap_att_df = df_combined.drop(columns=['Team'])
# wsl_cap_att_df.head(1)

## WSL Club and Season Visualisations

#### Plots to Save
*Commented out so that the saved figure files are not overwritten each time the notebook is run.*

In [6]:
# capacity_fig = create_club_capacity_attendance_chart(
#     df= wsl_cap_att_df,
#     show_logo= True,
#     save_fig= True,
#     fig_name= 'capacity_fig'
# )

# points_fig = create_wsl_visual(
#     df=df_wsl,
#     metric_col='Points',
#     yaxis_label='Points',
#     title_prefix='Points',
#     show_logo= True,
#     save_fig= True,
#     fig_name= 'points_fig'
# )

# points_per_match_fig = create_wsl_visual(
#     df=df_wsl,
#     metric_col='Points_Per_Match',
#     yaxis_label='Points per Match',
#     title_prefix='Points per Match',
#     show_logo= True,
#     save_fig= True,
#     fig_name= 'points_per_match_fig'
# )

# goals_fig = create_wsl_visual(
#     df= df_wsl,
#     metric_col='Goals_For',  
#     yaxis_label='Goals',
#     title_prefix='Goals',
#     show_logo= True,
#     save_fig= True,
#     fig_name= 'goals_fig'
# )

# nationality_count = create_nationality_figure(
#     df=df_nationality, 
#     y_col='Num_Players',
#     hover_cols=['FIFA Ranking'],     
#     title='Number of Players per Season by Nationality Group',
#     yaxis_title='Number of Players',
#     height=900,
#     width=1200,
#     save_fig=True,
#     fig_name='nationality_count'  
# )

# distribution_fig = create_distribution_visual(df_nationality, save_fig=True, fig_name='distribution_fig')

#### Figures for Dashboard

In [7]:
# Plots
def _season_metric_figure(metric_col, label):
    """Call `create_wsl_visual` on `df_wsl` for one metric column.

    `label` is passed as both the y-axis label and the title prefix.
    """
    return create_wsl_visual(
        df=df_wsl,
        metric_col=metric_col,
        yaxis_label=label,
        title_prefix=label,
    )


capacity_fig = create_club_capacity_attendance_chart(wsl_cap_att_df)

points_fig = _season_metric_figure('Points', 'Points')
points_per_match_fig = _season_metric_figure('Points_Per_Match', 'Points per Match')
goals_fig = _season_metric_figure('Goals_For', 'Goals')

# Player counts per season by nationality group, with FIFA ranking as hover data
nationality_count = create_nationality_figure(
    df=df_nationality,
    y_col='Num_Players',
    hover_cols=['FIFA Ranking'],
    title='Number of Players per Season by Nationality Group',
    yaxis_title='Number of Players',
    height=900,
    width=1200,
)

distribution_fig = create_distribution_visual(df_nationality)

Figure created.
Figure created.
Figure created.
Figure created.
Figure created.
Figure created.


In [8]:
# KPI Style Cards
# All four cards are built from df_wsl and share the same season/club columns.
kpi_keys = {'season_col': 'Season', 'club_col': 'Club'}

# Attendance is the only card aggregated with 'mean' and without a label column.
attendance_kpi = create_kpi_card(
    df_wsl,
    metric_col='Attendance',
    title='Attendance',
    overview_aggregation='mean',
    **kpi_keys,
)

point_kpi = create_kpi_card(
    df_wsl,
    metric_col='Points',
    label_col='Points',
    title='Points',
    overview_aggregation='max',
    **kpi_keys,
)

points_per_match_kpi = create_kpi_card(
    df_wsl,
    metric_col='Points_Per_Match',
    label_col='Points_Per_Match',
    title='Points per Match',
    overview_aggregation='max',
    **kpi_keys,
)

# The metric is the goal count; the label column carries the scorer's name.
top_scorer_kpi = create_kpi_card(
    df_wsl,
    metric_col='Top_Scorer_Goals',
    label_col='Top_Scorer_Name',
    title='Top Goal Scorer',
    overview_aggregation='max',
    **kpi_keys,
)

## Dashboard

- *The UEFA Women's Championship was won by:*
    - *Netherlands in 2017, with Denmark as runners-up*
    - *England in 2022, with Germany as runners-up*
    - *England in 2025, with Spain as runners-up*
- *The FIFA Women's World Cup was won by:*
    - *United States in 2019, with the Netherlands as runners-up*
    - *Spain in 2023, with England as runners-up*

In [9]:
# Markdown text shown on the 'Attendance' dashboard page (panel 'B' in pages_dict).
# The text uses non-ASCII punctuation (U+2011 non-breaking hyphen, U+2013 en dash,
# U+2019 apostrophe); keep the file saved as UTF-8 so it is not mis-decoded.
# NOTE(review): the context bullet says attendance data is unavailable for
# 2019/20, yet the highlights list a 2019/20 figure — confirm against df_wsl.
attendance_summary = """
### Stadium Capacity vs Attendance
*Context for this period:*
- *The WSL became fully professional in 2017/18, which is why the dataset begins here.*
- *The 2019/20 to 2020/21 seasons were heavily disrupted by COVID‑19 and concluded early, so attendance data is not available.*
- ***Note:*** *Some fixtures were played at larger venues than usual, which can temporarily inflate attendance figures. For example, Chelsea have hosted select matches at Stamford Bridge.*

This page explores how matchday attendance in the WSL has evolved relative to stadium capacity across the 2017/18–2024/25 seasons. 
The card on the left provides a season‑by‑season breakdown for each club, while the charts below show how attendance compares with stadium size over time.

#### Key Attendance Highlights (2017/18–2024/25)
- **Overall average attendance:** 3,909  
- **Highest season averages:**
  - **2017/18:** 3,818 - Manchester City  
  - **2018/19:** 2,040 - Chelsea  
  - **2019/20:** 6,204 - Tottenham  
  - **2020/21:** COVID‑19 impacted  
  - **2021/22:** 3,567 - Manchester United  
  - **2022/23:** 19,245 - Arsenal  
  - **2023/24:** 29,999 - Arsenal  
  - **2024/25:** 28,808 - Arsenal  

#### Chart Breakdown
- Bars represent each club’s stadium capacity.  
- Lines show actual attendance for each season.  
- The purple line indicates the league‑wide average.  

This makes it easy to see:
- how well clubs fill their stadiums  
- how attendance varies between clubs  
- how league‑wide interest has shifted over time  

Hover over any bar or line to view exact values.  
Use the dropdown menu above the chart to filter by club or view the league average only.
"""

In [10]:
# Markdown text shown on the 'Points' dashboard page (panel 'A' in pages_dict).
# The text uses non-ASCII punctuation (U+2011 non-breaking hyphen, U+2019
# apostrophe); keep the file saved as UTF-8 so it is not mis-decoded.
# The per-season points list is headed "totals" because its values are
# season points totals, not averages.
points_summary = """
This page explores how clubs have performed across seasons in terms of total points and points per match. The card on the left provides a season‑by‑season breakdown, while the charts below show both raw points and normalised performance.

#### League winners
  - **2017/18:** Chelsea  
  - **2018/19:** Arsenal  
  - **2019/20:** Chelsea  (Due to the COVID-19 pandemic, the season ended early and the league was decided on a points-per-game basis) 
  - **2020/21:** Chelsea 
  - **2021/22:** Chelsea
  - **2022/23:** Chelsea
  - **2023/24:** Chelsea
  - **2024/25:** Chelsea

#### Key Point Highlights:
- **Overall top points:** 60
- **Highest season totals:**
  - **2017/18:** 44 - Chelsea  
  - **2018/19:** 54 - Arsenal  
  - **2019/20:** 40 - Manchester City  
  - **2020/21:** 57 - Chelsea
  - **2021/22:** 56 - Chelsea
  - **2022/23:** 58 - Chelsea
  - **2023/24:** 55 - Chelsea
  - **2024/25:** 60 - Chelsea

#### Key Point per Match Highlights:
- **Overall top points per match:** 2.73
- **Highest season averages:**
  - **2017/18:** 2.44 - Chelsea  
  - **2018/19:** 2.7 - Arsenal  
  - **2019/20:** 2.6 - Chelsea  
  - **2020/21:** 2.59 - Chelsea  
  - **2021/22:** 2.55 - Chelsea
  - **2022/23:** 2.64 - Chelsea
  - **2023/24:** 2.5 - Chelsea
  - **2024/25:** 2.73 - Chelsea

#### Chart Breakdown
The first chart shows the total points earned by each WSL club across seasons.  
The second chart displays points per match, providing a normalised view of performance regardless of the number of games played.

This approach:
- allows fairer comparisons across seasons  
- accounts for the league’s expansion from 8 to 12 teams  
- provides a clearer view of the disrupted seasons  
"""


In [11]:
# Markdown text shown on the 'Goals' dashboard page (panel 'B' in pages_dict).
# The text uses U+2011 non-breaking hyphens; keep the file saved as UTF-8 so
# they are not mis-decoded. Every scorer's nationality is given as a demonym.
goals_summary = """
This page explores goal‑scoring trends across the WSL, highlighting standout attackers, seasonal scoring patterns, and shifts in offensive performance. 
The card on the left provides a season‑by‑season breakdown, while the chart below shows total goals scored by each club.

#### Key Goal Highlights:
- **Highest single‑season goal tally:** 22
- **Top scorers by season:**
  - **2017/18:** 15 - Ellen White (English) for Birmingham City
  - **2018/19:** 22 - Vivianne Miedema (Dutch) for Arsenal
  - **2019/20:** 16 - Vivianne Miedema (Dutch) for Arsenal
  - **2020/21:** 21 - Sam Kerr (Australian) for Chelsea
  - **2021/22:** 20 - Sam Kerr (Australian) for Chelsea
  - **2022/23:** 22 - Rachel Daly (English) for Aston Villa
  - **2023/24:** 21 - Khadija Shaw (Jamaican) for Manchester City
  - **2024/25:** 12 - Alessia Russo (English) for Arsenal

#### Chart Breakdown
The chart displays the total goals scored by each WSL club across seasons.  
This helps highlight:

- attacking strengths and weaknesses  
- periods of dominance or decline  
- how scoring trends evolve over time  
- differences in offensive output between clubs  

Hover over any data point to view the exact number of goals scored in a given season, enabling detailed comparison across teams and years.
"""

In [12]:
# Markdown text shown on the 'Nationality' dashboard page (panel 'A' in pages_dict).
# The text uses non-ASCII punctuation (U+2011 non-breaking hyphen, U+2014 em dash,
# U+2019 apostrophe); keep the file saved as UTF-8 so it is not mis-decoded.
nationality_summary = """
*Context for this period:*
- *The WSL became fully professional in 2017/18, which is why the dataset begins here.*
- *The league has expanded from 8 to 12 teams, increasing squad sizes and international recruitment.*
- *This period also coincides with major achievements for the England Women’s National Team:*
  - **UEFA Women’s EURO Champions**
    - 2022 - England (runners‑up: Germany)
    - 2025 - England (runners‑up: Spain)
  - **FIFA Women’s World Cup**
    - 2023 - Spain (runners‑up: England)

These successes significantly increased visibility, media coverage, and public interest in women’s football, contributing to rising WSL attendances and attracting more international talent to the league.

This page explores how player nationalities have evolved across the WSL from the 2017/18 to 2024/25 seasons. The card on the left shows the number of players per season by nationality group, while the chart below compares player representation with minutes played — highlighting not just who is present, but who is contributing most on the pitch.

#### Key Nationality Highlights
- **English players** consistently form the largest group, though their share has declined slightly over time.
- **European (non-English)** and **Non-European** players have increased in both presence and playing time.
- The rise in international minutes suggests growing reliance on overseas talent, especially in attacking and midfield roles.
- The league’s globalisation reflects broader investment and visibility in women’s football.

#### Chart Breakdown
- The **bar chart** shows the number of players per season by nationality group:  
  - **English** (red), **European excluding English** (blue), and **Non-European** (green).
- The **line chart** compares the percentage of players vs. the percentage of minutes played by each group.

This dual view helps identify:
- shifts in recruitment and playing time  
- whether certain groups are under- or over-represented in match minutes  
- how the league’s competitive balance and diversity have evolved over time  

Hover over any bar or line to explore exact values by season and nationality group.
"""

In [13]:
# Per-page specifications: a page title plus lettered slots ('A', 'B', ...)
# holding the KPI cards, summary text and figures for that page.
attendance_page = dict(
    title='WSL Attendance Trends for Season 2017/18 to 2024/25',
    A=attendance_kpi,
    B=attendance_summary,
    C=capacity_fig,
)

points_page = dict(
    title='WSL Points Progression and Points per Match Trends for Season 2017/18 to 2024/25',
    A=points_summary,
    B=point_kpi,
    C=points_per_match_kpi,
    D=points_fig,
    E=points_per_match_fig,
)

goals_page = dict(
    title='WSL Goals and Top Goal Scorers for Season 2017/18 to 2024/25',
    A=top_scorer_kpi,
    B=goals_summary,
    C=goals_fig,
)

nationality_page = dict(
    title='Nationality Representation in the WSL for Season 2017/18 to 2024/25',
    A=nationality_summary,
    B=nationality_count,
    C=distribution_fig,
)

# Page name -> page specification, in the order the pages are listed.
pages_dict = {
    'Attendance': attendance_page,
    'Points': points_page,
    'Goals': goals_page,
    'Nationality': nationality_page,
}

In [14]:
# Build the dashboard object from the page specifications.
wsl_dashboard = build_dashboard(pages_dict, title='WSL Analytics Dashboard')

# Optional: save the dashboard to an HTML file (disabled).
# from panel.io.save import save
# save(wsl_dashboard, "wsl_dashboard.html")

# Launch the dashboard on a local server; the server URL is printed in the
# cell output.
wsl_dashboard.show()

Launching server at http://localhost:57455


<panel.io.server.Server at 0x1e12dddaf90>

