In [1]:
#team analytics
import pandas as pd
import plotly.express as px

In [2]:
# Read the three source tables (players, teams, matches) from the local data/ folder.
players_df = pd.read_csv('data/players.csv')
teams_df = pd.read_csv('data/teams.csv')
matches_df = pd.read_csv('data/matches.csv')

In [3]:
# Split "A vs B" match names into the two participating teams. n=1 stops after
# the first ' vs ' so the result can never have more than two columns.
matches_df[['team_1', 'team_2']] = matches_df['match_name'].str.split(' vs ', n=1, expand=True)

# Pull the four-digit year out of the tournament id.
# - The pattern is a raw string so that \d is not parsed as an (invalid)
#   string escape sequence.
# - The result is converted to a number here because later cells compute
#   `year % 2`; ids without four consecutive digits become NaN.
matches_df['year'] = pd.to_numeric(
    matches_df['tournament_id'].str.extract(r'(\d{4})', expand=False),
    errors='coerce',
)
matches_df.columns

Index(['key_id', 'tournament_id', 'tournament_name', 'match_id', 'match_name',
       'stage_name', 'group_name', 'group_stage', 'knockout_stage', 'replayed',
       'replay', 'match_date', 'match_time', 'stadium_id', 'stadium_name',
       'city_name', 'country_name', 'home_team_id', 'home_team_name',
       'home_team_code', 'away_team_id', 'away_team_name', 'away_team_code',
       'score', 'home_team_score', 'away_team_score', 'home_team_score_margin',
       'away_team_score_margin', 'extra_time', 'penalty_shootout',
       'score_penalties', 'home_team_score_penalties',
       'away_team_score_penalties', 'result', 'home_team_win', 'away_team_win',
       'draw', 'team_1', 'team_2', 'year'],
      dtype='object')

In [39]:
# List the distinct tournament names. The output shows both men's tournaments
# (even years) and women's tournaments (odd years), which is the split the
# even/odd-year filters in the plotting functions below are built on.
matches_df['tournament_name'].unique()

array(["1930 FIFA Men's World Cup", "1934 FIFA Men's World Cup",
       "1938 FIFA Men's World Cup", "1950 FIFA Men's World Cup",
       "1954 FIFA Men's World Cup", "1958 FIFA Men's World Cup",
       "1962 FIFA Men's World Cup", "1966 FIFA Men's World Cup",
       "1970 FIFA Men's World Cup", "1974 FIFA Men's World Cup",
       "1978 FIFA Men's World Cup", "1982 FIFA Men's World Cup",
       "1986 FIFA Men's World Cup", "1990 FIFA Men's World Cup",
       "1991 FIFA Women's World Cup", "1994 FIFA Men's World Cup",
       "1995 FIFA Women's World Cup", "1998 FIFA Men's World Cup",
       "1999 FIFA Women's World Cup", "2002 FIFA Men's World Cup",
       "2003 FIFA Women's World Cup", "2006 FIFA Men's World Cup",
       "2007 FIFA Women's World Cup", "2010 FIFA Men's World Cup",
       "2011 FIFA Women's World Cup", "2014 FIFA Men's World Cup",
       "2015 FIFA Women's World Cup", "2018 FIFA Men's World Cup",
       "2019 FIFA Women's World Cup", "2022 FIFA Men's World Cup"],
      dt

## Visuals 

In [45]:
def men_team_performance_pie(team_name, matches_df):
    """Display a pie chart of one team's wins, losses and draws in even-year matches.

    Even-numbered years are used as the stand-in for men's tournaments, which is
    why the chart title says "Men's Teams".

    Parameters
    ----------
    team_name : str
        Compared against `home_team_name` and `away_team_name`.
    matches_df : pandas.DataFrame
        Needs the columns `year`, `home_team_name`, `away_team_name`,
        `home_team_win`, `away_team_win` and `draw`.

    Side effects
    ------------
    The `year` column of the frame passed in is overwritten with its numeric
    form (unparseable values become NaN). The figure is shown via `fig.show()`;
    nothing is returned.
    """
    # Numeric years are needed for the parity test below. This assignment
    # writes into the caller's DataFrame, not into a copy.
    matches_df['year'] = pd.to_numeric(matches_df['year'], errors='coerce')

    is_even_year = matches_df['year'] % 2 == 0
    even_year_matches = matches_df[is_even_year].dropna(subset=['year'])

    # Split the team's games by the side it played on.
    as_home = even_year_matches[even_year_matches['home_team_name'] == team_name]
    as_away = even_year_matches[even_year_matches['away_team_name'] == team_name]

    # Per-side tallies; a loss is any game that was neither won nor drawn.
    wins_home, draws_home = as_home['home_team_win'].sum(), as_home['draw'].sum()
    wins_away, draws_away = as_away['away_team_win'].sum(), as_away['draw'].sum()
    losses_home = len(as_home) - wins_home - draws_home
    losses_away = len(as_away) - wins_away - draws_away

    outcome_counts = {
        'Result': ['Win', 'Loss', 'Draw'],
        'Count': [
            wins_home + wins_away,
            losses_home + losses_away,
            draws_home + draws_away,
        ],
    }

    chart_title = f"Performance of {team_name} (Men's Teams)"
    fig = px.pie(outcome_counts, names='Result', values='Count', title=chart_title)
    fig.show()


In [46]:
# Pie chart of Spain's win / loss / draw counts across even-year matches.
men_team_performance_pie('Spain', matches_df)

In [52]:
import plotly.graph_objects as go

def team_performance_over_years(team_name, matches_df, filter_even_years=True):
    """Plot a team's yearly win / loss / draw percentages as a stacked area chart.

    Parameters
    ----------
    team_name : str
        Name matched against `home_team_name` and `away_team_name`.
    matches_df : pandas.DataFrame
        Match table with the columns `year`, `home_team_name`,
        `away_team_name`, `home_team_win` and `away_team_win`. It is not
        modified.
    filter_even_years : bool, default True
        True keeps only even years, False keeps only odd years.

    Returns
    -------
    None
        The figure is displayed with `fig.show()`.

    Notes
    -----
    Every year that survives the parity filter gets a point on the x-axis; a
    year in which the team played no match is plotted as 0 % for all three
    outcomes. The three areas are stacked per year (they add up to 100 % in
    any year the team played), not accumulated from one year to the next.
    """
    # Coerce locally so the function also works when `year` still holds the
    # strings produced by `str.extract`, without touching the caller's frame.
    year = pd.to_numeric(matches_df['year'], errors='coerce')
    if filter_even_years:
        keep = year % 2 == 0
    else:
        keep = year % 2 != 0  # rows with a missing year are dropped by the groupby below

    # Classify every match from the team's point of view. A row where the
    # team appears on the home side is judged as a home match only.
    is_home = matches_df['home_team_name'] == team_name
    is_away = (matches_df['away_team_name'] == team_name) & ~is_home
    home_won = matches_df['home_team_win'] == 1
    away_won = matches_df['away_team_win'] == 1

    won = (is_home & home_won) | (is_away & away_won)
    lost = (is_home & ~home_won & away_won) | (is_away & ~away_won & home_won)
    played = is_home | is_away
    drew = played & ~won & ~lost

    outcomes = pd.DataFrame({
        'year': year,
        'wins': won,
        'losses': lost,
        'draws': drew,
        'games': played,
    })
    per_year = outcomes[keep].groupby('year').sum()

    # Years without any game would divide by zero; mask them and report 0 %.
    games = per_year['games'].where(per_year['games'] > 0)
    percentages = (
        per_year[['wins', 'losses', 'draws']]
        .div(games, axis=0)
        .mul(100)
        .fillna(0.0)
    )
    years = per_year.index.tolist()

    # Create the stacked area chart using Plotly. `stackgroup` makes Plotly add
    # the traces on top of each other within each year.
    fig = go.Figure()
    series = (
        ('Win %', 'wins', '#4CAF50', 'rgba(76, 175, 80, 0.6)'),
        ('Loss %', 'losses', '#F44336', 'rgba(244, 67, 54, 0.6)'),
        ('Draw %', 'draws', '#2196F3', 'rgba(33, 150, 243, 0.6)'),
    )
    for label, column, line_color, fill_color in series:
        fig.add_trace(go.Scatter(
            x=years,
            y=percentages[column].tolist(),
            mode='lines',
            name=label,
            stackgroup='one',
            line=dict(width=0, color=line_color),
            fillcolor=fill_color,
        ))

    # Update the layout of the plot for better aesthetics
    fig.update_layout(
        title=f"Performance of {team_name} Over the {'Even' if filter_even_years else 'Odd'} Years",
        xaxis_title="Year",
        yaxis_title="Percentage (%)",
        xaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgray'),
        yaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgray'),
        plot_bgcolor='white',  # Background color for the plot area
        paper_bgcolor='rgb(243, 243, 243)',  # Background color for the entire paper
        showlegend=True,
        legend=dict(
            x=1.05,  # Position the legend outside the plot
            y=1,
            traceorder='normal',
            font=dict(size=12),
            bgcolor='rgba(255, 255, 255, 0)',  # Transparent background
            bordercolor='gray',
            borderwidth=1
        ),
        margin=dict(l=50, r=50, t=50, b=50)  # Adjust margins to make the plot more spacious
    )

    # Show the plot
    fig.show()


In [53]:
# Chart France's results; the default filter_even_years=True keeps even years only.
team_performance_over_years('France', matches_df)

In [54]:
import plotly.graph_objects as go

def team_performance_stacked_area(team_name, matches_df, year_type='even'):
    """Plot a team's yearly win / loss / draw percentages as a stacked area chart.

    Parameters
    ----------
    team_name : str
        Name matched against `home_team_name` and `away_team_name`.
    matches_df : pandas.DataFrame
        Match table with the columns `year`, `home_team_name`,
        `away_team_name`, `home_team_win` and `away_team_win`. It is not
        modified.
    year_type : str, default 'even'
        'even' keeps only even years and 'odd' keeps only odd years. Any other
        value (for example 'all') disables the filter and plots every year.

    Returns
    -------
    None
        The figure is displayed with `fig.show()`.

    Notes
    -----
    Every selected year gets a point on the x-axis; a year in which the team
    played no match is plotted as 0 % for all three outcomes.
    """
    # Coerce locally so the function also works when `year` still holds the
    # strings produced by `str.extract`, without touching the caller's frame.
    year = pd.to_numeric(matches_df['year'], errors='coerce')

    # Classify every match from the team's point of view. A row where the
    # team appears on the home side is judged as a home match only.
    is_home = matches_df['home_team_name'] == team_name
    is_away = (matches_df['away_team_name'] == team_name) & ~is_home
    home_won = matches_df['home_team_win'] == 1
    away_won = matches_df['away_team_win'] == 1

    won = (is_home & home_won) | (is_away & away_won)
    lost = (is_home & ~home_won & away_won) | (is_away & ~away_won & home_won)
    played = is_home | is_away
    drew = played & ~won & ~lost

    outcomes = pd.DataFrame({
        'year': year,
        'wins': won,
        'losses': lost,
        'draws': drew,
        'games': played,
    })

    # Apply the parity filter; unrecognised year_type values keep every year.
    if year_type == 'even':
        outcomes = outcomes[year % 2 == 0]
    elif year_type == 'odd':
        outcomes = outcomes[year % 2 != 0]

    # Rows with a missing year are dropped by the groupby.
    per_year = outcomes.groupby('year').sum()

    # Years without any game would divide by zero; mask them and report 0 %.
    games = per_year['games'].where(per_year['games'] > 0)
    percentages = (
        per_year[['wins', 'losses', 'draws']]
        .div(games, axis=0)
        .mul(100)
        .fillna(0.0)
    )
    years = per_year.index.tolist()

    # Create the stacked area chart using Plotly
    fig = go.Figure()
    series = (
        ('Win %', 'wins', 'rgba(76, 175, 80, 0.6)'),      # Green with transparency
        ('Loss %', 'losses', 'rgba(244, 67, 54, 0.6)'),   # Red with transparency
        ('Draw %', 'draws', 'rgba(33, 150, 243, 0.6)'),   # Blue with transparency
    )
    for label, column, fill_color in series:
        fig.add_trace(go.Scatter(
            x=years,
            y=percentages[column].tolist(),
            mode='lines',
            name=label,
            stackgroup='one',  # Stack all areas
            fillcolor=fill_color,
            line=dict(width=0)
        ))

    # Update the layout for better aesthetics
    fig.update_layout(
        title=f"Performance of {team_name} Over the Years",
        xaxis_title="Year",
        yaxis_title="Percentage (%)",
        xaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgray'),
        yaxis=dict(showgrid=True, gridwidth=1, gridcolor='lightgray'),
        plot_bgcolor='white',
        paper_bgcolor='rgb(243, 243, 243)',
        showlegend=True,
        legend=dict(x=1.05, y=1, traceorder='normal', font=dict(size=12), bgcolor='rgba(255, 255, 255, 0)', bordercolor='gray', borderwidth=1),
        margin=dict(l=50, r=50, t=50, b=50)
    )

    # Show the plot
    fig.show()


In [58]:
# Show every year: any year_type other than 'even' or 'odd' disables the parity
# filter. A descriptive string is used instead of the boolean False so the
# intent is readable at the call site.
team_performance_stacked_area('Brazil', matches_df, year_type='all')

In [57]:
# Brazil's chart restricted to even years (the default year_type='even').
team_performance_stacked_area('Brazil', matches_df)

### Comparing Plotly to Another Mapping Package

In [59]:
import plotly.express as px
import pandas as pd

def world_cup_win_percentage_map(matches_df):
    """
    Creates a world map visualizing each country's win percentage.

    A team's win percentage is its number of won matches divided by the number
    of matches it appears in (home or away), times 100. All rows of
    `matches_df` are used; no year filtering is applied here.

    Parameters:
    - matches_df: DataFrame containing match results with columns
      ['home_team_name', 'away_team_name', 'home_team_win', 'away_team_win'].
      It is not modified. Rows with a missing team name are ignored for that
      side.

    Returns:
    - None. The figure is displayed with `fig.show()`.
    """
    home_won = matches_df['home_team_win'] == 1
    # A match credits at most one winner; the home flag takes precedence.
    away_won = (matches_df['away_team_win'] == 1) & ~home_won

    # One row per (match, side): which team appeared and whether it won.
    appearances = pd.DataFrame({
        'country': pd.concat(
            [matches_df['home_team_name'], matches_df['away_team_name']],
            ignore_index=True,
        ),
        'won': pd.concat([home_won, away_won], ignore_index=True).astype(int),
    })

    per_country = appearances.groupby('country', sort=False)['won']
    games = per_country.size()
    wins = per_country.sum()

    # Explicit columns keep the frame well-formed even when there are no
    # matches, so the plotting call can always find 'country'.
    country_df = pd.DataFrame(
        {
            'country': games.index.to_numpy(),
            'win_percentage': (wins / games * 100).to_numpy(dtype=float),
        },
        columns=['country', 'win_percentage'],
    )

    # Plot world map
    # NOTE(review): team names that plotly does not recognise as country names
    # will presumably be left uncoloured - confirm against the rendered map.
    fig = px.choropleth(
        country_df,
        locations='country',  # Match with country names
        locationmode='country names',  # Use full country names
        color='win_percentage',
        color_continuous_scale='Viridis',  # Dark purple (low) through green to yellow (high)
        title="World Cup Team Win Percentages",
        labels={'win_percentage': 'Win %'},
        projection="natural earth"
    )

    fig.update_layout(
        geo=dict(showcoastlines=True, showland=True, landcolor="lightgray"),
        coloraxis_colorbar=dict(title="Win %")
    )

    fig.show()

In [60]:
# Draw the choropleth from every match in matches_df (the function applies no year filter).
world_cup_win_percentage_map(matches_df)