In [1]:

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Load and clean dataset
# The CSV is read from the current working directory; every later section needs `df`,
# so a missing file stops the run here.
try:
    df = pd.read_csv('players_22.csv', low_memory=False)
except FileNotFoundError:
    print("Error: 'players_22.csv' not found. Ensure the file is in the working directory.")
    # Raise SystemExit explicitly instead of calling exit(): exit() is a helper the
    # `site` module adds for interactive sessions, and called without an argument it
    # reports success (status 0) even though the load failed.
    raise SystemExit(1)

# Columns kept for the analysis; every other column of the CSV is dropped.
relevant_columns = [
    'short_name', 'age', 'overall', 'potential',
    'value_eur', 'wage_eur', 'player_positions',
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
    'power_stamina', 'mentality_composure', 'attacking_finishing',
    'nationality_name',
]

# Missing-value policy: the six headline attributes fall back to their column mean,
# the two money columns to their column median, and a missing nationality to 'Unknown'.
# The remaining columns are left as they are.
mean_imputed_columns = ['pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic']
median_imputed_columns = ['value_eur', 'wage_eur']

fill_values = {column: df[column].mean() for column in mean_imputed_columns}
fill_values.update({column: df[column].median() for column in median_imputed_columns})
fill_values['nationality_name'] = 'Unknown'

df = df[relevant_columns].fillna(fill_values)

# Compute composite scores
# Each composite is the row-wise mean of its component attribute columns.
composite_components = {
    'technical_score': ['passing', 'dribbling', 'attacking_finishing'],
    'physical_score': ['pace', 'physic', 'power_stamina'],
}
for composite_name, component_columns in composite_components.items():
    df[composite_name] = df[component_columns].mean(axis=1)

# 1. Histogram: Distribution of Overall Ratings
# The bars and the overlaid line are both on a probability-density scale, so they
# share a single y-axis.
hist_fig = go.Figure()
hist_data, bins = np.histogram(df['overall'], bins=30)
# np.histogram returns 31 edges for 30 counts. Plot each density at the centre of its
# bin; plotting against the raw edges made x one point longer than y and shifted the
# line half a bin to the left.
bin_centers = (bins[:-1] + bins[1:]) / 2
bin_density = hist_data / (hist_data.sum() * np.diff(bins))
hist_fig.add_trace(go.Histogram(x=df['overall'], nbinsx=30, name='Overall', histnorm='probability density'))
# The line is the outline of the binned density, not a kernel density estimate, so it
# is labelled accordingly.
hist_fig.add_trace(go.Scatter(x=bin_centers, y=bin_density, mode='lines', name='Binned density'))
hist_fig.update_layout(
    title='Distribution of Overall Player Ratings',
    xaxis_title='Overall Rating',
    # Both traces are normalised to a density, so the axis does not show counts.
    yaxis_title='Density',
    template='plotly_dark'
)
hist_insights = [
    f"Most players have overall ratings between {int(df['overall'].quantile(0.25))} and {int(df['overall'].quantile(0.75))}, indicating a concentrated middle range.",
    "The distribution is slightly right-skewed, with fewer players achieving elite ratings (>85).",
    f"The average overall rating is {df['overall'].mean():.1f}, with a standard deviation of {df['overall'].std():.1f}."
]

# 2. Scatter Plot: Age vs. Potential
# Bubble size encodes market value, colour encodes the current overall rating.
age_potential_r = df['age'].corr(df['potential'])

scatter_fig = px.scatter(
    df,
    x='age',
    y='potential',
    size='value_eur',
    color='overall',
    opacity=0.6,
    hover_data=['short_name', 'player_positions'],
    title='Age vs. Potential (Size: Market Value, Color: Overall)',
)
scatter_fig.update_layout(template='plotly_dark', xaxis_title='Age', yaxis_title='Potential')

# Only the last insight is computed from the data; the first two are fixed narrative text.
scatter_insights = [
    "Younger players (<25 years) tend to have higher potential, often exceeding their current overall rating.",
    "Players with high market value (larger bubbles) are typically younger and have high overall ratings (>80).",
    f"There is a negative correlation between age and potential (r={age_potential_r:.2f}), as older players have less room for growth.",
]

# 3. Heatmap: Attribute Correlations
# Pairwise correlations between the two composite scores and four base columns.
heatmap_columns = ['technical_score', 'physical_score', 'overall', 'potential', 'value_eur', 'age']
correlation_matrix = df[heatmap_columns].corr()

axis_labels = correlation_matrix.columns
corr_values = correlation_matrix.values
heatmap_trace = go.Heatmap(
    z=corr_values,
    x=axis_labels,
    y=axis_labels,
    colorscale='RdBu',
    zmin=-1,
    zmax=1,
    # Print each coefficient, rounded to two decimals, inside its cell.
    text=corr_values.round(2),
    texttemplate='%{text}',
    showscale=True,
)
heatmap_fig = go.Figure(data=heatmap_trace)
heatmap_fig.update_layout(template='plotly_dark', title='Correlation Heatmap of Key Attributes')

# Coefficients quoted in the insight text.
r_overall_technical = correlation_matrix.at['overall', 'technical_score']
r_overall_potential = correlation_matrix.at['overall', 'potential']
r_age_potential = correlation_matrix.at['age', 'potential']
heatmap_insights = [
    f"Overall rating has a strong positive correlation with technical score (r={r_overall_technical:.2f}) and potential (r={r_overall_potential:.2f}).",
    f"Age has a negative correlation with potential (r={r_age_potential:.2f}), indicating younger players have higher growth potential.",
    "Market value (value_eur) correlates moderately with overall and potential, reflecting player quality.",
]

# 4. Bar Plot: Top 10 Players by Technical Score
# Rows come back ordered from highest to lowest technical score.
top_technical = df[['short_name', 'technical_score']].nlargest(10, 'technical_score')

bar_fig = px.bar(
    top_technical,
    x='short_name',
    y='technical_score',
    title='Top 10 Players by Technical Score',
)
bar_fig.update_layout(template='plotly_dark', xaxis_title='Player', yaxis_title='Technical Score')

leader_name = top_technical['short_name'].iloc[0]
leader_score = top_technical['technical_score'].iloc[0]
lowest_top10_score = top_technical['technical_score'].min()
bar_insights = [
    f"{leader_name} leads with the highest technical score ({leader_score:.1f}), excelling in passing, dribbling, and finishing.",
    "Top technical players are predominantly attackers, highlighting the importance of technical skills in offensive roles.",
    f"The top 10 players have technical scores above {lowest_top10_score:.1f}, indicating elite performance.",
]

# 5. Box Plot: Attribute Variability by Position
# A player's primary position is the first entry of the comma-separated positions string.
df['primary_position'] = df['player_positions'].str.split(',').str.get(0)

box_fig = px.box(
    df,
    x='primary_position',
    y='dribbling',
    title='Dribbling Variability by Position',
)
box_fig.update_layout(template='plotly_dark', xaxis_title='Position', yaxis_title='Dribbling Score')

# Median dribbling over the pooled players of each position group quoted below.
attacking_mask = df['primary_position'].isin(['RW', 'LW', 'CAM'])
defensive_mask = df['primary_position'].isin(['CB', 'CDM'])
attacking_median = df.loc[attacking_mask, 'dribbling'].median()
defensive_median = df.loc[defensive_mask, 'dribbling'].median()
box_insights = [
    f"Wingers (RW, LW) and attacking midfielders (CAM) have the highest median dribbling scores, typically above {attacking_median:.1f}.",
    f"Defensive positions (CB, CDM) show lower dribbling scores, with medians around {defensive_median:.1f}.",
    "High variability in dribbling for midfield and attacking roles indicates diverse skill levels.",
]

# 6. Radar Chart: Player Profile Comparison
def plot_radar(players, attributes, title):
    """Build a radar (polar) chart comparing players on the given attributes.

    Players are looked up in the module-level ``df`` by ``short_name``. An exact
    name match is preferred; if there is none, the name is used as a
    case-insensitive *literal* substring. When several rows match, the first one
    in ``df`` order is plotted.

    Args:
        players: Iterable of player names to plot.
        attributes: List of ``df`` column names used as the radar axes.
        title: Chart title.

    Returns:
        A ``go.Figure`` with one filled ``Scatterpolar`` trace per player found.
        Players that cannot be found are reported on stdout and skipped.
    """
    fig = go.Figure()
    short_names = df['short_name']
    for player in players:
        # Exact match first, so a short name such as 'Ronaldo' is not shadowed by an
        # earlier row that merely contains it (e.g. 'Cristiano Ronaldo').
        player_data = df[short_names == player]
        if player_data.empty:
            # regex=False: names contain characters like '.', which would otherwise
            # act as regex wildcards.
            player_data = df[short_names.str.contains(player, case=False, na=False, regex=False)]
        if player_data.empty:
            print(f"Warning: Player '{player}' not found in dataset. Skipping.")
            continue
        fig.add_trace(go.Scatterpolar(
            r=player_data[attributes].iloc[0].values,
            theta=attributes,
            fill='toself',
            name=player_data['short_name'].iloc[0]
        ))
    fig.update_layout(
        # Fixed 0-100 radial range so every trace is drawn on the same scale.
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        showlegend=True,
        title=title,
        template='plotly_dark'
    )
    return fig

# Verify player names
# 'Messi|Ronaldo' is deliberately a regex alternation here: list every candidate name.
print("Available player names (partial match for Messi or Ronaldo):")
print(df[df['short_name'].str.contains('Messi|Ronaldo', case=False, na=False)]['short_name'].unique())

radar_attrs = ['pace', 'shooting', 'passing', 'dribbling', 'physic']
players = ['L. Messi', 'Cristiano Ronaldo']
valid_players = []
for player in players:
    # Resolve the requested name to the short_name stored in the dataset. Prefer an
    # exact match; otherwise fall back to a case-insensitive literal substring.
    # regex=False keeps the '.' in 'L. Messi' from acting as a wildcard.
    player_data = df[df['short_name'] == player]
    if player_data.empty:
        player_data = df[df['short_name'].str.contains(player, case=False, na=False, regex=False)]
    if player_data.empty:
        print(f"Player '{player}' not found.")
        continue
    valid_players.append(player_data['short_name'].iloc[0])
# The comparison needs two players; if either lookup failed, both are replaced.
if len(valid_players) < 2:
    print("Falling back to top 2 players by overall rating.")
    valid_players = df.nlargest(2, 'overall')['short_name'].tolist()
radar_fig = plot_radar(valid_players, radar_attrs, 'Player Profile Comparison')
# NOTE(review): apart from the two names, this narrative is fixed text and is not
# derived from the plotted values; re-check it if the player pair changes.
radar_insights = [
    f"{valid_players[0]} excels in dribbling and passing, while {valid_players[1]} is stronger in shooting and pace.",
    "Both players have well-rounded profiles, with scores above 80 in most attributes.",
    "Differences highlight distinct playing styles: technical finesse vs. physical power."
]

# 7. Line Plot: Simulated Performance Trend
# These ratings are hard-coded illustrative values; they are not read from `df`.
years = [2019, 2020, 2021, 2022]
simulated_ratings = {
    'Messi_Overall': [94, 93, 92, 91],
    'Ronaldo_Overall': [93, 92, 91, 90],
}
player_trend = pd.DataFrame({'Year': years, **simulated_ratings})

line_fig = px.line(
    player_trend,
    x='Year',
    y=list(simulated_ratings),
    title='Overall Rating Trend (Simulated)',
)
line_fig.update_layout(template='plotly_dark', yaxis_title='Overall Rating')

line_insights = [
    "Simulated data shows a gradual decline in overall ratings for both Messi and Ronaldo from 2019 to 2022.",
    "Messi maintains a slight edge over Ronaldo each year, though both remain elite (>90).",
    "The consistent downward trend may reflect aging effects on performance.",
]

# 8. Pie Chart: Positional Distribution
# Number of players per primary position (the 'primary_position' column is derived in
# the box-plot section).
position_counts = df['primary_position'].value_counts()
pie_fig = px.pie(
    values=position_counts.values,
    names=position_counts.index,
    title='Positional Distribution of Players'
)
pie_fig.update_layout(template='plotly_dark')

total_players = position_counts.sum()
# reindex(..., fill_value=0) counts a position that does not occur as zero instead of
# raising KeyError the way .loc[[...]] does.
common_share = position_counts.reindex(['CB', 'ST'], fill_value=0).sum() / total_players * 100
rare_share = position_counts.reindex(['CF', 'LWB', 'RWB'], fill_value=0).sum() / total_players * 100
pie_insights = [
    f"Center Backs (CB) and Strikers (ST) are the most common positions, making up {common_share:.1f}% of players.",
    # Removed stray pasted text that had corrupted the "(LWB, RWB)" label.
    f"Less common positions like Center Forwards (CF) and Wing Backs (LWB, RWB) represent under {rare_share:.1f}% of players.",
    "The distribution reflects a balance between defensive and offensive roles."
]

# 9. Choropleth Map: Player Distribution by Nationality
# Debug: Inspect nationality data
print("Unique nationality names in dataset:")
print(df['nationality_name'].unique())

# Fold the four UK home nations into 'United Kingdom' BEFORE aggregating. England has
# to be renamed for Plotly to place it at all; renaming only England after aggregation
# painted the UK shape with England-only figures and left the other three nations out.
# Work on a copy so the nationality column of `df` itself is left untouched.
# NOTE(review): other dataset spellings (e.g. 'Korea Republic', 'China PR', 'Congo DR')
# may also not resolve in 'country names' mode -- confirm against the rendered map.
uk_home_nations = ['England', 'Scotland', 'Wales', 'Northern Ireland']
map_df = df[['nationality_name', 'short_name', 'overall', 'age']].copy()
map_df['nationality_name'] = map_df['nationality_name'].replace(
    dict.fromkeys(uk_home_nations, 'United Kingdom')
)

nationality_counts = map_df['nationality_name'].value_counts().reset_index()
# Assign names explicitly rather than relying on what reset_index() calls the columns.
nationality_counts.columns = ['nationality_name', 'player_count']

# Compute additional metrics for tooltip
# Average overall rating and age per country
country_stats = map_df.groupby('nationality_name').agg({
    'overall': 'mean',
    'age': 'mean'
}).reset_index()
country_stats.columns = ['nationality_name', 'avg_overall', 'avg_age']

# Top player per country (by overall rating); idxmax keeps the first row on ties.
top_players = map_df.loc[
    map_df.groupby('nationality_name')['overall'].idxmax(),
    ['nationality_name', 'short_name', 'overall']
]
top_players.columns = ['nationality_name', 'top_player', 'top_player_rating']

# Merge metrics into nationality_counts
nationality_counts = nationality_counts.merge(country_stats, on='nationality_name', how='left')
nationality_counts = nationality_counts.merge(top_players[['nationality_name', 'top_player']], on='nationality_name', how='left')

# Clean data: drop the placeholder used for missing nationalities, largest country first.
nationality_counts = nationality_counts[nationality_counts['nationality_name'] != 'Unknown']
nationality_counts = nationality_counts.sort_values('player_count', ascending=False)

# Debug: Verify the combined United Kingdom row
print("Top 5 countries by player count with tooltip data:")
print(nationality_counts[['nationality_name', 'player_count', 'top_player', 'avg_overall', 'avg_age']].head(5))
print(f"United Kingdom data: {nationality_counts[nationality_counts['nationality_name'] == 'United Kingdom'][['nationality_name', 'player_count', 'top_player', 'avg_overall', 'avg_age']].to_dict('records')}")
england_index = nationality_counts[nationality_counts['nationality_name'] == 'United Kingdom'].index
print(f"Index number of United Kingdom (England): {england_index[0] if not england_index.empty else 'Not found'}")

# Create choropleth map with custom tooltip
map_fig = px.choropleth(
    nationality_counts,
    locations='nationality_name',
    locationmode='country names',
    color='player_count',
    hover_name='nationality_name',
    # Order matters: the hovertemplate below addresses these by position.
    custom_data=['player_count', 'top_player', 'avg_overall', 'avg_age'],
    color_continuous_scale='Viridis',
    title='Player Distribution by Nationality'
)
map_fig.update_traces(
    hovertemplate=(
        '<b>%{hovertext}</b><br>' +
        'Player Count: %{customdata[0]}<br>' +
        'Top Player: %{customdata[1]}<br>' +
        'Avg Overall Rating: %{customdata[2]:.1f}<br>' +
        'Avg Age: %{customdata[3]:.1f}<br>' +
        '<extra></extra>'
    )
)
map_fig.update_layout(template='plotly_dark', geo=dict(showframe=False, projection_type='equirectangular'))
map_insights = [
    f"The top country by player count is {nationality_counts['nationality_name'].iloc[0]} with {nationality_counts['player_count'].iloc[0]} players.",
    f"The top 5 countries account for {nationality_counts['player_count'].nlargest(5).sum() / nationality_counts['player_count'].sum() * 100:.1f}% of all players.",
    "Player distribution is uneven, with Europe and South America having higher representation."
]

# Generate HTML with charts and insights
from html import escape  # stdlib; escapes insight text before it is embedded in markup

# Page skeleton. Literal CSS braces are doubled because the template is filled with
# str.format(); `{rows}` receives the generated chart grid.
# The plotly.js <script> tag is no longer hard-coded here: the 'plotly-latest' CDN alias
# is frozen at an old 1.x release, so the tag is emitted by pio.to_html() below to
# match the installed plotly version.
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>FIFA Players Analysis</title>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/css/bootstrap.min.css">
    <style>
        body {{ background-color: #1a2d46; color: #ffffff; font-family: Arial, sans-serif; }}
        .container {{ padding: 20px; }}
        .chart-row {{ display: flex; flex-wrap: wrap; margin-bottom: 20px; }}
        .chart-col {{ flex: 0 0 50%; max-width: 50%; padding: 10px; }}
        .insights {{ padding: 10px; }}
        h1 {{ text-align: center; margin-bottom: 20px; }}
        h4 {{ margin-top: 10px; }}
        ul {{ font-size: 14px; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>FIFA Players Analysis Dashboard</h1>
{rows}
    </div>
</body>
</html>
'''

# One grid cell: a chart followed by its insight list.
chart_cell_template = '''            <div class="chart-col">
                {chart}
                <div class="insights">
                    <h4>Key Insights</h4>
                    <ul>{insights}</ul>
                </div>
            </div>
'''
# Filler used when the number of charts is odd, so the last row still has two columns.
empty_cell = '''            <div class="chart-col">
                <!-- Empty column to maintain 2x5 grid -->
            </div>
'''

# Charts in display order, each paired with its insight list.
dashboard_sections = [
    (hist_fig, hist_insights), (scatter_fig, scatter_insights),
    (heatmap_fig, heatmap_insights), (bar_fig, bar_insights),
    (box_fig, box_insights), (radar_fig, radar_insights),
    (line_fig, line_insights), (pie_fig, pie_insights),
    (map_fig, map_insights),
]

# Format insights as HTML list items
# The text contains '<' and '>' (e.g. "(<25 years)") and dataset-derived names, so it
# is escaped rather than inserted as raw markup.
insights_html = [
    ''.join(f'<li>{escape(insight, quote=False)}</li>' for insight in insights)
    for _, insights in dashboard_sections
]

# Generate HTML for each chart
# Only the first fragment carries the plotly.js CDN <script> tag; it comes first in the
# page, so the library is loaded before any chart script runs.
charts_html = [
    pio.to_html(fig, include_plotlyjs='cdn' if position == 0 else False, full_html=False)
    for position, (fig, _) in enumerate(dashboard_sections)
]

# Combine HTML content: two cells per row, padding the final row if needed.
cells = [
    chart_cell_template.format(chart=chart, insights=insights)
    for chart, insights in zip(charts_html, insights_html)
]
if len(cells) % 2:
    cells.append(empty_cell)
rows_html = ''.join(
    '        <div class="chart-row">\n' + left_cell + right_cell + '        </div>\n'
    for left_cell, right_cell in zip(cells[::2], cells[1::2])
)
html_output = html_content.format(rows=rows_html.rstrip('\n'))

# Save to file
# Explicit UTF-8 (matching the <meta charset>) so non-ASCII names are written the same
# way regardless of the platform's default encoding.
with open('fifa_analysis4.html', 'w', encoding='utf-8') as f:
    f.write(html_output)

print("Interactive dashboard saved as 'fifa_analysis4.html'")

Available player names (partial match for Messi or Ronaldo):
['L. Messi' 'Cristiano Ronaldo' 'Ronaldo Cabrais' 'Ronaldo Esler'
 'Junior Messias' 'Ronaldo Vieira' 'Ronaldo' 'Ronaldo Mendes']
Unique nationality names in dataset:
['Argentina' 'Poland' 'Portugal' 'Brazil' 'Belgium' 'Slovenia' 'France'
 'Germany' 'England' 'Korea Republic' 'Netherlands' 'Senegal' 'Egypt'
 'Italy' 'Spain' 'Uruguay' 'Costa Rica' 'Norway' 'Croatia' 'Scotland'
 'Algeria' 'Slovakia' 'Denmark' 'Switzerland' 'Hungary' 'Gabon' 'Serbia'
 'Nigeria' 'Morocco' 'Sweden' 'Austria' 'Montenegro' "Côte d'Ivoire"
 'Mexico' 'Bosnia and Herzegovina' 'Finland' 'Greece' 'Armenia' 'Colombia'
 'Cameroon' 'Ghana' 'Wales' 'Russia' 'Turkey' 'United States' 'Jamaica'
 'Canada' 'Czech Republic' 'Chile' 'Ukraine' 'Venezuela' 'Togo'
 'Burkina Faso' 'Northern Ireland' 'Congo DR' 'Israel' 'Albania' 'Guinea'
 'Iceland' 'China PR' 'New Zealand' 'Central African Republic' 'Peru'
 'Mali' 'Japan' 'North Macedonia' 'Ecuador' 'Iran' 'Republic of 