# Premier League Matchweek 26 - Player Analysis

This notebook provides an interactive analysis of player performance predictions for Premier League Matchweek 26.

## Analysis Categories
1. **Top Goal Scorers** - Players most likely to score goals
2. **Top Assist Providers** - Players most likely to provide assists
3. **Clean Sheet Candidates** - Teams/goalkeepers most likely to keep clean sheets

## Data Sources
- **FotMob API** - Primary source for xG, xA, and player statistics
- **SofaScore API** - Supplementary source for fixtures and team data

## 1. Setup and Initialization

In [None]:
# Import required libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from src.analysis.player_analyzer import PlayerAnalyzer
from src.scrapers.fotmob_scraper import FotMobScraper
from src.scrapers.sofascore_scraper import SofaScoreScraper

# Set display options: render every column and up to 100 rows of a DataFrame,
# and pass None for the width instead of a fixed character count.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', None)

# Set plot style (matplotlib style sheet + seaborn colour palette)
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# The check mark had been stored as mojibake (UTF-8 bytes decoded with the
# wrong codec); the intended character is restored here.
print("✓ Libraries imported successfully")

In [None]:
# Initialize scrapers and analyzer: both scraper instances are handed to the
# PlayerAnalyzer through its keyword arguments.
fotmob = FotMobScraper()
sofascore = SofaScoreScraper()
analyzer = PlayerAnalyzer(fotmob_scraper=fotmob, sofascore_scraper=sofascore)

# Echo the Premier League settings read from analyzer.config so a wrong
# league/tournament id or season is visible before any data is fetched.
# (The check mark was mojibake in the original literal and is restored.)
print("✓ PlayerAnalyzer initialized")
print(f"  - Premier League ID (FotMob): {analyzer.config['premier_league']['fotmob_league_id']}")
print(f"  - Tournament ID (SofaScore): {analyzer.config['premier_league']['sofascore_tournament_id']}")
print(f"  - Season: {analyzer.config['premier_league']['season']}")

## 2. Fetch Matchweek Fixtures

In [None]:
# Fetch fixtures for matchweek 26.
# MATCHWEEK is reused by the report, export and summary cells further down.
MATCHWEEK = 26

fixtures = analyzer.get_premier_league_fixtures(matchweek=MATCHWEEK)

# An empty/falsy result is reported instead of building an empty table.
# The calendar and warning symbols below were mojibake in the original
# literals (UTF-8 decoded with the wrong codec) and have been restored.
if fixtures:
    fixtures_df = pd.DataFrame(fixtures)
    print(f"\n📅 Found {len(fixtures)} fixtures for Matchweek {MATCHWEEK}:\n")
    display(fixtures_df[['home_team', 'away_team', 'date']].head(10))
else:
    print(f"⚠️ No fixtures found for matchweek {MATCHWEEK}")
    print("This may be due to network restrictions or incorrect season/matchweek.")

## 3. Generate Complete Analysis Report

In [None]:
# Generate comprehensive analysis.
# `report` is indexed below (and in later cells) by the keys 'fixtures',
# 'top_goal_scorers', 'top_assist_providers' and 'clean_sheet_candidates'.
report = analyzer.generate_analysis_report(matchweek=MATCHWEEK, top_n=20)

# Print how many entries each section holds. The chart symbol was mojibake in
# the original literal and is restored; the first line is a plain string
# because it contains no placeholders.
print("\n📊 Analysis Report Generated:")
print(f"  - Fixtures: {len(report['fixtures'])}")
print(f"  - Top Goal Scorers: {len(report['top_goal_scorers'])}")
print(f"  - Top Assist Providers: {len(report['top_assist_providers'])}")
print(f"  - Clean Sheet Candidates: {len(report['clean_sheet_candidates'])}")

## 4. Top Goal Scorers Analysis

In [None]:
# Display top goal scorers as a table.
# The football and warning symbols below were mojibake in the original
# literals (UTF-8 decoded with the wrong codec) and have been restored.
if report['top_goal_scorers']:
    goal_scorers_df = pd.DataFrame(report['top_goal_scorers'])
    # Extra column: the probability multiplied by 100 for percentage display.
    goal_scorers_df['goal_probability_pct'] = goal_scorers_df['goal_probability'] * 100

    print("\n⚽ TOP 20 PLAYERS - GOAL SCORING PROBABILITY\n")
    display_cols = ['rank', 'name', 'team', 'opponent', 'goal_probability_pct', 'xg_per_90', 'form_score']
    display(goal_scorers_df[display_cols].head(20))
else:
    print("\n⚠️ No goal scorer data available")

In [None]:
# Bar chart of the first ten rows of the goal-scorer list:
# bar height is the goal probability, bar colour follows xg_per_90.
if not report['top_goal_scorers']:
    print("No data to visualize")
else:
    top_10 = pd.DataFrame(report['top_goal_scorers']).iloc[:10]

    fig = px.bar(
        top_10,
        x='name',
        y='goal_probability',
        color='xg_per_90',
        color_continuous_scale='Reds',
        title='Top 10 Players - Goal Scoring Probability',
        labels={'name': 'Player', 'goal_probability': 'Probability'},
    )
    # update_layout returns the figure itself, so the call can be chained.
    fig.update_layout(height=500, xaxis_tickangle=-45).show()

## 5. Top Assist Providers Analysis

In [None]:
# Display top assist providers as a table.
# The target and warning symbols below were mojibake in the original
# literals (UTF-8 decoded with the wrong codec) and have been restored.
if report['top_assist_providers']:
    assist_providers_df = pd.DataFrame(report['top_assist_providers'])
    # Extra column: the probability multiplied by 100 for percentage display.
    assist_providers_df['assist_probability_pct'] = assist_providers_df['assist_probability'] * 100

    print("\n🎯 TOP 20 PLAYERS - ASSIST PROBABILITY\n")
    display_cols = ['rank', 'name', 'team', 'opponent', 'assist_probability_pct', 'xa_per_90', 'key_passes']
    display(assist_providers_df[display_cols].head(20))
else:
    print("\n⚠️ No assist provider data available")

In [None]:
# Bar chart of the first ten rows of the assist-provider list:
# bar height is the assist probability, bar colour follows xa_per_90.
if not report['top_assist_providers']:
    print("No data to visualize")
else:
    top_10_assists = pd.DataFrame(report['top_assist_providers']).iloc[:10]

    fig = px.bar(
        top_10_assists,
        x='name',
        y='assist_probability',
        color='xa_per_90',
        color_continuous_scale='Blues',
        title='Top 10 Players - Assist Probability',
        labels={'name': 'Player', 'assist_probability': 'Probability'},
    )
    # update_layout returns the figure itself, so the call can be chained.
    fig.update_layout(height=500, xaxis_tickangle=-45).show()

## 6. Clean Sheet Candidates Analysis

In [None]:
# Display clean sheet candidates as a table.
# The gloves and warning symbols below were mojibake in the original
# literals (UTF-8 decoded with the wrong codec) and have been restored.
if report['clean_sheet_candidates']:
    clean_sheets_df = pd.DataFrame(report['clean_sheet_candidates'])
    # Extra column: the probability multiplied by 100 for percentage display.
    clean_sheets_df['clean_sheet_probability_pct'] = clean_sheets_df['clean_sheet_probability'] * 100

    print("\n🧤 TOP 10 TEAMS - CLEAN SHEET PROBABILITY\n")
    display_cols = ['rank', 'team', 'goalkeeper', 'opponent', 'clean_sheet_probability_pct', 'recent_clean_sheets']
    display(clean_sheets_df[display_cols].head(10))
else:
    print("\n⚠️ No clean sheet data available")

In [None]:
# Bar chart of the first ten rows of the clean-sheet list:
# bar height is the clean sheet probability, bar colour follows
# recent_clean_sheets.
if not report['clean_sheet_candidates']:
    print("No data to visualize")
else:
    top_10_cs = pd.DataFrame(report['clean_sheet_candidates']).iloc[:10]

    fig = px.bar(
        top_10_cs,
        x='team',
        y='clean_sheet_probability',
        color='recent_clean_sheets',
        color_continuous_scale='Greens',
        title='Top 10 Teams - Clean Sheet Probability',
        labels={'team': 'Team', 'clean_sheet_probability': 'Probability'},
    )
    # update_layout returns the figure itself, so the call can be chained.
    fig.update_layout(height=500, xaxis_tickangle=-45).show()

## 7. Combined Analysis Visualizations

In [None]:
# Scatter plot relating each player's xG per 90 to the predicted goal
# probability (first 20 rows of the goal-scorer list).
if report['top_goal_scorers']:
    # Use a cell-specific name: the goal-scorer table cell already builds
    # `goal_scorers_df` with an extra `goal_probability_pct` column, and
    # rebinding that name here would silently replace it with a frame that
    # lacks the column.
    scatter_df = pd.DataFrame(report['top_goal_scorers']).head(20)

    fig = px.scatter(
        scatter_df,
        x='xg_per_90',
        y='goal_probability',
        # NOTE(review): marker size is driven by form_score; confirm the
        # analyzer never emits negative or missing values for it.
        size='form_score',
        color='team',
        hover_data=['name', 'opponent'],
        title='Goal Probability vs Expected Goals (xG) per 90 minutes',
        labels={
            'xg_per_90': 'xG per 90 minutes',
            'goal_probability': 'Goal Probability'
        }
    )
    fig.update_layout(height=600)
    fig.show()
else:
    print("No data available for visualization")

## 8. Export Results

In [None]:
# Export the full report to JSON and each non-empty ranking to its own CSV.
import json
from pathlib import Path

output_dir = Path('../data/processed')
output_dir.mkdir(parents=True, exist_ok=True)

# Save JSON report.
# The encoding is stated explicitly so the file does not depend on the
# platform's default locale encoding.
json_path = output_dir / f'matchweek_{MATCHWEEK}_analysis.json'
with open(json_path, 'w', encoding='utf-8') as f:
    # default=str stringifies any value json cannot serialise natively.
    json.dump(report, f, indent=2, default=str)
print(f"\n✓ Report saved to {json_path}")

# Save CSV files: report key -> (file name stem, label used in the message).
# A section is written only when it holds data, so no empty CSV is produced.
csv_exports = {
    'top_goal_scorers': ('goal_scorers', 'Goal scorers'),
    'top_assist_providers': ('assist_providers', 'Assist providers'),
    'clean_sheet_candidates': ('clean_sheets', 'Clean sheets'),
}
for report_key, (file_stem, label) in csv_exports.items():
    rows = report[report_key]
    if not rows:
        continue
    pd.DataFrame(rows).to_csv(
        output_dir / f'matchweek_{MATCHWEEK}_{file_stem}.csv',
        index=False
    )
    print(f"✓ {label} saved to CSV")

## 9. Summary Statistics

In [None]:
# Print summary statistics (average / highest / lowest probability per section)
def _print_probability_summary(heading, probabilities):
    """Print the mean, max and min of `probabilities` as percentages.

    Args:
        heading: Title line printed above the three figures (a colon is
            appended).
        probabilities: Non-empty sequence of numbers; each figure is
            multiplied by 100 and shown with one decimal place.
    """
    print(f"\n{heading}:")
    print(f"  - Average: {np.mean(probabilities)*100:.1f}%")
    print(f"  - Highest: {max(probabilities)*100:.1f}%")
    print(f"  - Lowest: {min(probabilities)*100:.1f}%")


print("\n" + "="*80)
print(f"PREMIER LEAGUE MATCHWEEK {MATCHWEEK} - ANALYSIS SUMMARY")
print("="*80)

# Each section is summarised only when it holds data, because max()/min()
# raise on an empty sequence. The heading symbols were mojibake in the
# original literals and have been restored.
if report['top_goal_scorers']:
    goal_probs = [p['goal_probability'] for p in report['top_goal_scorers']]
    _print_probability_summary("⚽ Goal Scoring Probabilities", goal_probs)

if report['top_assist_providers']:
    assist_probs = [p['assist_probability'] for p in report['top_assist_providers']]
    _print_probability_summary("🎯 Assist Probabilities", assist_probs)

if report['clean_sheet_candidates']:
    cs_probs = [t['clean_sheet_probability'] for t in report['clean_sheet_candidates']]
    _print_probability_summary("🧤 Clean Sheet Probabilities", cs_probs)

print("\n" + "="*80)