# CPL Analytics - Basic Analysis

This notebook demonstrates how to load and analyze Canadian Premier League data from the CPL Analytics dataset.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('../scripts')

from data_loader import CPLDataLoader

# Set style.
# 'seaborn-v0_8-whitegrid' only exists in matplotlib >= 3.6; older releases ship
# the same look under the name 'seaborn-whitegrid'. Use whichever name this
# installation provides so the first cell does not fail with an OSError.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette('husl')

## 1. Load the Data

In [None]:
# Initialize data loader (path is relative to the notebook's working directory)
loader = CPLDataLoader('../data')

# Load all matches
matches = loader.load_matches()

# Fail fast with a readable message if a column that later cells index into is
# missing, instead of surfacing as a KeyError several sections further down.
required_columns = {'season', 'home_team', 'away_team', 'home_goals', 'away_goals'}
missing_columns = required_columns - set(matches.columns)
assert not missing_columns, f"matches is missing expected columns: {sorted(missing_columns)}"

print(f"Total matches: {len(matches)}")
# Bare last expression so the notebook renders the first rows as a table
matches.head()

In [None]:
# Quick overview of the frame: column names, seasons covered, and home teams
column_names = matches.columns.tolist()
print("Columns:", column_names)

season_values = sorted(matches['season'].unique())
print("\nSeasons:", season_values)

home_team_names = sorted(matches['home_team'].unique())
print("Teams:", home_team_names)

## 2. Goals Analysis

In [None]:
# Per-match goal total: home goals plus away goals, stored as a new column
matches['total_goals'] = matches['home_goals'].add(matches['away_goals'])

# Descriptive statistics (count, mean, std, quartiles, ...) of the totals
print("Goals per match statistics:")
goal_summary = matches['total_goals'].describe()
print(goal_summary)

In [None]:
# Goals distribution
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Total goals histogram.
# Goal totals are integers, so use one unit-wide bin centred on each integer and
# derive the upper edge from the data. A fixed range(0, 12) silently drops any
# match with more than 11 goals and merges 10 and 11 into the same bar.
total_goals = matches['total_goals'].dropna()
max_goals = int(total_goals.max()) if not total_goals.empty else 0
goal_bins = np.arange(-0.5, max_goals + 1.5, 1.0)
mean_goals = total_goals.mean()

axes[0].hist(total_goals, bins=goal_bins, edgecolor='black', alpha=0.7)
axes[0].set_xticks(range(max_goals + 1))
axes[0].set_xlabel('Total Goals')
axes[0].set_ylabel('Frequency')
axes[0].set_title('Distribution of Total Goals per Match')
axes[0].axvline(mean_goals, color='red', linestyle='--', label=f'Mean: {mean_goals:.2f}')
axes[0].legend()

# Home vs Away goals.
# The `labels=` keyword of boxplot is deprecated since matplotlib 3.9 (renamed
# to `tick_labels`); setting the tick labels afterwards works on every version.
axes[1].boxplot([matches['home_goals'], matches['away_goals']])
axes[1].set_xticklabels(['Home', 'Away'])
axes[1].set_ylabel('Goals')
axes[1].set_title('Home vs Away Goals Distribution')

plt.tight_layout()
plt.show()

In [None]:
# Home advantage analysis: count matches by which side scored more
home_scored = matches['home_goals']
away_scored = matches['away_goals']

home_wins = home_scored.gt(away_scored).sum()
away_wins = home_scored.lt(away_scored).sum()
draws = home_scored.eq(away_scored).sum()

# Report each outcome as a count and as a share of all matches
outcome_counts = {'Home Wins': home_wins, 'Away Wins': away_wins, 'Draws': draws}
for outcome, count in outcome_counts.items():
    print(f"{outcome}: {count} ({count/len(matches)*100:.1f}%)")

# Pie chart of the three outcomes
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(
    [home_wins, away_wins, draws],
    labels=['Home Win', 'Away Win', 'Draw'],
    autopct='%1.1f%%',
    colors=['#2ecc71', '#e74c3c', '#95a5a6'],
)
ax.set_title('Match Outcomes in CPL')
plt.show()

## 3. Team Analysis

In [None]:
# Get standings for latest season
# The largest value in the 'season' column is treated as the latest season.
latest_season = matches['season'].max()
# NOTE(review): get_standings lives in data_loader and is not visible here; the
# plotting cell that follows reads 'team', 'points' and 'goal_difference' from
# its result and checks `.empty`, so it is expected to return a DataFrame.
standings = loader.get_standings(latest_season)
print(f"\n{latest_season} Season Standings:")
# Bare last expression so the notebook renders the table
standings

In [None]:
# Team performance visualization (skipped entirely when there are no standings)
if not standings.empty:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Points bar chart.
    # Colour each bar by the team's points (more points -> greener) instead of by
    # row position, so the colouring is right whatever order the table is in.
    # Colouring by position would paint the first row red, which is wrong if the
    # table lists the leader first.
    points = standings['points'].astype(float)
    points_range = points.max() - points.min()
    if points_range > 0:
        # Map points linearly into the 0.2-0.8 band of the colormap
        shade = 0.2 + 0.6 * (points - points.min()) / points_range
    else:
        # Every team level on points: use the colormap midpoint for all bars
        shade = pd.Series(0.5, index=points.index)
    colors = plt.cm.RdYlGn(shade.to_numpy())
    axes[0].barh(standings['team'], standings['points'], color=colors)
    axes[0].set_xlabel('Points')
    axes[0].set_title(f'{latest_season} Season Points')
    # First row of the table is drawn at the top of the chart
    axes[0].invert_yaxis()

    # Goal difference: green for positive, red for negative, grey for exactly zero
    colors = [
        'green' if x > 0 else 'red' if x < 0 else 'grey'
        for x in standings['goal_difference']
    ]
    axes[1].barh(standings['team'], standings['goal_difference'], color=colors)
    axes[1].set_xlabel('Goal Difference')
    axes[1].set_title(f'{latest_season} Goal Difference')
    axes[1].axvline(0, color='black', linewidth=0.5)
    axes[1].invert_yaxis()

    plt.tight_layout()
    plt.show()

## 4. Head-to-Head Analysis

In [None]:
# Example head-to-head: every meeting between Forge FC and Cavalry FC
team_a, team_b = 'Forge FC', 'Cavalry FC'
h2h = loader.get_head_to_head(team_a, team_b)
print(f"{team_a} vs {team_b} - {len(h2h)} matches")

# Show only the columns needed to read off each result
result_columns = ['date', 'home_team', 'away_team', 'home_goals', 'away_goals']
h2h[result_columns]

In [None]:
# H2H Summary: tally wins for each side and draws across all meetings
if not h2h.empty:
    # Line up each side's goals regardless of who was at home: where Forge FC
    # is the home team take the home score, otherwise take the away score.
    forge_at_home = h2h['home_team'] == 'Forge FC'
    forge_goals = h2h['home_goals'].where(forge_at_home, h2h['away_goals'])
    cavalry_goals = h2h['away_goals'].where(forge_at_home, h2h['home_goals'])

    forge_wins = int(forge_goals.gt(cavalry_goals).sum())
    cavalry_wins = int(forge_goals.lt(cavalry_goals).sum())
    # Anything that is neither a Forge win nor a Cavalry win counts as a draw
    h2h_draws = len(h2h) - forge_wins - cavalry_wins

    print(f"Forge FC wins: {forge_wins}")
    print(f"Cavalry FC wins: {cavalry_wins}")
    print(f"Draws: {h2h_draws}")

## 5. Form Analysis

In [None]:
# Get current form for all teams.
# Collect names from both the home and away columns so a side that has only
# appeared as the visitor is still included; missing names are dropped.
teams = pd.concat([matches['home_team'], matches['away_team']]).dropna().unique()

form_data = []
for team in teams:
    # Last 5 results; the result is only used through `.count('W'/'D'/'L')`
    form = loader.get_recent_form(team, 5)
    # Loop-local names are prefixed with `form_` so they do not overwrite the
    # notebook-level `draws` computed in the match-outcomes cell.
    form_wins = form.count('W')
    form_draws = form.count('D')
    form_losses = form.count('L')
    form_data.append({
        'team': team,
        'form': form,
        'wins': form_wins,
        'draws': form_draws,
        'losses': form_losses,
        # Standard football scoring: 3 points per win, 1 per draw
        'points': form_wins * 3 + form_draws
    })

# Explicit columns keep the sort working even when there are no teams at all
form_columns = ['team', 'form', 'wins', 'draws', 'losses', 'points']
form_df = pd.DataFrame(form_data, columns=form_columns).sort_values('points', ascending=False)
form_df

## 6. Over/Under Analysis

In [None]:
# Over/under flags: a match is "over X" when its total goals exceed the line X.
# Columns are created in the same order as before (2.5, then 1.5, then 3.5).
over_lines = {'over_25': 2.5, 'over_15': 1.5, 'over_35': 3.5}
for column, goal_line in over_lines.items():
    matches[column] = matches['total_goals'].gt(goal_line)

# Share of matches above each line, lowest line first
for column in ('over_15', 'over_25', 'over_35'):
    share = matches[column].mean()
    print(f"Over {over_lines[column]}: {share*100:.1f}%")

In [None]:
# Over 2.5 by team (home matches): share of each team's home games with 3+ goals
over25_by_team = matches.groupby('home_team')['over_25'].mean().sort_values(ascending=False)
league_over25 = matches['over_25'].mean()

# Plot in percent so the bars, the y-axis and the legend all share one scale.
# Previously the axis showed fractions (0-1) while the legend quoted a percentage.
fig, ax = plt.subplots(figsize=(10, 6))
(over25_by_team * 100).plot(kind='bar', ax=ax, color='steelblue', edgecolor='black')
ax.axhline(league_over25 * 100, color='red', linestyle='--', label=f'League avg: {league_over25*100:.1f}%')
ax.set_xlabel('Team')
ax.set_ylabel('Over 2.5 Rate (%)')
ax.set_title('Over 2.5 Goals Rate by Home Team')
# Slant the team names so long labels do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.legend()
fig.tight_layout()
plt.show()

## 7. BTTS (Both Teams to Score) Analysis

In [None]:
# BTTS analysis: a match counts when both the home and the away side scored
matches['btts'] = (matches['home_goals'] > 0) & (matches['away_goals'] > 0)

print(f"BTTS Rate: {matches['btts'].mean()*100:.1f}%")

# BTTS by team, split by venue (highest rate first)
btts_home = matches.groupby('home_team')['btts'].mean().sort_values(ascending=False)
btts_away = matches.groupby('away_team')['btts'].mean().sort_values(ascending=False)

print("\nBTTS when playing at home:")
print(btts_home)

# The away split was computed but never shown; report it alongside the home split
print("\nBTTS when playing away:")
print(btts_away)

## 8. Season Trends

In [None]:
# Per-season averages of home/away/total goals and of the over-2.5 flag,
# rounded to two decimals and given display-friendly column names
display_names = {
    'home_goals': 'Avg Home Goals',
    'away_goals': 'Avg Away Goals',
    'total_goals': 'Avg Total Goals',
    'over_25': 'Over 2.5 Rate',
}
season_stats = (
    matches
    .groupby('season')[list(display_names)]
    .mean()
    .round(2)
    .rename(columns=display_names)
)
season_stats

In [None]:
# Visualize trends: average goals per match for each season
fig, ax = plt.subplots(figsize=(10, 5))

seasons = season_stats.index
avg_goals = season_stats['Avg Total Goals']

ax.plot(seasons, avg_goals, marker='o', linewidth=2, label='Avg Goals/Match')
# Shade the area between the line and zero
ax.fill_between(seasons, avg_goals, alpha=0.3)

# Seasons are discrete. With an integer index matplotlib may pick fractional
# tick positions (e.g. 2020.5), so pin exactly one tick per season in that case.
if pd.api.types.is_integer_dtype(seasons):
    ax.set_xticks(list(seasons))

ax.set_xlabel('Season')
ax.set_ylabel('Goals per Match')
ax.set_title('CPL Goals per Match Trend')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Next Steps

- Check out `poisson_model.py` for a betting model example
- Use the weather data for advanced analysis
- Combine with historical odds for value betting analysis