# Content Testing - NCAA Soccer Visualizations

This notebook contains 7 different visualization ideas to create diverse and engaging content for social media and analysis.

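**Prerequisites:** Run the main `ncaaMatchPredictor.ipynb` notebook first, in the same kernel session as this one (for example with `%run ncaaMatchPredictor.ipynb` in a cell above), to ensure all data is loaded and processed. The cells below rely on names that are not defined in this notebook: `overall_df`, `df`, `calculateMatchProbability`, and `create_conference_animation`.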

## 1. Conference Strength Comparison
Compare the average strength (STR) of teams across different conferences.

In [None]:
# Conference Strength Comparison
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Calculate average strength by conference, sorted ascending so that the
# strongest conference is drawn as the top bar of the horizontal chart.
conference_strength = (
    overall_df.groupby('Conference')['STR']
    .mean()
    .sort_values(ascending=True)
)

# Draw on an explicit Axes instead of pyplot's implicit "current figure".
# Series.plot returns the Axes (not the bar artists), so its return value is
# not kept under a misleading name.
fig, ax = plt.subplots(figsize=(12, 10))
conference_strength.plot(kind='barh', ax=ax, color='skyblue', edgecolor='black', linewidth=1.2)
ax.set_title('Average Team Strength by Conference', fontsize=20, fontweight='bold', pad=20)
ax.set_xlabel('Average Strength Rating', fontsize=14, fontweight='bold')
ax.set_ylabel('Conference', fontsize=14, fontweight='bold')
ax.grid(axis='x', linestyle='--', alpha=0.7)

# Add value labels on bars: the bar for the i-th conference sits at y == i,
# and the label is nudged slightly to the right of the bar end.
for bar_position, avg_strength in enumerate(conference_strength.values):
    ax.text(avg_strength + 0.01, bar_position, f'{avg_strength:.2f}',
            va='center', fontweight='bold')

fig.tight_layout()
fig.savefig('conference_strength_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("📊 Conference strength comparison saved!")
print(f"🏆 Strongest conference: {conference_strength.idxmax()} ({conference_strength.max():.2f})")
print(f"📉 Weakest conference: {conference_strength.idxmin()} ({conference_strength.min():.2f})")

## 2. Top Teams by Strength
Highlight the top 10 teams by their STR value.

In [None]:
# Top Teams by Strength
# Ten highest STR rows, strongest first.
top_teams = overall_df.sort_values(by='STR', ascending=False).head(10)
team_count = len(top_teams)
bar_positions = range(team_count)
bar_colors = plt.cm.viridis(np.linspace(0, 1, team_count))

plt.figure(figsize=(14, 8))
bars = plt.barh(bar_positions, top_teams['STR'], color=bar_colors)
ax = plt.gca()

# Axis labelling and styling
plt.yticks(bar_positions, top_teams['Team'])
ax.set_xlabel('Strength Rating', fontsize=14, fontweight='bold')
ax.set_ylabel('Team', fontsize=14, fontweight='bold')
ax.set_title('Top 10 Teams by Strength Rating', fontsize=20, fontweight='bold', pad=20)
ax.grid(axis='x', linestyle='--', alpha=0.7)

# Annotate each bar with its rating and the team's conference
strength_and_conference = zip(top_teams['STR'], top_teams['Conference'])
for bar_position, (strength, conference) in enumerate(strength_and_conference):
    ax.text(strength + 0.01, bar_position, f"{strength:.2f} ({conference})",
            va='center', fontsize=10, fontweight='bold')

ax.invert_yaxis()  # Highest at top
plt.tight_layout()
plt.savefig('top_teams_by_strength.png', dpi=300, bbox_inches='tight')
plt.show()

strongest = top_teams.iloc[0]
print(f"🥇 Strongest team: {strongest['Team']} ({strongest['STR']:.2f})")
print(f"🏆 Conference representation in top 10:")
print(top_teams['Conference'].value_counts())

## 3. Goal Distribution Analysis
Show the distribution of goals scored by home and away teams.

In [None]:
# Goal Distribution Analysis
from scipy import stats

# Drop missing scores once, up front: gaussian_kde rejects NaN input and the
# bin edges below need a finite maximum.
home_goals = df['home_team_score'].dropna()
away_goals = df['away_team_score'].dropna()
max_goals = int(max(home_goals.max(), away_goals.max()))

# Goals are integer counts, so use unit-width bins centred on each integer
# (-0.5..0.5 holds 0 goals, 0.5..1.5 holds 1 goal, ...). This keeps the bars
# aligned with the integer tick labels and with the KDE curves, and both
# histograms share the same edges so they overlay exactly.
goal_bins = np.arange(-0.5, max_goals + 1.5, 1.0)

plt.figure(figsize=(12, 8))

# Create overlapping histograms
plt.hist(home_goals, bins=goal_bins,
         alpha=0.7, color='blue', label='Home Goals', density=True, edgecolor='black')
plt.hist(away_goals, bins=goal_bins,
         alpha=0.7, color='red', label='Away Goals', density=True, edgecolor='black')

# Add KDE curves, evaluated on a common grid from 0 to the highest score seen
x_range = np.linspace(0, max_goals, 100)
home_kde = stats.gaussian_kde(home_goals)
away_kde = stats.gaussian_kde(away_goals)

plt.plot(x_range, home_kde(x_range), color='darkblue', linewidth=3, label='Home KDE')
plt.plot(x_range, away_kde(x_range), color='darkred', linewidth=3, label='Away KDE')

plt.title('Distribution of Goals Scored (Home vs Away)', fontsize=18, fontweight='bold', pad=20)
plt.xlabel('Goals Scored', fontsize=14, fontweight='bold')
plt.ylabel('Density', fontsize=14, fontweight='bold')
plt.xticks(range(max_goals + 1))  # one tick per whole-goal bar
plt.legend(fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Add statistics text (axes-fraction coordinates, so it stays in the
# upper-right area regardless of the data range)
home_avg = home_goals.mean()
away_avg = away_goals.mean()
plt.text(0.7, 0.8, f'Home Avg: {home_avg:.2f}\nAway Avg: {away_avg:.2f}\nHome Advantage: {home_avg-away_avg:.2f}',
         transform=plt.gca().transAxes, fontsize=12, fontweight='bold',
         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

plt.tight_layout()
plt.savefig('goal_distribution_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"⚽ Home team average: {home_avg:.2f} goals")
print(f"🏃 Away team average: {away_avg:.2f} goals")
print(f"🏠 Home advantage: {home_avg-away_avg:.2f} goals")

## 4. Team Performance Radar Chart
Visualize a specific team's performance metrics as a radar chart.

In [None]:
# Team Performance Radar Chart
from math import pi


def _scale_to_100(column_values, value, invert=False):
    """Min-max scale ``value`` against ``column_values`` onto a 0-100 range.

    Args:
        column_values: pandas Series supplying the minimum and maximum used
            for scaling.
        value: The single value to place on that scale.
        invert: When True, return ``100 - score`` so that the smallest value
            in the column scores 100.

    Returns:
        The scaled score. When every entry of ``column_values`` is identical
        the ratio is undefined (0 / 0), so a neutral 50.0 is returned instead
        of propagating NaN into the chart.
    """
    lowest = column_values.min()
    span = column_values.max() - lowest
    if span == 0:
        return 50.0
    score = (value - lowest) / span * 100
    return 100 - score if invert else score


# Select a team (you can change this to any value present in overall_df['Team'])
team_name = overall_df.sort_values('STR', ascending=False).iloc[0]['Team']  # Top team
print(f"Creating radar chart for: {team_name}")

# First row matching the selected team
team_data = overall_df[overall_df['Team'] == team_name].iloc[0]

# One entry per radar axis: (axis label, overall_df column, invert?).
# Keeping label and column together stops the labels and values drifting
# out of order. DEF and GA are inverted so a lower raw value plots further
# out (presumably lower is better for both -- confirm against how DEF is
# computed). NOTE(review): only the GA label says "(Inv.)".
radar_metrics = [
    ('Attack Rating', 'ATT', False),
    ('Defense Rating', 'DEF', True),
    ('Goals For', 'GF', False),
    ('Goals Against (Inv.)', 'GA', True),
    ('Matches Played', 'MP', False),
]
categories = [label for label, _, _ in radar_metrics]

# Normalize values for better visualization (scale 0-100)
values = [
    _scale_to_100(overall_df[column], team_data[column], invert)
    for _, column, invert in radar_metrics
]
values += values[:1]  # Repeat the first point to close the polygon

# Evenly spaced angle for each category, closed the same way as the values
axis_count = len(categories)
angles = [n / float(axis_count) * 2 * pi for n in range(axis_count)]
angles += angles[:1]

# Create the radar chart
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))
ax.set_facecolor('#f8f9fa')

# Plot the data
ax.plot(angles, values, linewidth=3, linestyle='solid', label=team_name, color='#2E86AB')
ax.fill(angles, values, color='#2E86AB', alpha=0.25)

# Customize the chart (the duplicated closing angle is excluded from ticks)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, fontsize=12, fontweight='bold')
ax.set_ylim(0, 100)
ax.set_yticks([20, 40, 60, 80, 100])
ax.set_yticklabels(['20', '40', '60', '80', '100'], fontsize=10)
ax.grid(True, alpha=0.3)

# Add title and legend
plt.title(f'Performance Radar: {team_name}\n({team_data["Conference"]})',
          size=16, fontweight='bold', y=1.1)
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0), fontsize=12)

# Add values as text, placed 5 units beyond each vertex
for angle, value in zip(angles[:-1], values[:-1]):
    ax.text(angle, value + 5, f'{value:.0f}', ha='center', va='center',
            fontsize=10, fontweight='bold', color='#2E86AB')

plt.tight_layout()
plt.savefig(f'{team_name.replace(" ", "_")}_radar_chart.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"📊 Radar chart created for {team_name}")
print(f"🎯 Team strength: {team_data['STR']:.2f}")

## 5. Win Probability Heatmap
Show a heatmap of win probabilities for matchups between top teams.

In [None]:
# Win Probability Heatmap
# Restrict to the eight highest-STR teams to keep the grid readable
top_teams_list = (
    overall_df.sort_values(by='STR', ascending=False)['Team']
    .head(8)
    .tolist()
)

print("Calculating win probabilities for top teams...")

# Rows are home teams, columns are away teams. A team cannot play itself,
# so the diagonal is NaN; every other cell is the home side's win
# probability expressed as a percentage.
probabilities = [
    [
        np.nan
        if home == away
        else calculateMatchProbability(home, away)['Home Win Probability'] * 100
        for away in top_teams_list
    ]
    for home in top_teams_list
]

# Label both axes of the matrix with the team names
prob_df = pd.DataFrame(probabilities, index=top_teams_list, columns=top_teams_list)

# Draw the heatmap, hiding the NaN diagonal
plt.figure(figsize=(12, 10))
mask = np.isnan(prob_df)
heatmap_options = dict(
    annot=True,
    cmap='RdYlBu_r',
    fmt='.1f',
    cbar_kws={'label': 'Home Win Probability (%)'},
    mask=mask,
    square=True,
    linewidths=0.5,
    vmin=30,  # colour scale spans 30-70 so it is centred on 50%
    vmax=70,
)
sns.heatmap(prob_df, **heatmap_options)

plt.title('Win Probability Heatmap\n(Top 8 Teams - Home Advantage)',
          fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Away Team', fontsize=14, fontweight='bold')
plt.ylabel('Home Team', fontsize=14, fontweight='bold')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)

plt.tight_layout()
plt.savefig('win_probability_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()

highest_pct = prob_df.max().max()
lowest_pct = prob_df.min().min()
print(f"🔥 Heatmap created for top 8 teams")
print(f"📈 Highest home win probability: {highest_pct:.1f}%")
print(f"📉 Lowest home win probability: {lowest_pct:.1f}%")

## 6. Conference Animation Generator
Create animations for different conferences easily.

In [None]:
# Conference Animation Generator
# Conferences we would like an animation for
conferences_to_animate = ["Big East", "ACC", "Big Ten", "Pac-12", "SEC"]

print("🎬 Creating animations for multiple conferences...")

# Conferences actually present in the loaded data
available_conferences = overall_df['Conference'].unique()
print(f"Available conferences: {sorted(available_conferences)}")

# Attempt one animation per requested conference. A missing conference or a
# failed render is reported and skipped so the remaining ones still run.
for conf in conferences_to_animate:
    if conf not in available_conferences:
        print(f"⚠️ {conf} not found in available conferences")
        continue

    print(f"\n🎯 Creating animation for {conf}...")
    animation_settings = dict(
        conference_name=conf,
        num_teams=6,  # Show top 6 teams
        output_format="gif",
    )
    try:
        animation_result = create_conference_animation(**animation_settings)
        print(f"✅ {conf} animation created successfully!")
    except Exception as e:
        print(f"❌ Error creating {conf} animation: {e}")

print("\n🎬 Animation generation complete!")

## 7. Strength vs Goals Scatter Plot
Show the relationship between team strength and goals scored.

In [None]:
# Strength vs Goals Scatter Plot
from scipy import stats

plt.figure(figsize=(14, 10))

# Create scatter plot with one colour per conference.
# NOTE: Set3 is a qualitative palette with 12 entries, so when there are more
# than 12 conferences some of them will share a colour.
conferences = overall_df['Conference'].unique()
colors = plt.cm.Set3(np.linspace(0, 1, len(conferences)))
conference_colors = dict(zip(conferences, colors))

for conf in conferences:
    conf_data = overall_df[overall_df['Conference'] == conf]
    plt.scatter(conf_data['STR'], conf_data['GF'],
                label=conf, alpha=0.7, s=80,
                color=conference_colors[conf], edgecolors='black', linewidth=0.5)

# Add trend line. Rows missing either value are excluded from the fit because
# a single NaN would make every regression statistic NaN.
fit_data = overall_df.dropna(subset=['STR', 'GF'])
slope, intercept, r_value, p_value, std_err = stats.linregress(fit_data['STR'], fit_data['GF'])

# A straight line only needs its two end points. Plotting through every
# (unsorted) STR value retraces the line many times and smears the dashes.
trend_x = np.array([fit_data['STR'].min(), fit_data['STR'].max()])
plt.plot(trend_x, slope * trend_x + intercept, 'r--', alpha=0.8, linewidth=2,
         label=f'Trend Line (R² = {r_value**2:.3f})')

# Annotate top teams
top_5_teams = overall_df.nlargest(5, 'STR')
for _, row in top_5_teams.iterrows():
    plt.annotate(row['Team'], (row['STR'], row['GF']),
                 xytext=(5, 5), textcoords='offset points',
                 fontsize=9, fontweight='bold', alpha=0.8,
                 bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

# Marker size is constant (s=80), so the subtitle only advertises the colour
# encoding that is actually used.
plt.title('Team Strength vs Goals Scored\n(Color = Conference)',
          fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Team Strength Rating', fontsize=14, fontweight='bold')
plt.ylabel('Goals Scored', fontsize=14, fontweight='bold')
plt.grid(alpha=0.3)

# Build the legend from a label -> handle mapping so that a label appearing
# more than once is listed only once; placed outside the axes on the right.
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
plt.legend(by_label.values(), by_label.keys(),
           bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)

plt.tight_layout()
plt.savefig('strength_vs_goals_scatter.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"📊 Scatter plot created!")
print(f"📈 Correlation coefficient: {r_value:.3f}")
print(f"🎯 R-squared value: {r_value**2:.3f}")
# Classify by sign as well as magnitude: a clearly negative coefficient is
# not a "weak" relationship and must not be reported as one.
if r_value > 0.5:
    print("✅ Strong positive correlation between strength and goals!")
elif r_value > 0.3:
    print("📊 Moderate positive correlation between strength and goals.")
elif r_value < -0.3:
    print("⚠️ Negative correlation between strength and goals.")
else:
    print("⚠️ Weak correlation between strength and goals.")

## Summary

This notebook created 7 different visualization types:

1. **Conference Strength Comparison** - Horizontal bar chart comparing conferences
2. **Top Teams by Strength** - Ranked visualization of strongest teams
3. **Goal Distribution Analysis** - Home vs Away goal patterns
4. **Team Performance Radar Chart** - Multi-metric team analysis
5. **Win Probability Heatmap** - Head-to-head matchup predictions
6. **Conference Animation Generator** - Multiple dynamic championship race animations
7. **Strength vs Goals Scatter Plot** - Correlation analysis with trend lines

Each visualization serves different content purposes:
- **Static posts** for detailed analysis (charts 1, 2, 3, 7)
- **Individual team highlights** (chart 4)
- **Matchup predictions** (chart 5)
- **Dynamic social media content** (chart 6)

The static charts are saved as high-resolution (300 dpi) PNG files for sharing; the conference animations in section 6 are requested in GIF format.