# 🚀 Quick Start Guide - Injury Data Analysis

This notebook shows how to get started with the processed data.

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
pd.set_option('display.max_columns', None)
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

print("‚úÖ Libraries imported")

## 2. Loading Data

Main file: `injury_data_consolidated.xlsx` (12 sheets)

In [None]:
# Workbook that holds the processed datasets (bare filename, so it is resolved
# against the notebook's working directory)
FILE = 'injury_data_consolidated.xlsx'

# Check available sheets.
# The context manager closes the workbook handle as soon as the names are read,
# instead of leaving the file open for the rest of the kernel session.
with pd.ExcelFile(FILE) as excel_file:
    sheet_names = list(excel_file.sheet_names)

print("üìä Available sheets:")
for i, sheet in enumerate(sheet_names, 1):
    print(f"   {i:2d}. {sheet}")

In [None]:
# Load key datasets.
# A list-valued `sheet_name` makes pandas open the workbook once and return a
# {sheet name: DataFrame} dict, rather than re-opening the file for each sheet.
KEY_SHEETS = [
    'NBA_Processed',
    'WNBA_Processed',
    'Basketball_Combined',
    'Soccer_Processed',
    'Collegiate_Processed',
]
sheets = pd.read_excel(FILE, sheet_name=KEY_SHEETS)

df_nba = sheets['NBA_Processed']
df_wnba = sheets['WNBA_Processed']
df_basketball = sheets['Basketball_Combined']
df_soccer = sheets['Soccer_Processed']
df_collegiate = sheets['Collegiate_Processed']

# Row counts as a quick sanity check that every sheet was loaded
print(f"‚úÖ NBA: {df_nba.shape[0]} rows")
print(f"‚úÖ WNBA: {df_wnba.shape[0]} rows")
print(f"‚úÖ Basketball Combined: {df_basketball.shape[0]} rows")
print(f"‚úÖ Soccer: {df_soccer.shape[0]} rows")
print(f"‚úÖ Collegiate: {df_collegiate.shape[0]} rows")

## 3. Basic Exploration

In [None]:
# Quick look at the NBA sheet: column names first, then the top of the table
nba_columns = df_nba.columns.tolist()
nba_preview = df_nba.head()

print("üîç SAMPLE NBA DATA:")
print("\nColumns:")
print(nba_columns)

print("\nFirst 5 rows:")
display(nba_preview)

In [None]:
# Check unique Period values
print("üìä Period distribution (NBA):")
print(df_nba['Period'].value_counts())

print("\nüìä Unique players:")
# Take the names from the 'Summary Before' rows and drop repeats (first-seen
# order is kept), so the list is actually unique as the heading promises even
# if a player has more than one 'Summary Before' row.
players = (
    df_nba.loc[df_nba['Period'] == 'Summary Before', 'Player_Name']
    .drop_duplicates()
    .tolist()
)
for i, player in enumerate(players, 1):
    print(f"   {i:2d}. {player}")

## 4. EXAMPLE: Before/After Comparison for One Player

In [None]:
# Player to inspect
player_name = 'Derrick Rose'  # Change to any player

# Boolean masks: rows for this player, and rows that are before/after summaries
is_player = df_nba['Player_Name'] == player_name
is_summary = df_nba['Period'].isin(['Summary Before', 'Summary After'])
player_data = df_nba[is_player & is_summary]

# Columns shown in the comparison table
summary_columns = ['Player_Name', 'Period', 'games played', 'PTS', 'AST', 'REB', 'FG%', '3PT%', 'FT%']

print(f"üìä {player_name} - Stats before and after ACL injury:")
display(player_data[summary_columns])

In [None]:
# Calculate percentage changes
# Pull the first summary row for each period. Fail with a readable message when
# a row is missing (e.g. a misspelled player_name), instead of the bare
# "single positional indexer is out-of-bounds" IndexError from .iloc[0].
summary_rows = {}
for period in ('Summary Before', 'Summary After'):
    period_rows = player_data[player_data['Period'] == period]
    if period_rows.empty:
        raise ValueError(
            f"No '{period}' row found for player {player_name!r}; "
            "check the spelling of player_name against the player list."
        )
    summary_rows[period] = period_rows.iloc[0]

before = summary_rows['Summary Before']
after = summary_rows['Summary After']

stats = ['PTS', 'AST', 'REB', 'FG%', '3PT%', 'FT%']
changes = {}  # stat name -> relative change in percent

print(f"\nüìà Percentage changes for {player_name}:")
for stat in stats:
    b = before[stat]
    a = after[stat]
    # The relative change is undefined when either value is missing or the
    # baseline is zero (division by zero), so those stats are skipped.
    if pd.notna(b) and pd.notna(a) and b != 0:
        change = ((a - b) / b) * 100
        changes[stat] = change
        print(f"   {stat:10s}: {b:6.2f} ‚Üí {a:6.2f} ({change:+6.1f}%)")
    else:
        print(f"   {stat:10s}: no data")

## 5. EXAMPLE: Before/After Visualization

In [None]:
# Grouped bar chart: one (before, after) pair of bars per stat
stats_to_plot = ['PTS', 'AST', 'REB']
before_vals = [before[name] for name in stats_to_plot]
after_vals = [after[name] for name in stats_to_plot]

width = 0.35
x = np.arange(len(stats_to_plot))

fig, ax = plt.subplots(figsize=(10, 6))

# Shift each series half a bar width left/right of the tick so the pair is centred on it
ax.bar(x - width/2, before_vals, width, label='Before Injury', color='steelblue')
ax.bar(x + width/2, after_vals, width, label='After Injury', color='coral')

ax.set(
    ylabel='Value per game',
    title=f'{player_name} - Before vs After ACL Comparison',
)
ax.set_xticks(x)
ax.set_xticklabels(stats_to_plot)
ax.legend()
ax.grid(axis='y', alpha=0.3)

fig.tight_layout()
plt.show()

## 6. EXAMPLE: NBA vs WNBA Comparison

In [None]:
# Restrict the combined basketball sheet to the before/after summary rows
summary_periods = ['Summary Before', 'Summary After']
is_summary_row = df_basketball['Period'].isin(summary_periods)
summary = df_basketball[is_summary_row].copy()

# Mean of each stat for every (League, Period) pair
stat_columns = ['PTS', 'AST', 'REB']
avg_stats = summary.groupby(['League', 'Period'])[stat_columns].mean()

print("üìä Average stats NBA vs WNBA (before/after injury):")
display(avg_stats.round(2))

In [None]:
# Comparison chart: one panel per stat, bars grouped by period and coloured by league
period_order = ['Summary Before', 'Summary After']
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for i, stat in enumerate(['PTS', 'AST', 'REB']):
    # pivot_table averages the stat per (Period, League) and sorts the index
    # alphabetically, which would draw "Summary After" ahead of "Summary Before".
    # Reindex so the bars read chronologically, before -> after.
    data_to_plot = (
        summary.pivot_table(values=stat, index='Period', columns='League')
        .reindex(period_order)
    )
    # rot=0 keeps the period labels horizontal so they are readable
    data_to_plot.plot(kind='bar', ax=axes[i], color=['steelblue', 'coral'], rot=0)
    axes[i].set_title(f'{stat} - NBA vs WNBA')
    axes[i].set_ylabel('Value per game')
    axes[i].set_xlabel('')
    axes[i].legend(title='League')
    axes[i].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 7. EXAMPLE: Soccer Injury Analysis

In [None]:
# Ten most frequent injury categories in the soccer sheet
category_counts = df_soccer['Injury_Category'].value_counts()
injury_counts = category_counts.iloc[:10]

print("üìä Top 10 injury categories (Soccer):")
print(injury_counts)

# Horizontal bar chart of the same counts
fig, ax = plt.subplots(figsize=(12, 6))
injury_counts.plot(kind='barh', ax=ax, color='seagreen')
ax.set(
    title='Top 10 Most Common Injuries in Soccer',
    xlabel='Number of cases',
    ylabel='Injury category',
)
ax.grid(axis='x', alpha=0.3)

fig.tight_layout()
plt.show()

In [None]:
# Mean / median / number of cases of Days_Absent for every injury category
absence_by_category = (
    df_soccer
    .groupby('Injury_Category')['Days_Absent']
    .agg(['mean', 'median', 'count'])
)

# Keep the ten categories with the highest mean absence
avg_absence = absence_by_category.sort_values('mean', ascending=False).head(10)

print("üìä Average time absent per injury category (top 10):")
display(avg_absence.round(1))

## 8. EXAMPLE: Collegiate - ACL Risk

In [None]:
# How many athletes fall into each ACL risk category
risk_dist = df_collegiate['ACL_Risk_Category'].value_counts()

print("üìä ACL risk category distribution (Collegiate):")
print(risk_dist)

# Pie chart of the same distribution.
# NOTE(review): colours are assigned by position, and value_counts() orders the
# categories by frequency, so a colour is not tied to a specific risk level —
# confirm that is intended.
pie_colors = ['lightgreen', 'yellow', 'orange', 'red']
fig, ax = plt.subplots(figsize=(8, 8))
risk_dist.plot.pie(ax=ax, autopct='%1.1f%%', startangle=90, colors=pie_colors)
ax.set(title='ACL Risk Category Distribution', ylabel='')

fig.tight_layout()
plt.show()

In [None]:
# Count athletes per (Gender, ACL_Risk_Category); genders become rows,
# risk categories become columns, and missing combinations are filled with 0
risk_by_gender = (
    df_collegiate
    .groupby(['Gender', 'ACL_Risk_Category'])
    .size()
    .unstack(fill_value=0)
)

print("üìä ACL risk by gender:")
display(risk_by_gender)

# Side-by-side bars: one group per gender, one bar per risk category
bar_colors = ['lightgreen', 'yellow', 'orange', 'red']
gender_ax = risk_by_gender.plot(kind='bar', stacked=False, figsize=(10, 6), color=bar_colors)
gender_ax.set_title('ACL Risk Distribution by Gender')
gender_ax.set_xlabel('Gender')
gender_ax.set_ylabel('Number of athletes')
gender_ax.legend(title='Risk Category')
plt.setp(gender_ax.get_xticklabels(), rotation=0)  # keep the gender labels horizontal
gender_ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

## 9. Your Analyses - Template

Below is an empty template for your own analyses:

In [None]:
# TODO: Your analysis

# Example: Compare two players (starting point for your own comparison)
player1, player2 = 'Derrick Rose', 'Klay Thompson'

# Your code here...


## 💡 Tips

### Filtering data:
```python
# Only NBA
nba_only = df_basketball[df_basketball['League'] == 'NBA']

# Only summary (before/after)
summary = df_nba[df_nba['Period'].isin(['Summary Before', 'Summary After'])]

# Only a specific player
player = df_nba[df_nba['Player_Name'] == 'Derrick Rose']

# Soccer - only ACL
acl_soccer = df_soccer[df_soccer['Injury_Category'] == 'Knee - Cruciate Ligament']
```

### Calculating changes:
```python
change_pct = ((after - before) / before) * 100
```

### Grouping:
```python
df.groupby(['League', 'Period'])['PTS'].mean()
```

---

**Good luck with your analysis! 🚀**
