# League of Legends Match Analysis

This notebook analyzes the match data collected using the Riot Games API.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Configure the global look shared by every figure in this notebook.
# NOTE(review): set_theme applies seaborn's own default style after the
# matplotlib 'ggplot' style sheet has been loaded, so it likely overrides much
# of 'ggplot' — confirm which of the two looks is actually intended.
plt.style.use('ggplot')
sns.set_theme(font_scale=1.2)  # set_theme is the preferred name for sns.set

In [None]:
# Read the collected match records into a DataFrame.
# The path is relative to the notebook's working directory.
df = pd.read_csv(
    '../data/lol_match_data.csv',
)

# Report the row count, then preview the first five rows as the cell output.
# NOTE(review): the message assumes one row per match — confirm against the
# script that produced the CSV.
print(f"Total matches: {len(df)}")
df.head(5)

In [None]:
# Basic statistics: summary table (count, mean, std, min, quartiles, max)
# produced by pandas' describe() with its default settings, shown as the
# cell's rich output.
df.describe()

## Win Rate Analysis

In [None]:
# Overall win rate: mean of the 'win' column expressed as a percentage.
# NOTE(review): assumes 'win' is boolean or 0/1 — confirm against the CSV.
win_rate = 100 * df['win'].mean()
print(f"Overall win rate: {win_rate:.2f}%")

# Per-champion summary: win rate and number of games, plus the average of each
# performance column. The result has two-level column labels such as
# ('win', 'mean') and ('win', 'count'); rows are ordered by games played,
# most-played champion first.
champion_stats = (
    df.groupby('champion')
    .agg({
        'win': ['mean', 'count'],
        **dict.fromkeys(['kills', 'deaths', 'assists', 'kda'], 'mean'),
    })
    .sort_values(('win', 'count'), ascending=False)
)

# Express the per-champion win rate as a percentage, like the overall figure.
champion_stats[('win', 'mean')] *= 100

# Show the ten most-played champions.
champion_stats.head(10)

## Performance Metrics Visualization

In [None]:
# Histogram of KDA over all rows, with a dashed red line marking the mean.
kda_mean = df['kda'].mean()

fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['kda'], bins=20, ax=ax)
ax.axvline(kda_mean, color='red', linestyle='--', label=f'Mean: {kda_mean:.2f}')

# Labels are set after plotting so they replace seaborn's automatic ones.
ax.set_title('KDA Distribution')
ax.set_xlabel('KDA')
ax.set_ylabel('Frequency')
ax.legend()
plt.show()

In [None]:
# Scatter of CS per minute against KDA, one point per row, coloured by the
# value of the 'win' column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='cs_per_min', y='kda', hue='win', ax=ax)

# Replace the column names seaborn uses as default axis labels.
ax.set_title('CS per Minute vs. KDA')
ax.set_xlabel('CS per Minute')
ax.set_ylabel('KDA')
plt.show()

## Game Duration Analysis

In [None]:
# Histogram of game length in minutes, with a dashed red line marking the mean.
duration_mean = df['game_duration_minutes'].mean()

fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['game_duration_minutes'], bins=15, ax=ax)
ax.axvline(
    duration_mean,
    color='red',
    linestyle='--',
    label=f'Mean: {duration_mean:.2f} min',
)

# Labels are set after plotting so they replace seaborn's automatic ones.
ax.set_title('Game Duration Distribution')
ax.set_xlabel('Game Duration (minutes)')
ax.set_ylabel('Frequency')
ax.legend()
plt.show()

In [None]:
# Analyze win rate by game duration.
#
# Each game is assigned to a duration bucket. Edges are right-inclusive (the
# pandas.cut default), so '<20' is (0, 20], '20-25' is (20, 25], and so on.
# The last edge is open-ended: with a finite upper edge, any game longer than
# that edge would become NaN and silently vanish from the '>40' bucket.
# Note: this adds a 'duration_bucket' column to the shared `df`.
df['duration_bucket'] = pd.cut(
    df['game_duration_minutes'],
    bins=[0, 20, 25, 30, 35, 40, float('inf')],
    labels=['<20', '20-25', '25-30', '30-35', '35-40', '>40'],
)

# observed=False keeps all six buckets in the result even when one is empty,
# so bar positions and the n= labels below always line up with the same
# x-axis. Passing it explicitly also keeps the result stable across pandas
# versions, where the default for categorical groupers changes.
duration_win_rate = (
    df.groupby('duration_bucket', observed=False)['win'].agg(['mean', 'count'])
)
duration_win_rate['mean'] = duration_win_rate['mean'] * 100

plt.figure(figsize=(10, 6))
ax = sns.barplot(x=duration_win_rate.index, y=duration_win_rate['mean'])
plt.title('Win Rate by Game Duration')
plt.xlabel('Game Duration (minutes)')
plt.ylabel('Win Rate (%)')

# Add count labels: one per bucket, centred on the bar. y=5 is in data units,
# i.e. the 5% mark on the win-rate axis, so labels sit near the bar base.
for i, count in enumerate(duration_win_rate['count']):
    ax.text(i, 5, f'n={count}', ha='center')

plt.show()