In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Read mw_pw_profiles.csv into the DataFrame used by the rest of the script.
df = pd.read_csv('mw_pw_profiles.csv')

# First look at the data: column/dtype report, describe() table, and
# per-column count of missing values.
print("\n--- Dataset Info ---")
df.info()  # info() prints its own report; there is nothing to pass to print()

# Each heading and its report are emitted by a single print call, separated
# by a newline, which produces the same output as two consecutive prints.
print("\n--- Summary Statistics ---", df.describe(), sep="\n")
print("\n--- Missing Values ---", df.isna().sum(), sep="\n")

# Histogram of the 'runs' column (missing values dropped), split into 30 bins.
hist_fig, hist_ax = plt.subplots(figsize=(8, 4))
sns.histplot(df['runs'].dropna(), bins=30, ax=hist_ax)
hist_ax.set_title("Distribution of Runs")
hist_ax.set_xlabel("Runs")
hist_ax.set_ylabel("Frequency")
plt.show()

# Scatter plot of 'runs' (y) against 'balls_faced' (x); the axis labels are
# whatever seaborn assigns from the column names.
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 4))
sns.scatterplot(data=df, x='balls_faced', y='runs', ax=scatter_ax)
scatter_ax.set_title("Runs vs Balls Faced")
plt.show()

# Feature Engineering Example: Player Consistency
# One row per player with the mean and standard deviation of their 'runs'.
player_stats = df.groupby('player_name')['runs'].agg(['mean', 'std']).reset_index()

# consistency = mean / std (higher means a steadier scorer relative to output).
# A std of exactly 0 (every recorded score identical) would turn the ratio
# into +inf and push that player to the very top of the ranking, so
# non-positive spreads are masked to NaN before dividing. A std that is
# already NaN (e.g. a player with a single recorded score) stays NaN.
usable_std = player_stats['std'].where(player_stats['std'] > 0)
player_stats['consistency'] = player_stats['mean'] / usable_std

print("\n--- Top 5 Consistent Players ---")
# NaN consistency is sorted to the end, so only players with a well-defined
# ratio can lead the list.
print(
    player_stats
    .sort_values('consistency', ascending=False, na_position='last')
    .head()
)