In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import iqr

# Build a synthetic gaming dataset. Each column is a mixture of several
# segments drawn from different distributions and concatenated end to end.
# The legacy global seed together with the fixed order of the draws below
# is what makes the generated values reproducible, so keep the order intact.
np.random.seed(42)

gaming_data = {}

# Daily active users: regular days, weekends, special events.
gaming_data['daily_active_users'] = np.concatenate((
    np.random.normal(loc=1000, scale=200, size=200),
    np.random.normal(loc=2000, scale=300, size=100),
    np.random.normal(loc=3000, scale=400, size=50),
))

# Session duration in minutes: casual, regular, hardcore players.
gaming_data['session_duration_mins'] = np.concatenate((
    np.random.normal(loc=20, scale=5, size=150),
    np.random.normal(loc=60, scale=15, size=150),
    np.random.normal(loc=180, scale=30, size=50),
))

# In-game purchases: many small purchases plus battle pass / major items.
gaming_data['in_game_purchases'] = np.concatenate((
    np.random.exponential(scale=5, size=250),
    np.random.normal(loc=50, scale=10, size=100),
))

# Player level: new players, regular players, veterans.
gaming_data['player_level'] = np.concatenate((
    np.random.normal(loc=10, scale=3, size=100),
    np.random.normal(loc=30, scale=5, size=150),
    np.random.normal(loc=70, scale=10, size=100),
))

# Matches played: casual vs. competitive players.
gaming_data['matches_played'] = np.concatenate((
    np.random.poisson(lam=3, size=200),
    np.random.poisson(lam=10, size=150),
))

# Every column has 350 entries, so the dict maps directly onto a DataFrame.
df = pd.DataFrame(gaming_data)

In [28]:
# Per-column summary of the dataset: count, mean, std, min, quartiles and max.
print(df.describe())

       daily_active_users  session_duration_mins  in_game_purchases  \
count          350.000000             350.000000         350.000000   
mean          1574.170504              59.693193          18.111394   
std            781.613616              54.792551          21.321402   
min            476.050979               8.490394           0.055284   
25%            961.862906              20.639101           2.208069   
50%           1189.465625              45.327608           6.481960   
75%           2148.279898              68.530370          38.183416   
max           3836.954910             257.200794          71.632547   

       player_level  matches_played  
count    350.000000      350.000000  
mean      36.142872        6.034286  
std       23.940125        4.315491  
min        1.454372        0.000000  
25%       13.999205        2.250000  
50%       30.457091        5.000000  
75%       59.049238        9.000000  
max       94.929995       21.000000  


In [None]:
def stats(data):
    """Compute extended descriptive statistics for every column of a frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        One column per column of ``data`` and one row per statistic, in this
        order: mean, median, min, max, range, iqr, q1, q3, std, skewness,
        kurtosis, cv. All values are ``float64`` so that numeric operations
        such as ``.round()`` act on the result (``DataFrame.round`` leaves
        object-dtype columns untouched).

    Notes
    -----
    * ``iqr`` is computed as ``q3 - q1`` from the same pandas quantiles that
      are reported, so the three rows always agree, including when the column
      contains NaN (pandas quantiles skip NaN).
    * ``cv`` is the coefficient of variation in percent (``std / mean * 100``).
      It is undefined for a zero mean and is reported as NaN in that case.
    """
    indexes = ['mean', 'median', 'min', 'max', 'range', 'iqr', 'q1', 'q3', 'std', 'skewness', 'kurtosis', 'cv']

    per_column = {}
    for col in data.columns:
        series = data[col]

        # Compute each building block once and reuse it for derived rows.
        mean = series.mean()
        std = series.std()
        low = series.min()
        high = series.max()
        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)

        values = {
            'mean': mean,
            'median': series.median(),
            'min': low,
            'max': high,
            'range': high - low,
            'iqr': q3 - q1,
            'q1': q1,
            'q3': q3,
            'std': std,
            'skewness': series.skew(),
            'kurtosis': series.kurt(),
            # Guard the division: CV has no meaning when the mean is zero.
            'cv': (std / mean) * 100 if mean != 0 else float('nan'),
        }
        per_column[col] = pd.Series(values, index=indexes, dtype=float)

    # astype(float) also covers the empty-frame case, where no Series fixes the dtype.
    return pd.DataFrame(per_column, index=indexes, columns=data.columns).astype(float)

# Compute the extended descriptive statistics once for the whole dataset;
# the resulting table is printed further down.
summary_stats = stats(df)

In [31]:
def visualizations(data, n_cols=2):
    """Plot a density histogram with a KDE curve for every column of ``data``.

    Each panel is annotated with vertical lines for Q1, Q3, the mean, the
    median and the mean plus/minus one standard deviation.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are all numeric; one panel is drawn per column.
    n_cols : int, default 2
        Number of panels per row. The number of rows is derived from the
        number of columns in ``data``, so frames of any width can be plotted.

    Raises
    ------
    ValueError
        If ``n_cols`` is smaller than 1.
    """
    if n_cols < 1:
        raise ValueError('n_cols must be at least 1')

    columns = list(data.columns)
    # Ceiling division; keep at least one row so an empty frame still yields a figure.
    n_rows = max(1, (len(columns) + n_cols - 1) // n_cols)

    # 4 inches of height per row reproduces the 20x12 canvas for a 3-row grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 4 * n_rows), squeeze=False)
    axes = axes.ravel()

    for ax, col in zip(axes, columns):
        series = data[col]
        mean = series.mean()
        std = series.std()

        sns.histplot(series, kde=True, stat='density', ax=ax)
        ax.set_title(f'Distribution of {col}')
        ax.set_xlabel(col)
        # The histogram is normalised (stat='density'), so the axis shows density, not counts.
        ax.set_ylabel('density')

        ax.axvline(series.quantile(0.25), color='purple', linestyle=':', label='Q1')
        ax.axvline(series.quantile(0.75), color='purple', linestyle=':', label='Q3')
        ax.axvline(mean, color='red', linestyle='--', label='Mean')
        ax.axvline(series.median(), color='blue', linestyle='--', label='Median')
        ax.axvline(mean - std, color='yellow', linestyle='--', label='Std. Dev -')
        ax.axvline(mean + std, color='yellow', linestyle='--', label='Std. Dev +')
        ax.legend()

    # Hide grid cells that have no column to show.
    for ax in axes[len(columns):]:
        ax.set_visible(False)

    fig.tight_layout()
    plt.show()

# Cast to float before rounding: DataFrame.round leaves object-dtype columns
# untouched, so the cast guarantees the 2-decimal rounding is applied
# regardless of the dtype the statistics table was built with.
print(summary_stats.astype(float).round(2))

# One annotated distribution panel per column of the dataset.
visualizations(df)