In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

In [14]:
# Read the CSV file
df = pd.read_csv('player_game_statistics_old.csv')

In [15]:
def analyze_game_popularity():
    """Analyze game popularity metrics"""
    # Games by total players
    game_stats = df.groupby('game_name').agg({
        'player_id': 'count',
        'total_games_played': 'sum',
        'total_wins': 'sum',
        'rating': 'mean',
        'churned': lambda x: (x == 'YES').mean() * 100  # Churn rate as percentage
    }).round(2)
    
    game_stats.columns = ['Total Players', 'Total Games Played', 
                         'Total Wins', 'Average Rating', 'Churn Rate (%)']
    game_stats = game_stats.sort_values('Total Players', ascending=False)
    
    return game_stats

In [16]:
def analyze_player_activity_by_country():
    """Analyze player activity metrics by country"""
    country_stats = df.groupby('country').agg({
        'player_id': 'count',
        'total_games_played': ['sum', 'mean'],
        'win_ratio': 'mean',
        'rating': 'mean',
        'churned': lambda x: (x == 'YES').mean() * 100  # Churn rate as percentage
    }).round(2)
    
    country_stats.columns = ['Total Players', 'Total Games', 'Avg Games per Player',
                           'Avg Win Ratio', 'Avg Rating', 'Churn Rate (%)']
    country_stats = country_stats.sort_values('Total Players', ascending=False)
    
    return country_stats

In [17]:
def additional_insights():
    """interesting insights"""
    insights = {
        'Player Level Distribution': df['player_level'].value_counts(),
        'Average Win Ratio by Level': df.groupby('player_level')['win_ratio'].mean(),
        'Gender Distribution': df['gender'].value_counts(),
        'Age Statistics': df['age'].describe(),
        'Correlation': df[['total_games_played', 'win_ratio', 'total_moves', 
                          'highest_score', 'rating']].corr()
    }
    
    # Calculate engagement metrics
    insights['Avg Games per Player'] = df['total_games_played'].mean()
    insights['Overall Churn Rate'] = (df['churned'] == 'YES').mean() * 100
    
    # Win ratio distribution
    insights['Win Ratio Distribution'] = df['win_ratio'].describe()
    
    return insights

In [20]:
# Generate all analyses
game_popularity = analyze_game_popularity()
display(game_popularity)

Unnamed: 0_level_0,Total Players,Total Games Played,Total Wins,Average Rating,Churn Rate (%)
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Battleship,500,3970,190,1.98,75.0
Hangman,317,2410,112,1.96,71.92
Dots and Boxes,308,2366,120,1.94,73.7
Chess,305,2270,116,1.91,73.11
Tic Tac Toe,289,2288,104,2.02,70.24
Memory Match,286,2190,113,1.86,71.68
Checkers,282,2284,99,2.05,70.57
Connect Four,282,2222,108,1.95,71.99


In [24]:
country_activity = analyze_player_activity_by_country()
display(country_activity)

Unnamed: 0_level_0,Total Players,Total Games,Avg Games per Player,Avg Win Ratio,Avg Rating,Churn Rate (%)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
UK,130,971,7.47,54.71,1.94,91.54
Netherlands,120,956,7.97,53.89,1.97,72.5
Philippines,116,903,7.78,52.1,2.03,72.41
Norway,108,821,7.6,57.93,1.97,89.81
Malaysia,107,853,7.97,46.8,1.94,83.18
Nigeria,106,840,7.92,51.18,2.06,76.42
Japan,104,769,7.39,51.39,1.93,90.38
Germany,104,855,8.22,45.83,1.94,60.58
Denmark,103,815,7.91,50.3,1.98,52.43
Brazil,99,719,7.26,46.96,1.87,64.65


In [19]:
# Create visualizations
plt.style.use('seaborn')
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

In [None]:
# 1. Game Popularity
game_popularity['Total Players'].head(10).plot(kind='bar', ax=axes[0,0])
axes[0,0].set_title('Top 10 Most Popular Games')
axes[0,0].set_xlabel('Game')
axes[0,0].set_ylabel('Number of Players')

In [None]:
# 2. Player Distribution by Country
country_activity['Total Players'].head(10).plot(kind='bar', ax=axes[0,1])
axes[0,1].set_title('Top 10 Countries by Player Count')
axes[0,1].set_xlabel('Country')
axes[0,1].set_ylabel('Number of Players')

In [None]:
# 3. Player Level Distribution
df['player_level'].value_counts().plot(kind='pie', ax=axes[1,0], autopct='%1.1f%%')
axes[1,0].set_title('Player Level Distribution')

In [None]:
# 4. Win Ratio by Player Level
df.groupby('player_level')['win_ratio'].mean().plot(kind='bar', ax=axes[1,1])
axes[1,1].set_title('Average Win Ratio by Player Level')
axes[1,1].set_xlabel('Player Level')
axes[1,1].set_ylabel('Average Win Ratio')

In [None]:
plt.tight_layout()

In [None]:
# Print summary statistics
print("\n=== Most Popular Games ===")
print(game_popularity.head())


In [None]:
print("\n=== Player Activity by Country ===")
print(country_activity.head())

In [None]:
print("\n=== Additional Insights ===")
for key, value in additional_insights.items():
    print(f"\n{key}:")
    print(value)