# Dataset Analysis: Player Career Statistics

This notebook explores and analyzes the **Total Career Stats** dataset, which contains per-player gaming statistics: total kills, wins, and revives. The analysis covers data cleaning, distributions, correlations, and top-player leaderboards to give insight into player performance.


##  2. Data Import and Initial Inspection

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw career statistics from the CSV expected in the working directory
data = pd.read_csv('Total Career Stats.csv')

# Initial inspection: first rows, column overview, then summary statistics
print(data.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
data.info()
print(data.describe())


## 3. Data Cleaning and Preparation

In [None]:
# Count missing values per column in the raw data.
# sep="" stops print() from inserting a space after the heading's newline,
# which would otherwise shift the first row of the report out of alignment.
print("Missing values:\n", data.isnull().sum(), sep="")

# Drop only the rows that have no player name; rows with gaps in other
# columns are kept.
data_clean = data.dropna(subset=['player_name'])

# Re-count missing values. Only 'player_name' is guaranteed to be complete at
# this point; any other column can still report gaps here.
print("\nAfter cleaning:\n", data_clean.isnull().sum(), sep="")

# Collect every row that is an exact copy of another row. keep=False flags all
# members of a duplicate group, not just the later repeats.
duplicates = data_clean[data_clean.duplicated(keep=False)]
n_duplicates = len(duplicates)

print(f"\nNumber of duplicate rows found: {n_duplicates}")

# Show the duplicated rows sorted by player so that copies sit next to each other
if n_duplicates > 0:
    print("\nDuplicate rows:")
    display(duplicates.sort_values(by='player_name'))
else:
    print("\nNo duplicate rows found.")



### Remove Duplicate Player Names

In [None]:
# Row count before de-duplication
rows_before = len(data_clean)
print("Initial number of rows:", rows_before)

# Keep a single row per player_name: the first one encountered wins
data_clean = data_clean.drop_duplicates(subset='player_name', keep='first')

# Row count after de-duplication
rows_after = len(data_clean)
print("Number of rows after removing duplicates:", rows_after)

# Sanity check: count player_name values that still repeat
name_is_repeat = data_clean['player_name'].duplicated()
duplicates_remaining = name_is_repeat.sum()
print("Duplicates remaining:", duplicates_remaining)


## 4. Exploratory Data Analysis

### Descriptive Statistics

In [None]:
# Summary statistics of the cleaned data
summary_stats = data_clean.describe()
print(summary_stats)


###  Distribution Visualizations

In [None]:
# Histograms: one panel per career statistic, laid out side by side
hist_panels = [
    ('career_kills', 'Distribution of Career Kills'),
    ('career_wins', 'Distribution of Career Wins'),
    ('career_revives', 'Distribution of Career Revives'),
]

plt.figure(figsize=(14, 4))

for position, (stat_column, panel_title) in enumerate(hist_panels, start=1):
    # Select the next slot in the 1 x 3 grid, then draw into it
    plt.subplot(1, 3, position)
    sns.histplot(data_clean[stat_column], bins=30, kde=True)
    plt.title(panel_title)

plt.tight_layout()
plt.show()


### Boxplots for Outliers

In [None]:
# Boxplots: one panel per career statistic, laid out side by side
box_panels = [
    ('career_kills', 'Boxplot of Career Kills'),
    ('career_wins', 'Boxplot of Career Wins'),
    ('career_revives', 'Boxplot of Career Revives'),
]

plt.figure(figsize=(14, 4))

for position, (stat_column, panel_title) in enumerate(box_panels, start=1):
    # Select the next slot in the 1 x 3 grid, then draw into it
    plt.subplot(1, 3, position)
    sns.boxplot(x=data_clean[stat_column])
    plt.title(panel_title)

plt.tight_layout()
plt.show()


### Correlation Analysis

In [None]:
# Pairwise correlations between the three career statistics, shown as a heatmap
stat_columns = ['career_kills', 'career_wins', 'career_revives']
correlations = data_clean[stat_columns].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlations, annot=True, cmap='coolwarm')
plt.title('Correlation between Career Statistics')
plt.show()


##  5. Analysis and Insights

###  Top Players Analysis (Leaderboard)

In [None]:
# Build an anonymised leaderboard for a single statistic
def show_top_players(df, column, top_n=10):
    """Return the ``top_n`` rows with the highest ``column`` values, anonymised.

    Rows are sorted by ``column`` in descending order and re-indexed so the
    index is the rank, starting at 1. Each row gets a ``Player_ID`` label of
    the form ``Player_<rank>``. The label reflects the rank within this
    leaderboard only, so it is not a stable identifier across different
    columns.

    Parameters
    ----------
    df : DataFrame containing ``column``.
    column : name of the column to rank by.
    top_n : maximum number of rows to return (default 10).

    Returns
    -------
    DataFrame with exactly two columns, ``Player_ID`` and ``column``,
    indexed by rank. ``df`` itself is not modified.
    """
    leaderboard = (
        df.sort_values(by=column, ascending=False)
        .head(top_n)
        .reset_index(drop=True)
    )
    # Shift the fresh 0-based index so that it doubles as a 1-based rank
    leaderboard.index = leaderboard.index + 1
    leaderboard['Player_ID'] = [f'Player_{rank}' for rank in leaderboard.index]
    return leaderboard[['Player_ID', column]]

# Print one anonymised top-10 leaderboard per career statistic.
# sep="" stops print() from inserting a space after the heading's newline,
# which would otherwise push the table's header row out of alignment.
print("🏆 Top 10 Players by Kills:\n", show_top_players(data_clean, 'career_kills'), sep="")

print("\n🏆 Top 10 Players by Wins:\n", show_top_players(data_clean, 'career_wins'), sep="")

print("\n🏆 Top 10 Players by Revives:\n", show_top_players(data_clean, 'career_revives'), sep="")



### Scatter Plots for Relationships

In [None]:
# Scatter plots: career kills on the x-axis against each other statistic
scatter_specs = [
    ('career_wins', 'Career Kills vs Wins', 'Career Wins'),
    ('career_revives', 'Career Kills vs Revives', 'Career Revives'),
]

for y_column, plot_title, y_label in scatter_specs:
    sns.scatterplot(x='career_kills', y=y_column, data=data_clean)
    plt.title(plot_title)
    plt.xlabel('Career Kills')
    plt.ylabel(y_label)
    # Render now so the next plot starts on a fresh figure
    plt.show()
