# Unveiling Hidden Patterns and Optimal Strategies with Machine Learning
## Prepared by: Rob Eugene A. Dequiñon and John Kenneth P. Alon


### Chosen Dataset: Left 4 Dead 2 (2009) Player Stats
<i><b> Link: </b>https://www.kaggle.com/datasets/jacklacey/left-4-dead-2-20000-player-stats </i>

# Part A: Data Acquisition & Preprocessing

In [1]:
# Import necessary libraries for EDA
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Read the player-stats CSV from the working directory into a DataFrame
df = pd.read_csv(filepath_or_buffer='l4d2_player_stats_final.csv')

# Preview the top five records (rendered as the cell's output)
df.head(n=5)

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Structural overview of the dataset: dimensions, per-column summary
# from DataFrame.info(), and the number of null entries in each column
print(f'Shape: {df.shape}')

print('\nInfo:')
df.info()

# Header and per-column null counts are emitted on separate lines
print('\nMissing values:', df.isna().sum(), sep='\n')

In [None]:
# Summary statistics table produced by DataFrame.describe() with its
# default settings; the trailing bare expression renders it as cell output
numeric_summary = df.describe()
numeric_summary

In [None]:
# Collect the names of all object/category-typed columns; this index is
# reused by later cells, so the variable name is kept as-is
categorical_cols = df.select_dtypes(include=['object', 'category']).columns

# Print a frequency table for each of those columns, header first
for column_name in categorical_cols:
    print(
        f'\nValue counts for {column_name}:',
        df[column_name].value_counts(),
        sep='\n',
    )

In [None]:
# Visual EDA: draw a 30-bin histogram grid via DataFrame.hist()
df.hist(bins=30, figsize=(16, 12))

# DataFrame.hist leaves its figure as the current one, so the overall
# title is attached to that figure before rendering
plt.gcf().suptitle('Histograms of Numerical Features')
plt.show()

# Visual EDA: one bar chart of category frequencies per categorical column,
# each on its own 8x4 figure
for feature in categorical_cols:
    _, ax = plt.subplots(figsize=(8, 4))
    sns.countplot(data=df, x=feature, ax=ax)
    ax.set_title(f'Countplot of {feature}')
    # Tilt the x tick labels by 45 degrees
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Pairwise correlation matrix, restricted to numeric columns
correlation_matrix = df.corr(numeric_only=True)

# Render the matrix as an annotated heatmap (two-decimal cell labels)
_, heatmap_ax = plt.subplots(figsize=(14, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap of Numerical Features')
plt.show()