# Employee Attrition Prediction
This notebook explores the IBM HR Analytics dataset to understand employee attrition patterns and key influencing factors.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
file_path = "WA_Fn-UseC_-HR-Employee-Attrition.csv"  # Update path if needed
df = pd.read_csv(file_path)

# Convert Attrition to binary (Yes=1, No=0).
# Series.map turns every value that is missing from the mapping into NaN, so
# validate the result and fail loudly instead of letting NaNs silently skew
# any statistic computed from this column later on.
attrition_flag = df['Attrition'].map({'Yes': 1, 'No': 0})
unmapped = attrition_flag.isna()
if unmapped.any():
    unexpected = df.loc[unmapped, 'Attrition'].unique().tolist()
    raise ValueError(f"Unexpected Attrition values (expected 'Yes'/'No'): {unexpected}")
df['Attrition'] = attrition_flag

# Display basic info.
# df.info() prints its summary and returns None, so it is called as a plain
# statement; df.head() is left as the cell's last expression so the notebook
# renders it as a rich table (a `(df.info(), df.head())` tuple would instead be
# shown as a plain-text tuple starting with None).
df.info()
df.head()

In [None]:
# Attrition Rate Analysis
# Share of rows flagged as attrition, expressed as a percentage.
attrition_rate = 100 * df['Attrition'].mean()

# Plot Attrition Distribution
# Draw on an explicit Axes so every label below targets this figure only.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(x=df['Attrition'], palette='coolwarm', ax=ax)
ax.set_xticks([0, 1])
ax.set_xticklabels(['No Attrition', 'Attrition'])
ax.set_xlabel("Attrition")
ax.set_ylabel("Count")
ax.set_title(f"Attrition Distribution (Rate: {attrition_rate:.2f}%)")
plt.show()

In [None]:
# Correlation Analysis
# Correlate every numeric column with the Attrition column.
# The frame is restricted to numeric/bool columns first: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# a ValueError if any text column is present.
numeric_df = df.select_dtypes(include=['number', 'bool'])
correlation = numeric_df.corr()['Attrition'].sort_values(ascending=False)

# Top correlated features
# Drop the target by label rather than by position, so the result does not
# rely on Attrition (self-correlation 1.0) having sorted to the first slot.
# NOTE: the ranking is by signed correlation (most positive first), so strongly
# negative correlations are not surfaced here.
top_corr_features = correlation.drop(labels='Attrition').head(10)

# Plot top correlated features
plt.figure(figsize=(8, 6))
sns.barplot(x=top_corr_features.values, y=top_corr_features.index, palette="coolwarm")
plt.xlabel("Correlation with Attrition")
plt.ylabel("Features")
plt.title("Top 10 Features Correlated with Attrition")
plt.show()

# Display correlation values
top_corr_features