# HR Analytics Dashboard
## Key Metrics: 1,400+ Employees, 16.12% Attrition Rate, 37 Average Age
### Insights: R&D 56% Attrition, Technical Background 24% Attrition, Gender Analysis


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Set style
# Global plotting defaults for every figure drawn in later cells.
# 'seaborn-v0_8' is the style-sheet name used by matplotlib >= 3.6; the palette
# call follows the style call, so the "husl" palette is the one left in effect.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Confirmation banner shown once the setup cell has run without raising.
print("üìä HR Analytics Dashboard - Libraries Imported Successfully!")


In [None]:
# Load HR Analytics Data
DATA_PATH = 'hr_analytics_data.csv'

# Columns read by the analysis cells below. Checking them here turns a late,
# opaque KeyError in a plotting cell into an immediate, descriptive failure.
REQUIRED_COLUMNS = [
    'employee_id', 'attrition', 'age', 'salary', 'department', 'gender',
    'technical_background', 'performance_rating', 'work_life_balance',
]

df = pd.read_csv(DATA_PATH)

missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required column(s): {missing_columns}")

# Display basic info
print("üìà Dataset Overview:")
print(f"Total Employees: {len(df):,}")
# Attrition is encoded as the string 'Yes' for employees who left.
print(f"Overall Attrition Rate: {(df['attrition'] == 'Yes').mean()*100:.2f}%")
print(f"Average Age: {df['age'].mean():.1f} years")
print(f"Average Salary: ${df['salary'].mean():,.0f}")

# Last expression of the cell: rich preview of the first rows.
df.head()


## 📊 Key Performance Indicators


In [None]:
# Create KPI Cards
# One row of four text-only panels; each panel shows a headline value with a
# grey caption underneath.
fig, axes = plt.subplots(1, 4, figsize=(20, 6))
fig.suptitle('HR Analytics - Key Performance Indicators', fontsize=16, fontweight='bold')

# Headline figures (kept as module-level names, as later cells reuse some of them).
total_employees = len(df)
attrition_rate = (df['attrition'] == 'Yes').mean() * 100
avg_age = df['age'].mean()
avg_salary = df['salary'].mean()

# (formatted value, caption, value colour) for KPI 1..4, left to right.
kpi_cards = [
    (f'{total_employees:,}', 'Total Employees', '#1f77b4'),
    (f'{attrition_rate:.2f}%', 'Attrition Rate', '#d62728'),
    (f'{avg_age:.1f}', 'Average Age', '#2ca02c'),
    (f'${avg_salary:,.0f}', 'Average Salary', '#ff7f0e'),
]

for card_ax, (value_text, caption, value_color) in zip(axes, kpi_cards):
    card_ax.text(0.5, 0.5, value_text, ha='center', va='center',
                 fontsize=24, fontweight='bold', color=value_color)
    card_ax.text(0.5, 0.2, caption, ha='center', va='center', fontsize=12, color='gray')
    # Unit square with the frame hidden, so the text coordinates above are fractions.
    card_ax.set_xlim(0, 1)
    card_ax.set_ylim(0, 1)
    card_ax.axis('off')

plt.tight_layout()
plt.show()

print("üì∏ Take screenshot of the KPI cards above")


## 🏢 Department-wise Attrition Analysis


In [None]:
# Department Attrition Analysis
# Per department: head count, number of leavers (attrition == 'Yes') and the
# resulting attrition rate in percent, sorted so the highest rate is drawn on top.
dept_attrition = df.groupby('department').agg(
    total_employees=('employee_id', 'count'),
    employees_left=('attrition', lambda x: (x == 'Yes').sum()),
)
dept_attrition['attrition_rate'] = (
    dept_attrition['employees_left'] / dept_attrition['total_employees'] * 100
).round(2)
dept_attrition = dept_attrition.sort_values('attrition_rate', ascending=True)

# Horizontal bar chart, one bar per department
dept_fig, dept_ax = plt.subplots(figsize=(12, 8))
bars = dept_ax.barh(dept_attrition.index, dept_attrition['attrition_rate'],
                    color='#1f77b4', alpha=0.7)

# Value label just to the right of each bar's end
for bar_pos, rate in enumerate(dept_attrition['attrition_rate']):
    dept_ax.text(rate + 0.5, bar_pos, f"{rate:.1f}%", va='center', fontweight='bold')

dept_ax.set_title('Attrition Rate by Department', fontsize=16, fontweight='bold', pad=20)
dept_ax.set_xlabel('Attrition Rate (%)', fontsize=12)
dept_ax.set_ylabel('Department', fontsize=12)
dept_ax.grid(axis='x', alpha=0.3)
dept_fig.tight_layout()
plt.show()

# Display the data table
print("\nüìä Department Attrition Summary:")
print(dept_attrition)
print("\nüì∏ Take screenshot of the department analysis chart above")


## 👥 Gender-based Attrition Analysis


In [None]:
# Gender Attrition Analysis
# Per gender: head count, number of leavers (attrition == 'Yes') and attrition rate (%).
gender_attrition = df.groupby('gender').agg({
    'employee_id': 'count',
    'attrition': lambda x: (x == 'Yes').sum()
}).rename(columns={'employee_id': 'total_employees', 'attrition': 'employees_left'})

gender_attrition['attrition_rate'] = (gender_attrition['employees_left'] / gender_attrition['total_employees'] * 100).round(2)

# Create comparison chart
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# One colour per category, shared by both panels.
colors = ['#ff9999', '#66b3ff']

# Pie chart for gender distribution.
# value_counts() orders by frequency while groupby orders by label; aligning the
# counts to the groupby index makes a given colour mean the same gender in the
# pie and in the bar chart next to it.
gender_dist = df['gender'].value_counts().reindex(gender_attrition.index, fill_value=0)
ax1.pie(gender_dist.values, labels=gender_dist.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax1.set_title('Gender Distribution', fontsize=14, fontweight='bold')

# Bar chart for gender attrition
ax2.bar(gender_attrition.index, gender_attrition['attrition_rate'], color=colors, alpha=0.7)
ax2.set_title('Attrition Rate by Gender', fontsize=14, fontweight='bold')
ax2.set_ylabel('Attrition Rate (%)')

# Leave 20% headroom for the value labels; skip when every rate is 0 (or the
# table is empty), because identical lower/upper limits are not a valid range.
peak_rate = gender_attrition['attrition_rate'].max()
if peak_rate > 0:
    ax2.set_ylim(0, peak_rate * 1.2)

# Add value labels on bars
for bar_pos, rate in enumerate(gender_attrition['attrition_rate']):
    ax2.text(bar_pos, rate + 0.2, f"{rate:.1f}%", ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

print("\nüìä Gender Attrition Summary:")
print(gender_attrition)
print("\nüì∏ Take screenshot of the gender analysis chart above")


## 💻 Technical Background vs Attrition Analysis


In [None]:
# Technical Background Analysis
# Per technical_background value: head count, number of leavers
# (attrition == 'Yes') and attrition rate (%).
tech_attrition = df.groupby('technical_background').agg({
    'employee_id': 'count',
    'attrition': lambda x: (x == 'Yes').sum()
}).rename(columns={'employee_id': 'total_employees', 'attrition': 'employees_left'})

tech_attrition['attrition_rate'] = (tech_attrition['employees_left'] / tech_attrition['total_employees'] * 100).round(2)

# Create visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# One colour per category, shared by both panels.
colors = ['#ff9999', '#66b3ff']

# Pie chart for technical background distribution.
# value_counts() orders by frequency while groupby orders by label; aligning the
# counts to the groupby index makes a given colour mean the same category in the
# pie and in the bar chart next to it.
tech_dist = df['technical_background'].value_counts().reindex(tech_attrition.index, fill_value=0)
ax1.pie(tech_dist.values, labels=tech_dist.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax1.set_title('Technical Background Distribution', fontsize=14, fontweight='bold')

# Bar chart for technical background attrition
ax2.bar(tech_attrition.index, tech_attrition['attrition_rate'], color=colors, alpha=0.7)
ax2.set_title('Attrition Rate by Technical Background', fontsize=14, fontweight='bold')
ax2.set_ylabel('Attrition Rate (%)')

# Leave 20% headroom for the value labels; skip when every rate is 0 (or the
# table is empty), because identical lower/upper limits are not a valid range.
peak_rate = tech_attrition['attrition_rate'].max()
if peak_rate > 0:
    ax2.set_ylim(0, peak_rate * 1.2)

# Add value labels on bars
for bar_pos, rate in enumerate(tech_attrition['attrition_rate']):
    ax2.text(bar_pos, rate + 0.5, f"{rate:.1f}%", ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

print("\nüìä Technical Background Attrition Summary:")
print(tech_attrition)
print("\nüì∏ Take screenshot of the technical background analysis chart above")


## 💰 Salary Distribution Analysis


In [None]:
# Salary Distribution Analysis
salary_fig, salary_ax = plt.subplots(figsize=(12, 6))

# Histogram of the salary column over 20 bins
salary_ax.hist(df['salary'], bins=20, color='#2ca02c', alpha=0.7, edgecolor='black')
salary_ax.set_title('Salary Distribution', fontsize=16, fontweight='bold', pad=20)
salary_ax.set_xlabel('Salary ($)', fontsize=12)
salary_ax.set_ylabel('Number of Employees', fontsize=12)
salary_ax.grid(axis='y', alpha=0.3)

# Dashed vertical marker at the mean; its legend entry carries the formatted value
mean_salary = df['salary'].mean()
salary_ax.axvline(
    mean_salary,
    color='red',
    linestyle='--',
    linewidth=2,
    label=f'Mean: ${mean_salary:,.0f}',
)
salary_ax.legend()

salary_fig.tight_layout()
plt.show()

# Summary statistics printed below the chart
print("\nüìä Salary Statistics:")
print(f"Mean Salary: ${df['salary'].mean():,.0f}")
print(f"Median Salary: ${df['salary'].median():,.0f}")
print(f"Min Salary: ${df['salary'].min():,.0f}")
print(f"Max Salary: ${df['salary'].max():,.0f}")
print("\nüì∏ Take screenshot of the salary distribution chart above")


## 🔍 Key Insights Summary


## 📊 Complete HR Analytics Dashboard


In [None]:
# Complete HR Analytics Dashboard - All Visualizations in One Page


def _attrition_summary(frame, column):
    """Return head count, leavers and attrition rate (%) for each value of `column`.

    The result is indexed by the distinct values of `column` and has the columns
    'total_employees', 'employees_left' and 'attrition_rate' (rounded to 2 decimals).
    """
    summary = frame.groupby(column).agg(
        total_employees=('employee_id', 'count'),
        employees_left=('attrition', lambda x: (x == 'Yes').sum()),
    )
    summary['attrition_rate'] = (summary['employees_left'] / summary['total_employees'] * 100).round(2)
    return summary


def _rate_text(summary, key):
    """Format the attrition rate of group `key` as 'x.y%', or 'N/A' if the group is absent."""
    if key not in summary.index:
        return 'N/A'
    return f"{summary.loc[key, 'attrition_rate']:.1f}%"


fig = plt.figure(figsize=(24, 18))

# Create a comprehensive dashboard layout: 5 rows x 4 columns
gs = fig.add_gridspec(5, 4, hspace=0.4, wspace=0.3)

# Title (row 0). This is the only figure title; a second suptitle with the same
# text would be drawn right above it.
ax_title = fig.add_subplot(gs[0, :])
ax_title.text(0.5, 0.5, 'HR Analytics Dashboard - Complete Overview',
              ha='center', va='center', fontsize=24, fontweight='bold', color='#1f77b4')
ax_title.axis('off')

# Row 1: KPI Cards (4 cards)
attrition_rate = (df['attrition'] == 'Yes').mean() * 100
avg_age = df['age'].mean()
avg_salary = df['salary'].mean()

# (formatted value, caption, value colour), left to right
kpi_cards = [
    (f'{len(df):,}', 'Total Employees', '#1f77b4'),
    (f'{attrition_rate:.2f}%', 'Attrition Rate', '#d62728'),
    (f'{avg_age:.1f}', 'Average Age', '#2ca02c'),
    (f'${avg_salary:,.0f}', 'Average Salary', '#ff7f0e'),
]
for card_col, (value_text, caption, value_color) in enumerate(kpi_cards):
    card_ax = fig.add_subplot(gs[1, card_col])
    card_ax.text(0.5, 0.6, value_text, ha='center', va='center',
                 fontsize=20, fontweight='bold', color=value_color)
    card_ax.text(0.5, 0.3, caption, ha='center', va='center', fontsize=10, color='gray')
    card_ax.set_xlim(0, 1)
    card_ax.set_ylim(0, 1)
    card_ax.axis('off')

# Attrition summaries (the insights cell after this one reads these three names)
dept_attrition = _attrition_summary(df, 'department').sort_values('attrition_rate', ascending=True)
gender_attrition = _attrition_summary(df, 'gender')
tech_attrition = _attrition_summary(df, 'technical_background')

# One colour per category, shared by each pie and its matching bar chart.
colors = ['#ff9999', '#66b3ff']

# Row 2: Department Attrition Analysis
ax5 = fig.add_subplot(gs[2, 0])
ax5.barh(dept_attrition.index, dept_attrition['attrition_rate'], color='#1f77b4', alpha=0.7)
ax5.set_title('Department Attrition Rate', fontsize=12, fontweight='bold')
ax5.set_xlabel('Attrition Rate (%)')

# Row 2: Gender Distribution
# Counts are aligned to the groupby order so a colour means the same gender in
# this pie and in the gender attrition bar chart.
ax6 = fig.add_subplot(gs[2, 1])
gender_dist = df['gender'].value_counts().reindex(gender_attrition.index, fill_value=0)
ax6.pie(gender_dist.values, labels=gender_dist.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax6.set_title('Gender Distribution', fontsize=12, fontweight='bold')

# Row 2: Technical Background Distribution (same alignment as above)
ax7 = fig.add_subplot(gs[2, 2])
tech_dist = df['technical_background'].value_counts().reindex(tech_attrition.index, fill_value=0)
ax7.pie(tech_dist.values, labels=tech_dist.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax7.set_title('Technical Background', fontsize=12, fontweight='bold')

# Row 2: Gender Attrition Comparison
ax8 = fig.add_subplot(gs[2, 3])
ax8.bar(gender_attrition.index, gender_attrition['attrition_rate'], color=colors, alpha=0.7)
ax8.set_title('Gender Attrition Rate', fontsize=12, fontweight='bold')
ax8.set_ylabel('Attrition Rate (%)')

# Row 3: Technical Background Attrition
ax9 = fig.add_subplot(gs[3, 0])
ax9.bar(tech_attrition.index, tech_attrition['attrition_rate'], color=colors, alpha=0.7)
ax9.set_title('Technical Background Attrition', fontsize=12, fontweight='bold')
ax9.set_ylabel('Attrition Rate (%)')

# Row 3: Salary Distribution
ax10 = fig.add_subplot(gs[3, 1])
ax10.hist(df['salary'], bins=15, color='#2ca02c', alpha=0.7, edgecolor='black')
ax10.set_title('Salary Distribution', fontsize=12, fontweight='bold')
ax10.set_xlabel('Salary ($)')
ax10.set_ylabel('Count')

# Row 3: Age Distribution
ax11 = fig.add_subplot(gs[3, 2])
ax11.hist(df['age'], bins=15, color='#ff7f0e', alpha=0.7, edgecolor='black')
ax11.set_title('Age Distribution', fontsize=12, fontweight='bold')
ax11.set_xlabel('Age')
ax11.set_ylabel('Count')

# Row 3: Performance Rating Distribution
ax12 = fig.add_subplot(gs[3, 3])
perf_dist = df['performance_rating'].value_counts().sort_index()
ax12.bar(perf_dist.index, perf_dist.values, color='#9467bd', alpha=0.7)
ax12.set_title('Performance Rating Distribution', fontsize=12, fontweight='bold')
ax12.set_xlabel('Performance Rating')
ax12.set_ylabel('Count')

# Row 4: Work-Life Balance Distribution
ax13 = fig.add_subplot(gs[4, 0])
wlb_dist = df['work_life_balance'].value_counts().sort_index()
ax13.bar(wlb_dist.index, wlb_dist.values, color='#17becf', alpha=0.7)
ax13.set_title('Work-Life Balance Distribution', fontsize=12, fontweight='bold')
ax13.set_xlabel('Work-Life Balance Score')
ax13.set_ylabel('Count')

# Row 4: Key Insights Summary
ax14 = fig.add_subplot(gs[4, 1:])
ax14.axis('off')

# Department lines are generated from the data, highest rate first, so the
# HIGHEST/LOWEST tags and the headline finding always match the numbers and no
# particular set of department names is required.
dept_ranked = dept_attrition.sort_values('attrition_rate', ascending=False)
last_rank = len(dept_ranked) - 1
dept_lines = []
for rank, (dept, rate) in enumerate(dept_ranked['attrition_rate'].items()):
    if rank == 0:
        tag = ' (HIGHEST)'
    elif rank == last_rank:
        tag = ' (LOWEST)'
    else:
        tag = ''
    dept_lines.append(f"‚Ä¢ {dept}: {rate:.1f}% attrition rate{tag}")
dept_block = "\n".join(dept_lines)

if dept_ranked.empty:
    top_dept_line = "‚Ä¢ No department data available"
else:
    top_dept_line = (
        f"‚Ä¢ {dept_ranked.index[0]} department shows highest attrition at "
        f"{dept_ranked['attrition_rate'].iloc[0]:.1f}%"
    )

# Gender labels 'Female'/'Male' and technical_background labels 'Yes'/'No' are the
# values the summary text refers to; a label missing from the data renders as N/A.
insights_text = f"""
üîç KEY INSIGHTS SUMMARY:

üìä OVERALL METRICS:
‚Ä¢ Total Employees: {len(df):,}
‚Ä¢ Overall Attrition Rate: {(df['attrition'] == 'Yes').mean()*100:.2f}%
‚Ä¢ Average Employee Age: {df['age'].mean():.1f} years
‚Ä¢ Average Salary: ${df['salary'].mean():,.0f}

üè¢ DEPARTMENT ANALYSIS:
{dept_block}

üë• GENDER ANALYSIS:
‚Ä¢ Female: {_rate_text(gender_attrition, 'Female')} attrition rate
‚Ä¢ Male: {_rate_text(gender_attrition, 'Male')} attrition rate

üíª TECHNICAL BACKGROUND ANALYSIS:
‚Ä¢ Technical: {_rate_text(tech_attrition, 'Yes')} attrition rate
‚Ä¢ Non-Technical: {_rate_text(tech_attrition, 'No')} attrition rate

üéØ CRITICAL FINDINGS:
{top_dept_line}
‚Ä¢ Technical employees have {_rate_text(tech_attrition, 'Yes')} attrition rate
‚Ä¢ Gender-based attrition: Female {_rate_text(gender_attrition, 'Female')} vs Male {_rate_text(gender_attrition, 'Male')}
"""

ax14.text(0.02, 0.98, insights_text, transform=ax14.transAxes, fontsize=10,
          verticalalignment='top', fontfamily='monospace',
          bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.8))

plt.show()

print("üì∏ Take screenshot of the complete dashboard above")


In [None]:
# Generate insights summary
# Text version of the dashboard findings. Reads df plus the dept_attrition,
# gender_attrition and tech_attrition tables built by the analysis cells above.
print("üîç HR ANALYTICS DASHBOARD - KEY INSIGHTS")
print("=" * 50)

print("\nüìä OVERALL METRICS:")
print(f"‚Ä¢ Total Employees: {len(df):,}")
print(f"‚Ä¢ Overall Attrition Rate: {(df['attrition'] == 'Yes').mean()*100:.2f}%")
print(f"‚Ä¢ Average Employee Age: {df['age'].mean():.1f} years")
print(f"‚Ä¢ Average Salary: ${df['salary'].mean():,.0f}")

print("\nüè¢ DEPARTMENT ANALYSIS:")
# Highest attrition rate first.
dept_attrition_sorted = dept_attrition.sort_values('attrition_rate', ascending=False)
for dept, rate in dept_attrition_sorted['attrition_rate'].items():
    print(f"‚Ä¢ {dept}: {rate:.1f}% attrition rate")

print("\nüë• GENDER ANALYSIS:")
for gender, rate in gender_attrition['attrition_rate'].items():
    print(f"‚Ä¢ {gender}: {rate:.1f}% attrition rate")

print("\nüíª TECHNICAL BACKGROUND ANALYSIS:")
for tech, rate in tech_attrition['attrition_rate'].items():
    print(f"‚Ä¢ {tech}: {rate:.1f}% attrition rate")

print("\nüéØ CRITICAL FINDINGS:")
# The top department is taken from the sorted table rather than hard-coded, so
# the statement stays true for whatever data was loaded.
if not dept_attrition_sorted.empty:
    top_dept = dept_attrition_sorted.index[0]
    top_rate = dept_attrition_sorted['attrition_rate'].iloc[0]
    print(f"‚Ä¢ {top_dept} department shows highest attrition at {top_rate:.1f}%")
# The lookups below require the labels 'Yes' (technical_background) and
# 'Female' / 'Male' (gender) to be present in the data.
print(f"‚Ä¢ Technical employees have {tech_attrition.loc['Yes', 'attrition_rate']:.1f}% attrition rate")
print(f"‚Ä¢ Gender-based attrition: Female {gender_attrition.loc['Female', 'attrition_rate']:.1f}% vs Male {gender_attrition.loc['Male', 'attrition_rate']:.1f}%")

print("\nüì∏ Take screenshot of the insights summary above")
