In [2]:
import pandas as pd
import numpy as np
df = pd.read_csv('HR_data.csv')

### 1. What is the total number of employees?

In [4]:
total_employees = len(df)
print(f"Total Employees: {total_employees}")

Total Employees: 1470


### 2. What is the average age of employees?

In [6]:
avg_age = df['Age'].mean()
print(f"Average Age: {avg_age:.2f} years")

Average Age: 36.92 years


### 3. What is the overall attrition rate?

In [8]:
attrition_count = df[df['Attrition'] == 'Yes'].shape[0]
attrition_rate = (attrition_count / len(df)) * 100
print(f"Attrition Count: {attrition_count}")
print(f"Attrition Rate: {attrition_rate:.2f}%")

Attrition Count: 237
Attrition Rate: 16.12%


### 4. Which job roles see the most attrition?

In [10]:
attrition_by_job = df[df['Attrition'] == 'Yes']['JobRole'].value_counts()
print(attrition_by_job)

JobRole
Laboratory Technician        62
Sales Executive              57
Research Scientist           47
Sales Representative         33
Human Resources              12
Manufacturing Director       10
Healthcare Representative     9
Manager                       5
Research Director             2
Name: count, dtype: int64


### 5. What is the attrition distribution by gender?

In [12]:
attrition_by_gender = df[df['Attrition'] == 'Yes']['Gender'].value_counts()
print(attrition_by_gender)

Gender
Male      150
Female     87
Name: count, dtype: int64


### 6. What education backgrounds are most affected by attrition?

In [14]:
attrition_by_edu = df[df['Attrition'] == 'Yes']['EducationField'].value_counts(normalize=True) * 100
print(attrition_by_edu.round(2))

EducationField
Life Sciences       37.55
Medical             26.58
Marketing           14.77
Technical Degree    13.50
Other                4.64
Human Resources      2.95
Name: proportion, dtype: float64


### 7. How does attrition vary by age group?

In [16]:
bins = [18, 30, 45, 60]
labels = ['18-30', '31-45', '46-60']
df['AgeGroup'] = pd.cut(df['Age'], bins=bins, labels=labels, right=True)
attrition_by_age_group = df[df['Attrition'] == 'Yes']['AgeGroup'].value_counts()
print(attrition_by_age_group)

AgeGroup
31-45    103
18-30     96
46-60     34
Name: count, dtype: int64


### 8. Does employee experience (years at company) affect attrition?

In [37]:
attrition_by_years = df[df['Attrition'] == 'Yes']['YearsAtCompany'].value_counts().sort_index()
print(attrition_by_years.head(6))

YearsAtCompany
0    16
1    59
2    27
3    20
4    19
5    21
Name: count, dtype: int64


### 9. Which salary brackets see the most attrition?

In [20]:
salary_bins = [0, 5000, 10000, 20000]
salary_labels = ['0-5k', '6k-10k', '11k-20k']
df['SalaryRange'] = pd.cut(df['MonthlyIncome'], bins=salary_bins, labels=salary_labels, right=True)
attrition_by_salary = df[df['Attrition'] == 'Yes']['SalaryRange'].value_counts()
print(attrition_by_salary)

SalaryRange
0-5k       163
6k-10k      49
11k-20k     25
Name: count, dtype: int64
