In [None]:
# Import necessary libraries
import io

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from google.colab import files

# Upload the dataset
print("Please upload the dataset.")
uploaded = files.upload()

# Fail with a clear message (rather than an IndexError further down) when the
# upload dialog was cancelled or returned no files.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and select a CSV file.")

# Load the uploaded dataset
file_name = next(iter(uploaded))  # Name of the first uploaded file
# Parse the uploaded bytes directly instead of re-reading from disk by name:
# Colab can save the upload under a different name (e.g. "name (2).csv") when
# a file with that name already exists, so a lookup by name may not hit the
# file that was just uploaded.
data = pd.read_csv(io.BytesIO(uploaded[file_name]))

# Display the first few rows to understand the structure
print("Dataset Preview:")
print(data.head())

# Display basic information about the dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("\nDataset Info:")
data.info()

# Display basic statistics for numeric columns
print("\nBasic Statistics:")
print(data.describe())

# Check for missing values (count of nulls per column)
missing_values = data.isnull().sum()
print("\nMissing Values in Each Column:")
print(missing_values)

# Custom KPI 1: Attrition Rate
# Percentage of non-null 'Attrition' values equal to 'Yes'. Series.get() is
# used so that a dataset with no 'Yes' rows reports 0% instead of raising
# KeyError.
attrition_rate = data['Attrition'].value_counts(normalize=True).get('Yes', 0.0) * 100
print(f"\nAttrition Rate: {attrition_rate:.2f}%")

# KPI 2 -- mean of 'MonthlyIncome' within each 'Department'
avg_income_by_dept = data['MonthlyIncome'].groupby(data['Department']).mean()
print("\nAverage Monthly Income by Department:", avg_income_by_dept, sep="\n")

# KPI 3 -- share of each 'JobSatisfaction' value, expressed as a percentage
job_satisfaction = data['JobSatisfaction'].value_counts(normalize=True).mul(100)
print("\nJob Satisfaction Distribution (Percentage):", job_satisfaction, sep="\n")

# KPI 1 chart: pie with one slice per distinct 'Attrition' value
attrition_counts = data['Attrition'].value_counts()
fig1 = px.pie(
    names=attrition_counts.index,
    values=attrition_counts.values,
    title="Attrition Rate",
)
fig1.show()

# KPI 2 chart: one bar per department, bar height = mean monthly income
income_axis_labels = {'x': 'Department', 'y': 'Average Monthly Income'}
fig2 = px.bar(
    x=avg_income_by_dept.index,
    y=avg_income_by_dept.values,
    title="Average Monthly Income by Department",
    labels=income_axis_labels,
)
fig2.show()

# KPI 3 chart: one bar per job-satisfaction score, bar height = percentage
satisfaction_axis_labels = {'x': 'Job Satisfaction Score', 'y': 'Percentage'}
fig3 = px.bar(
    x=job_satisfaction.index,
    y=job_satisfaction.values,
    title="Job Satisfaction Distribution",
    labels=satisfaction_axis_labels,
)
fig3.show()

# Custom KPI 4: Overtime Impact on Attrition
# For each 'OverTime' value, the fraction (0-1) of rows with each 'Attrition'
# value. Rows are OverTime values, columns are Attrition values.
# fill_value=0 makes a combination that never occurs show up as a 0 rate
# rather than NaN.
overtime_attrition = (
    data.groupby('OverTime')['Attrition']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
)
print("\nAttrition Rate by Overtime Status:")
print(overtime_attrition)

# Visualize KPI 4: Overtime vs Attrition
# Grouped bars: one group per OverTime value, one bar per Attrition value.
fig4 = px.bar(overtime_attrition, barmode='group',
              labels={'value': 'Attrition Rate', 'OverTime': 'Overtime'},
              title="Overtime vs Attrition")
fig4.show()

# Bonus: Dashboard Summary Table
# One row per headline metric. The third row is the JobSatisfaction score with
# the largest share of employees (job_satisfaction.idxmax()); it is a
# satisfaction score, not a job level, so it is labelled accordingly.
kpi_summary = pd.DataFrame({
    "Metric": [
        "Attrition Rate",
        "Highest Paying Department",
        "Most Common Job Satisfaction Score",
    ],
    "Value": [
        f"{attrition_rate:.2f}%",
        avg_income_by_dept.idxmax(),  # department with the highest mean income
        job_satisfaction.idxmax(),    # most frequent satisfaction score
    ],
})
print("\nDashboard Summary:")
print(kpi_summary)



Please upload the dataset.


Saving HR-Employee-Attrition.csv to HR-Employee-Attrition (2).csv
Dataset Preview:
   Age Attrition     BusinessTravel  DailyRate              Department  \
0   41       Yes      Travel_Rarely       1102                   Sales   
1   49        No  Travel_Frequently        279  Research & Development   
2   37       Yes      Travel_Rarely       1373  Research & Development   
3   33        No  Travel_Frequently       1392  Research & Development   
4   27        No      Travel_Rarely        591  Research & Development   

   DistanceFromHome  Education EducationField  EmployeeCount  EmployeeNumber  \
0                 1          2  Life Sciences              1               1   
1                 8          1  Life Sciences              1               2   
2                 2          2          Other              1               4   
3                 3          4  Life Sciences              1               5   
4                 2          1        Medical              1            


Attrition Rate by Overtime Status:
Attrition        No       Yes
OverTime                     
No         0.895636  0.104364
Yes        0.694712  0.305288



Dashboard Summary:
                      Metric   Value
0             Attrition Rate  16.12%
1  Highest Paying Department   Sales
2   Most Satisfied Job Level       4
