<a href="https://colab.research.google.com/github/MoonlightO2/HR-Data/blob/main/HR_Performance_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Human Resource Data Analysis**
***Dataset***: https://www.kaggle.com/datasets/rhuebner/human-resources-data-set

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# Location of the HR dataset CSV that the next cell reads with pd.read_csv.
# NOTE(review): the '/content/drive/MyDrive' prefix looks like a Google Drive
# mount inside Colab — confirm Drive is mounted before running, or point this
# at a local copy of the file.
file = '/content/drive/MyDrive/Projects/Portfolio/HR/HR Data II.csv'

In [12]:
# Read the CSV referenced by `file` into a DataFrame.
data = pd.read_csv(file)

# Quick sanity check: (rows, columns) followed by every column name.
print('Shape: ', data.shape, '\n')
print('Columns: ', list(data.columns), '\n')

# Trailing expression so the notebook renders the first five rows.
data.head(5)

Shape:  (104, 37) 

Columns:  ['Unnamed: 0', 'Employee_Name', 'EmpID', 'MarriedID', 'MaritalStatusID', 'GenderID', 'EmpStatusID', 'DeptID', 'PerfScoreID', 'FromDiversityJobFairID', 'Salary', 'Termd', 'PositionID', 'Position', 'State', 'Zip', 'DOB', 'Sex', 'MaritalDesc', 'CitizenDesc', 'HispanicLatino', 'RaceDesc', 'DateofHire', 'DateofTermination', 'TermReason', 'EmploymentStatus', 'Department', 'ManagerName', 'ManagerID', 'RecruitmentSource', 'PerformanceScore', 'EngagementSurvey', 'EmpSatisfaction', 'SpecialProjectsCount', 'LastPerformanceReview_Date', 'DaysLateLast30', 'Absences'] 



Unnamed: 0.1,Unnamed: 0,Employee_Name,EmpID,MarriedID,MaritalStatusID,GenderID,EmpStatusID,DeptID,PerfScoreID,FromDiversityJobFairID,...,ManagerName,ManagerID,RecruitmentSource,PerformanceScore,EngagementSurvey,EmpSatisfaction,SpecialProjectsCount,LastPerformanceReview_Date,DaysLateLast30,Absences
0,1,"Ait Sidi, Karthikeyan",10084,1,1,1,5,3,3,0,...,Simon Roup,4.0,Indeed,Fully Meets,4.96,3,6,2/24/2016,0,17
1,2,"Akinkuolie, Sarah",10196,1,1,0,5,5,3,0,...,Kissy Sullivan,20.0,LinkedIn,Fully Meets,3.02,3,0,5/15/2012,0,3
2,4,"Anderson, Carol",10069,0,2,0,5,5,3,0,...,Webster Butler,39.0,Google Search,Fully Meets,5.0,4,0,2/1/2016,0,2
3,10,"Baczenski, Rachael",10252,1,1,0,5,5,3,1,...,David Stanley,14.0,Diversity Job Fair,Fully Meets,4.2,4,0,1/30/2016,0,12
4,11,"Barbara, Thomas",10242,1,1,1,5,5,3,1,...,Kissy Sullivan,20.0,Diversity Job Fair,Fully Meets,4.2,3,0,5/6/2016,0,15


### **Employee Performance Report**

In [18]:
# Define a function to generate the employee performance report
def generate_performance_report(data):
    """Summarise an HR DataFrame as a nested dictionary of report sections.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns read below: EmpID, Salary, EngagementSurvey,
        EmpSatisfaction, Absences, Termd, TermReason, DateofHire,
        DateofTermination, SpecialProjectsCount, PerfScoreID, RaceDesc and
        FromDiversityJobFairID.

    Returns
    -------
    dict
        Maps each section title to a dict of ``metric name -> value``.
        Values are scalars, except the two entries under
        'Diversity and Inclusion Analysis', which are normalised
        ``value_counts`` Series. When no row has ``Termd == 1`` (or no
        termination reason is recorded), 'Most Common Termination Reasons'
        is ``None``; when no hire-to-termination span can be averaged,
        'Average Time-to-Fill' is ``None``.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``data``.
    """
    # Rows flagged Termd == 1 are the ones this report treats as terminated.
    # Select them once instead of re-filtering for every metric.
    terminated = data[data['Termd'] == 1]

    # Define key metrics
    key_metrics = {
        'Employee Count': data['EmpID'].nunique(),
        'Average Salary': data['Salary'].mean(),
        'Average Engagement Survey Score': data['EngagementSurvey'].mean(),
        'Average Employee Satisfaction': data['EmpSatisfaction'].mean(),
        # NOTE(review): this is the plain mean of the 'Absences' column; the
        # "(Last 30 days)" window in the label is not established here — confirm.
        'Average Absences (Last 30 days)': data['Absences'].mean()
    }

    # Generate employee turnover analysis
    # idxmax() raises ValueError on an empty Series, so guard the case where
    # there are no terminated rows or every termination reason is missing.
    reason_counts = terminated['TermReason'].value_counts()
    most_common_reason = reason_counts.idxmax() if not reason_counts.empty else None
    turnover_analysis = {
        'Total Terminated Employees': terminated['EmpID'].nunique(),
        'Termination Rate': data['Termd'].mean(),
        # Despite the plural label this is the single most frequent reason.
        'Most Common Termination Reasons': most_common_reason
    }

    # Generate recruitment efficiency analysis
    # NOTE(review): the value below is the mean hire-to-termination span of the
    # terminated rows (i.e. tenure), not a time-to-fill figure. Likewise
    # 'Average Cost-per-Hire' is the mean salary of the terminated rows. The
    # keys are kept unchanged so existing consumers of the report still work.
    tenure = (
        pd.to_datetime(terminated['DateofTermination'])
        - pd.to_datetime(terminated['DateofHire'])
    )
    average_tenure = tenure.mean()
    recruitment_efficiency_analysis = {
        # mean() yields NaT when there is nothing to average.
        'Average Time-to-Fill': None if pd.isna(average_tenure) else average_tenure.days,
        'Average Cost-per-Hire': terminated['Salary'].mean()
    }

    # Generate training and development analysis
    training_analysis = {
        # Share of rows with at least one special project; the vectorised
        # comparison replaces a per-row lambda and gives the same fraction.
        'Training Participation Rate': data['SpecialProjectsCount'].gt(0).mean()
    }

    # Generate performance reviews analysis
    performance_reviews_analysis = {
        'Average Performance Score': data['PerfScoreID'].mean()
    }

    # Generate diversity and inclusion analysis
    # Both entries are Series of proportions indexed by the distinct values.
    diversity_inclusion_analysis = {
        'Diversity Metrics': data['RaceDesc'].value_counts(normalize=True),
        'Inclusion Initiatives': data['FromDiversityJobFairID'].value_counts(normalize=True)
    }

    # Combine all analysis results
    analysis_results = {
        'Key Metrics': key_metrics,
        'Turnover Analysis': turnover_analysis,
        'Recruitment Efficiency Analysis': recruitment_efficiency_analysis,
        'Training and Development Analysis': training_analysis,
        'Performance Reviews Analysis': performance_reviews_analysis,
        'Diversity and Inclusion Analysis': diversity_inclusion_analysis
    }

    return analysis_results

# Build the nested report dictionary from the loaded HR data
performance_report = generate_performance_report(data)

# Walk the report section by section: heading first, then one
# "- name: value" line (followed by a blank line) per metric.
for section in performance_report:
    metrics = performance_report[section]
    print(f'{section}:')
    for metric in metrics:
        value = metrics[metric]
        print(f'- {metric}: {value}\n')

Key Metrics:
- Employee Count: 104

- Average Salary: 65690.07692307692

- Average Engagement Survey Score: 4.090480769230769

- Average Employee Satisfaction: 3.8846153846153846

- Average Absences (Last 30 days): 11.048076923076923

Turnover Analysis:
- Total Terminated Employees: 104

- Termination Rate: 1.0

- Most Common Termination Reasons: Another Position

Recruitment Efficiency Analysis:
- Average Time-to-Fill: 1264

- Average Cost-per-Hire: 65690.07692307692

Training and Development Analysis:
- Training Participation Rate: 0.14423076923076922

Performance Reviews Analysis:
- Average Performance Score: 2.9038461538461537

Diversity and Inclusion Analysis:
- Diversity Metrics: White                        0.605769
Black or African American    0.278846
Asian                        0.086538
Two or more races            0.028846
Name: RaceDesc, dtype: float64

- Inclusion Initiatives: 0    0.846154
1    0.153846
Name: FromDiversityJobFairID, dtype: float64

