In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Load & Prepare Data
# Read the raw employee table and parse JoinDate as day-first dates in one chain.
df = pd.read_csv("../data/employee_data.csv").assign(
    JoinDate=lambda frame: pd.to_datetime(frame["JoinDate"], dayfirst=True)
)

# NumPy Core Statistics
# Pull the three numeric columns out of the frame as plain ndarrays.
salary, experience, performance = (
    df[column].to_numpy()
    for column in ("Salary", "Experience", "PerformanceScore")
)

mean_salary = np.mean(salary)
median_experience = np.median(experience)
# NumPy's default is the population standard deviation (ddof=0).
performance_std = np.std(performance)

# Salary vs Experience Analysis
# Zero experience is swapped for NaN first, so those rows get a NaN ratio
# instead of a division by zero.
df["SalaryPerExperience"] = df["Salary"].div(df["Experience"].replace(0, np.nan))

# Rank on the ratio, keeping only the name and the ratio itself.
top_3 = df[["Name", "SalaryPerExperience"]].nlargest(3, "SalaryPerExperience")
bottom_3 = df[["Name", "SalaryPerExperience"]].nsmallest(3, "SalaryPerExperience")

# Department-Level Summary
# One row per department: mean salary, mean performance score, and the number
# of non-null EmployeeID values.
dept_summary = df.groupby("Department").agg(
    AvgSalary=pd.NamedAgg(column="Salary", aggfunc="mean"),
    AvgPerformance=pd.NamedAgg(column="PerformanceScore", aggfunc="mean"),
    EmployeeCount=pd.NamedAgg(column="EmployeeID", aggfunc="count"),
)

# Bonus Simulation
# np.select takes, per element, the choice of the FIRST condition that is
# true, so the tiers are listed from highest to lowest:
#   score >= 8        -> 10% of salary
#   7 <= score < 8    ->  5% of salary
#   anything else     ->  0 (the default)
# The middle tier is a range rather than an exact `== 7` match so that a
# fractional score such as 7.5 is not dropped to a zero bonus; for whole-number
# scores the result is unchanged.
# `performance` and `salary` are the arrays taken from df above, so they line
# up with df row by row.
conditions = [
    performance >= 8,
    performance >= 7,
]

choices = [
    salary * 0.10,
    salary * 0.05,
]

df["Bonus"] = np.select(conditions, choices, default=0)
df["FinalSalary"] = df["Salary"] + df["Bonus"]

# Time-Based Analysis
# Tag each row with the calendar year of JoinDate, then average Salary per year.
df["JoinYear"] = df["JoinDate"].dt.year
avg_salary_by_year = df["Salary"].groupby(df["JoinYear"]).mean()

# Final Results
# Gather every scalar and table computed above under a readable label,
# in presentation order.
analysis_results = dict(
    [
        ("Mean Salary", mean_salary),
        ("Median Experience", median_experience),
        ("Performance Std Dev", performance_std),
        ("Top Salary/Experience", top_3),
        ("Bottom Salary/Experience", bottom_3),
        ("Department Summary", dept_summary),
        ("Avg Salary By Join Year", avg_salary_by_year),
    ]
)


In [2]:
# Bundle the three scalar statistics into one labelled Series for display.
summary_stats = pd.Series(
    {
        "Mean Salary": mean_salary,
        "Median Experience": median_experience,
        "Performance Std Dev": performance_std,
    }
)

summary_stats


Mean Salary            59125.000000
Median Experience          5.000000
Performance Std Dev        0.866025
dtype: float64

In [3]:
# Display the three rows with the highest SalaryPerExperience.
top_3

Unnamed: 0,Name,SalaryPerExperience
2,Rahul,20000.0
0,Amit,15000.0
5,Sara,12000.0


In [4]:
# Display the three rows with the lowest SalaryPerExperience.
bottom_3

Unnamed: 0,Name,SalaryPerExperience
4,Vikram,9000.0
3,Neha,9375.0
1,Priya,10400.0


In [5]:
# Display the per-department averages and employee counts.
dept_summary

Unnamed: 0_level_0,AvgSalary,AvgPerformance,EmployeeCount
Department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Finance,71500.0,8.5,2
HR,50000.0,7.5,2
IT,57500.0,7.0,4


In [9]:
# Display the mean Salary for each JoinYear.
avg_salary_by_year

JoinYear
2014    90000.0
2016    75000.0
2018    68000.0
2019    53500.0
2020    48000.0
2021    45000.0
2022    40000.0
Name: Salary, dtype: float64

In [7]:
# Export the summary tables as CSV files.
# to_csv does not create missing parent folders, so make sure the output
# directory exists first; otherwise a fresh checkout fails on the first write.
output_dir = Path("../outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# top_3 / bottom_3 already carry Name as a column, so their positional index
# is dropped; the other two keep their index (Department / JoinYear) because
# it holds the group labels.
top_3.to_csv(output_dir / "top_salary_per_experience.csv", index=False)
bottom_3.to_csv(output_dir / "bottom_salary_per_experience.csv", index=False)
dept_summary.to_csv(output_dir / "department_summary.csv")
avg_salary_by_year.to_csv(output_dir / "avg_salary_by_join_year.csv")


In [8]:
# Export the employee table, including the columns derived in this notebook.
# The parent folder is created on demand so this cell also works on a fresh
# checkout where the output directory does not exist yet.
final_path = Path("../outputs/final_employee_data.csv")
final_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(final_path, index=False)
