**Importing Libraries**

In [3]:
import pandas as pd
import numpy as np

**Uploading files**

In [1]:
from google.colab import files
uploaded = files.upload()

Saving attendance.csv to attendance.csv
Saving employees.csv to employees.csv
Saving tasks.csv to tasks.csv


**Loading files**

In [4]:
dfAttendance = pd.read_csv(r"/content/attendance.csv")
dfTasks = pd.read_csv(r"/content/tasks.csv")
dfEmpl = pd.read_csv(r"/content/employees.csv")

**Printing DataFrames**

In [5]:
dfAttendance.head()

Unnamed: 0,attendanceID,employeeID,date,clockIN,clockOUT,isLate,isAbscent
0,1,1,2024-06-01,2024-06-01 09:02:00,2024-06-01 17:00:00,1,0
1,2,2,2024-06-01,2024-06-01 08:55:00,2024-06-01 17:10:00,0,0
2,3,3,2024-06-01,,,0,1
3,4,4,2024-06-01,2024-06-01 09:10:00,2024-06-01 17:05:00,1,0
4,5,5,2024-06-01,2024-06-01 08:50:00,2024-06-01 16:45:00,0,0


In [6]:
dfTasks.head()

Unnamed: 0,taskID,employeeID,taskName,taskDate,tasksCompleted,hoursSpent,productivityScore
0,1,1,API Integration,2024-06-01,5,6.0,0.83
1,2,2,Content Calendar Creation,2024-06-01,3,4.5,0.67
2,3,3,Policy Review,2024-06-01,0,0.0,0.0
3,4,4,CI/CD Setup,2024-06-01,4,5.0,0.8
4,5,5,Invoice Auditing,2024-06-01,6,7.5,0.8


In [7]:
dfEmpl.head()

Unnamed: 0,employeeID,name,department,role,email,hireDate,status
0,1,John Doe,Engineering,Software Developer,john.doe@example.com,2023-01-15 00:00:00,Active
1,2,Jane Smith,Marketing,Content Strategist,jane.smith@example.com,2022-11-20 00:00:00,Active
2,3,Alice Johnson,HR,HR Manager,alice.johnson@example.com,2021-09-10 00:00:00,Active
3,4,Bob Brown,Engineering,DevOps Engineer,bob.brown@example.com,2023-05-01 00:00:00,Active
4,5,Eva Green,Finance,Accountant,eva.green@example.com,2022-06-30 00:00:00,Resigned


**Cleaning null values or missing entries**

In [8]:
dfTasks = dfTasks.dropna()
dfAttendance = dfAttendance.dropna()

**Calculating workhours, breaktimes and productivity score**

In [9]:
df = dfAttendance.merge(dfTasks, how="inner", on="employeeID").merge(dfEmpl, how="inner", on="employeeID")

In [10]:
df["workingHours"] = round(abs((pd.to_datetime(df["clockIN"]) - pd.to_datetime(df["clockOUT"])).dt.total_seconds() / 3600), 2)
df["productivityScore"] = round(df["tasksCompleted"] / df["workingHours"], 2)
df["breakTimes"] = round(df["workingHours"] / 4)

In [11]:
df.head()

Unnamed: 0,attendanceID,employeeID,date,clockIN,clockOUT,isLate,isAbscent,taskID,taskName,taskDate,...,hoursSpent,productivityScore,name,department,role,email,hireDate,status,workingHours,breakTimes
0,1,1,2024-06-01,2024-06-01 09:02:00,2024-06-01 17:00:00,1,0,1,API Integration,2024-06-01,...,6.0,0.63,John Doe,Engineering,Software Developer,john.doe@example.com,2023-01-15 00:00:00,Active,7.97,2.0
1,2,2,2024-06-01,2024-06-01 08:55:00,2024-06-01 17:10:00,0,0,2,Content Calendar Creation,2024-06-01,...,4.5,0.36,Jane Smith,Marketing,Content Strategist,jane.smith@example.com,2022-11-20 00:00:00,Active,8.25,2.0
2,4,4,2024-06-01,2024-06-01 09:10:00,2024-06-01 17:05:00,1,0,4,CI/CD Setup,2024-06-01,...,5.0,0.51,Bob Brown,Engineering,DevOps Engineer,bob.brown@example.com,2023-05-01 00:00:00,Active,7.92,2.0
3,5,5,2024-06-01,2024-06-01 08:50:00,2024-06-01 16:45:00,0,0,5,Invoice Auditing,2024-06-01,...,7.5,0.76,Eva Green,Finance,Accountant,eva.green@example.com,2022-06-30 00:00:00,Resigned,7.92,2.0


**top and bottom performer**

In [12]:
summary = df.groupby("employeeID").agg(
    hoursSpent=pd.NamedAgg(column="workingHours", aggfunc="mean"),
    productivityScore=pd.NamedAgg(column="productivityScore", aggfunc="mean"),
    abscentCount=pd.NamedAgg(column="isAbscent", aggfunc="sum")
)

In [19]:
summary_final = dfEmpl[["employeeID", "name"]].merge(summary, on="employeeID", how="left")
topPerformer = summary_final.sort_values("productivityScore", ascending=False).iloc[0, :].rename("TopPerformer")
bottomPerformer = summary_final.sort_values(["abscentCount", "productivityScore"], ascending=[False, True]).iloc[0, :].rename("BottomPerformer")

**Delieverables:-**

In [13]:
#Cleaned attendance and task dataset

dfAttendance.to_csv("cleaned_attendance.csv")
dfTasks.to_csv("cleaned_tasks.csv")

In [20]:
#report of top and bottom performers

print("-----------------Top performer report----------------------")
print(f"Top performer: {topPerformer.iloc[1]}")
for i, j in topPerformer.items():
  if i != "name":
    print(f"{i}: {j}")
print("-----------------------------------------------------------\n")

print("-----------------Bottom performer report-------------------")
print(f"Bottom performer: {bottomPerformer.iloc[1]}")
for i, j in bottomPerformer.items():
  if i != "name":
    print(f"{i}: {j}")
print("-----------------------------------------------------------\n")

-----------------Top performer report----------------------
Top performer: Eva Green
employeeID: 5
hoursSpent: 7.92
productivityScore: 0.76
abscentCount: 0.0
-----------------------------------------------------------

-----------------Bottom performer report-------------------
Bottom performer: Jane Smith
employeeID: 2
hoursSpent: 8.25
productivityScore: 0.36
abscentCount: 0.0
-----------------------------------------------------------

