In [1]:
# Prompt for the input files through Colab's upload dialog. The analysis below
# expects attendance.csv, tasks.csv and employees.csv to be available by those names.
# `uploaded` keeps whatever files.upload() returns; no later cell reads it.
from google.colab import files
uploaded = files.upload()


Saving attendance.csv to attendance.csv
Saving tasks.csv to tasks.csv
Saving employees.csv to employees.csv


In [2]:
# Bring in the analysis libraries, then read each input CSV into its own DataFrame.
import pandas as pd
import numpy as np

# One read_csv call per file, unpacked in the same order as the file names.
attendance_df, tasks_df, employees_df = map(
    pd.read_csv,
    ("attendance.csv", "tasks.csv", "employees.csv"),
)


In [3]:
# Normalise the date column and both clock columns so shift durations can be computed.
attendance_df['date'] = pd.to_datetime(attendance_df['date'])

# Each clock column and the full-timestamp column that is derived from it.
full_column_for = {'clock_in': 'clock_in_full', 'clock_out': 'clock_out_full'}

# Pass 1: parse the HH:MM:SS values and keep only the time-of-day part.
for clock_col in full_column_for:
    parsed_clock = pd.to_datetime(attendance_df[clock_col], format='%H:%M:%S')
    attendance_df[clock_col] = parsed_clock.dt.time

# Pass 2: join "<date> <time>" as text and parse that into a full timestamp.
for clock_col, full_col in full_column_for.items():
    stamp_text = attendance_df['date'].astype(str) + ' ' + attendance_df[clock_col].astype(str)
    attendance_df[full_col] = pd.to_datetime(stamp_text)


In [4]:
# Shift length per attendance row: clock-out timestamp minus clock-in timestamp, in hours.
# NOTE(review): both timestamps are built from the same `date`, so a clock_out that is
# earlier than clock_in (e.g. a shift crossing midnight) gives a negative value here —
# confirm the data contains no such rows.
shift_length = attendance_df['clock_out_full'].sub(attendance_df['clock_in_full'])
attendance_df['work_hours'] = shift_length.dt.total_seconds().div(3600)


In [11]:
# Merge attendance with employee details and derive a productivity score per attendance row.
# Inner join: attendance rows whose employee_id is missing from employees_df are dropped.
merged_df = attendance_df.merge(employees_df, on='employee_id')

# Tasks per employee = number of non-null task_name entries in tasks_df.
task_counts = (
    tasks_df.groupby('employee_id')['task_name']
    .count()
    .reset_index(name='tasks_completed')
)

# The left join keeps employees that have no rows in tasks_df; their count comes back
# as NaN, which really means "zero tasks", so store it as an integer 0 instead of
# letting NaN propagate into the score.
temp_df = merged_df.merge(task_counts, on='employee_id', how='left')
temp_df['tasks_completed'] = temp_df['tasks_completed'].fillna(0).astype(int)

# Dividing by a zero-length shift would produce inf (which then sorts to the top of any
# ranking and turns group means into inf); a non-positive duration cannot give a
# meaningful rate, so those rows get NaN and are skipped by later mean() calls.
valid_hours = temp_df['work_hours'].where(temp_df['work_hours'] > 0)

# NOTE(review): tasks_completed is the employee's total over all of tasks_df, while
# work_hours is per attendance row, so each row's score is "total tasks / that row's hours".
temp_df['productivity_score'] = temp_df['tasks_completed'] / valid_hours


In [12]:
# Per-employee report: average hours per attendance row and average productivity score.
group_keys = ['employee_id', 'name', 'department_name']
metric_cols = ['work_hours', 'productivity_score']
summary = temp_df.groupby(group_keys)[metric_cols].mean().reset_index()

# Order by average productivity, best first, and keep the first three rows.
ranked_summary = summary.sort_values(by='productivity_score', ascending=False)
top_performers = ranked_summary.head(3)
print("🏆 Top 3 Performers:\n", top_performers)


🏆 Top 3 Performers:
    employee_id   name department_name  work_hours  productivity_score
5            6  Manoj           Sales    7.000000            0.142857
2            3  Sneha           Sales    7.250000            0.138095
1            2   Anil              IT    7.458333            0.134116


In [8]:
# Detect employees with very low attendance (present on at most one distinct date).
days_by_employee = attendance_df.groupby('employee_id')['date'].nunique()

# An employee with no attendance rows at all never shows up in the groupby result,
# which would hide the most absent people. Add every id known to employees_df with
# 0 days so they are flagged too; ids that only occur in attendance_df are kept.
all_employee_ids = days_by_employee.index.union(
    employees_df['employee_id'].dropna().unique()
)
daily_attendance = (
    days_by_employee.reindex(all_employee_ids, fill_value=0)
    .rename_axis('employee_id')
    .reset_index(name='days_present')
)

# Threshold matches the printed label: one day present or fewer.
absentees = daily_attendance[daily_attendance['days_present'] <= 1]
print("Frequent Absentees (<= 1 day present):\n", absentees)


Frequent Absentees (<= 1 day present):
    employee_id  days_present
5            6             1
6            7             1
7            8             1


In [19]:
# Build one report holding the highest- and lowest-scoring employees, save it as CSV
# and download it.

# Derive the bottom three from `summary` right here (lowest average productivity first,
# mirroring how the top three are selected) so this cell does not depend on a variable
# left over from an earlier kernel session.
bottom_performers = summary.sort_values(by='productivity_score', ascending=True).head(3)

# Label each group; assign() returns new frames, so `summary` itself is not modified.
top_performers = top_performers.assign(Rank='Top')
bottom_performers = bottom_performers.assign(Rank='Bottom')

# Combine and save (index=False keeps the positional index out of the file).
final_report = pd.concat([top_performers, bottom_performers])
final_report.to_csv("top_bottom_performers.csv", index=False)

# Download
from google.colab import files
files.download("top_bottom_performers.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Save the per-employee summary (average work hours and productivity score) as
# performance_report.csv, without the index column, then pass it to Colab's files.download.
summary.to_csv("performance_report.csv", index=False)

from google.colab import files
files.download("performance_report.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [15]:
# Save the row-level table (attendance merged with employee details, task counts and
# productivity score) as cleaned_attendance.csv, without the index column, then pass
# it to Colab's files.download.
temp_df.to_csv("cleaned_attendance.csv", index=False)
from google.colab import files
files.download("cleaned_attendance.csv")



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>