In [1]:
import os
import math
from datetime import datetime
from dotenv import load_dotenv
import pymysql


# Load environment variables (python-dotenv) before the settings below are read.
load_dotenv()


# Database configuration; os.getenv returns None for any variable that is not set.
DB_HOST, DB_USER, DB_PASSWORD, DB_SCHEMA = (
    os.getenv(setting) for setting in ("DB_HOST", "DB_USER", "DB_PASSWORD", "DB_SCHEMA")
)


# Connect to the database (port 3306 is fixed here rather than read from the environment)
con = pymysql.connect(
    host=DB_HOST,
    user=DB_USER,
    password=DB_PASSWORD,
    port=3306,
    database=DB_SCHEMA,
)


# Report rows are collected here, one dict per employee.
data = []

# Reporting period: `days` is the number of day columns generated for `month` of `year`.
year = 2024
month = 11
days = 30


# Prepare the headers: fixed employee columns, one "DD Month" column per day, then the totals
headers = [
    "Emp Id", "Name", "Deploy Site", "Employee Type",
    *(datetime(year, month, day).date().strftime('%d %B') for day in range(1, days + 1)),
    "Total Days", "Total OT",
]


# Define cutoff time for reducing OT hours if necessary
cutoff_time = datetime.strptime("07:30:00", "%H:%M:%S").time()


# Fetch employee data and build one report row (a dict keyed by header name) per employee.
#
# The inclusive date window is derived once from the configured year/month/days so that
# it matches the generated header columns and never names a non-existent calendar day
# (the previous hard-coded "-31" end date is invalid for 30-day months).
period_start = f"{year}-{month:02d}-01"
period_end = f"{year}-{month:02d}-{days:02d}"

try:
    with pymysql.cursors.DictCursor(con) as cursor:
        cursor.execute("SELECT * FROM employees")
        employees = cursor.fetchall()

        for employee in employees:
            # Initialize employee record in data
            employee_data = {
                "Emp Id": employee['empId'],
                "Name": employee['employeeName'],
                "Deploy Site": employee['employeeDeploySite'],
                "Employee Type": employee['employeeTypeId'],
                "Total Days": 0,
                "Total OT": 0
            }

            # Fetch attendance records with daily OT for each day
            with pymysql.cursors.DictCursor(con) as attCursor:
                attCursor.execute(
                    "SELECT * FROM attendance_record WHERE emp_id = %s AND date BETWEEN %s AND %s ORDER BY date",
                    (employee['empId'], period_start, period_end)
                )
                attendances = attCursor.fetchall()

                # Initialize OT tracking
                total_ot = 0

                for attendance in attendances:
                    # Format punch-in and punch-out times: characters 11-18 are taken as HH:MM:SS.
                    # Assumes the values are "YYYY-MM-DD HH:MM:SS"-style strings — TODO confirm against the schema.
                    punch_in_time = attendance['punch_in'][11:19] if attendance['punch_in'] else "NaN"
                    punch_out_time = attendance['punch_out'][11:19] if attendance['punch_out'] else "NaN"

                    # Set daily OT hours with a default value of 0 if it's missing or None
                    ot_hours = attendance['ot_hours'] if attendance['ot_hours'] is not None else 0
                    if attendance['punch_in'] and datetime.strptime(punch_in_time, "%H:%M:%S").time() > cutoff_time:
                        ot_hours = max(0, ot_hours - 1)  # Deduct 1 hour if punch-in is late

                    # Update the day's column with punch times and OT hours
                    date_key = attendance["date"].strftime("%d %B")
                    employee_data[date_key] = (
                        f"{punch_in_time}\n{punch_out_time}\nOT: {ot_hours}"
                    )

                    # Accumulate total OT for the month
                    total_ot += ot_hours

            # Fetch attendance history for total days
            with pymysql.cursors.DictCursor(con) as attCursor2:
                attCursor2.execute(
                    "SELECT * FROM attendance_history WHERE emp_id = %s AND year = %s AND month = %s",
                    (employee['empId'], year, month)
                )
                history = attCursor2.fetchone()

                if history:
                    employee_data["Total Days"] = math.ceil(history['days'])
                    # NOTE(review): "Total OT" is only filled in when a history row exists, so an
                    # employee with attendance rows but no history row reports 0 — confirm this is intended.
                    employee_data["Total OT"] = total_ot

            data.append(employee_data)
finally:
    # Close the database connection even when a query or row conversion fails
    con.close()


# Now `data` contains daily OT values along with total monthly OT.





In [2]:
import pandas as pd

# Turn the per-employee dicts into a table; keys missing from a row become empty cells.
df = pd.DataFrame(data)

# Add an all-empty column for every expected header that no row supplied.
absent_headers = [column for column in headers if column not in df.columns]
for column in absent_headers:
    df[column] = None

# Keep only the expected columns, in header order, and write the report.
df = df[headers]
df.to_csv("output.csv", index=False)

# Checking Abnormal OT hours


In [3]:
import os
from datetime import datetime

from dotenv import load_dotenv
import pymysql

# Load environment variables (python-dotenv) before the settings below are read.
load_dotenv()

# Database settings; os.getenv yields None for any variable that is not set.
DB_HOST, DB_USER, DB_PASSWORD, DB_SCHEMA = map(
    os.getenv, ("DB_HOST", "DB_USER", "DB_PASSWORD", "DB_SCHEMA")
)

con = pymysql.connect(
    host=DB_HOST,
    user=DB_USER,
    password=DB_PASSWORD,
    port=3306,
    database=DB_SCHEMA,
)

# One dict per employee, holding the flagged days
data = []
# for employee details, plus the punch-in / punch-out values already recorded
employees, punchIns, punchOuts = [], [], []
# Month being checked
month = 5
# Defined here but not referenced by the code shown in this cell
days = 30

def entryExists(data, empId):
    """Report whether any record in ``data`` has an ``'empId'`` value equal to ``empId``.

    Args:
        data: Iterable of subscriptable records that each provide an ``'empId'`` key.
        empId: The employee id to look for.

    Returns:
        True if a matching record is found, otherwise False (also for empty ``data``).
    """
    return any(record['empId'] == empId for record in data)

def getIndex(data, empId):
    """Find the position of the first record in ``data`` whose ``'empId'`` equals ``empId``.

    Args:
        data: Sequence of subscriptable records that each provide an ``'empId'`` key.
        empId: The employee id to look for.

    Returns:
        The zero-based index of the first match, or None when no record matches.
    """
    for position, record in enumerate(data):
        if record['empId'] == empId:
            return position
    return None

# Collect attendance rows with more than 4 OT hours or fewer than 5 work hours for the
# 1st-8th of the configured month (year fixed to 2024), grouped per employee in `data`.
month_prefix = f"2024-{month:02d}"

try:
    with pymysql.cursors.DictCursor(con) as attCursor:
        attCursor.execute(
            "SELECT * FROM attendance_record WHERE (ot_hours > 4 OR work_hours < 5) AND date BETWEEN %s AND %s ORDER BY date",
            (f"{month_prefix}-01", f"{month_prefix}-08"),
        )

        attendances = attCursor.fetchall()

        for attendance in attendances:
            punch_in = attendance["punch_in"]
            punch_out = attendance["punch_out"]

            # Only rows with both punches present, neither of which has been recorded
            # before, contribute to punchIns / punchOuts / employees.
            if punch_in is not None and punch_in not in punchIns and punch_out is not None and punch_out not in punchOuts:
                if not entryExists(employees, attendance['emp_id']):
                    attCursor.execute("SELECT * FROM employees WHERE empId = %s", (attendance['emp_id'],))
                    employee = attCursor.fetchone()
                    # fetchone() gives None when no employee row matches; storing None would
                    # make the next entryExists(employees, ...) scan fail on None['empId'].
                    if employee is not None:
                        employees.append(employee)

                punchIns.append(punch_in)
                punchOuts.append(punch_out)

            if not entryExists(data, attendance['emp_id']):
                data.append({ "empId": attendance['emp_id'] })

            index = getIndex(data, attendance['emp_id'])

            # Build the cell text from plain locals: reusing double quotes inside a
            # double-quoted f-string only parses on Python 3.12+.
            punch_in_text = punch_in if punch_in is not None else "NaN"
            punch_out_text = punch_out if punch_out is not None else "NaN"
            data[index][attendance["date"].strftime("%d %B")] = (
                f"{punch_in_text}\n{punch_out_text}\n"
                f"work hours: {attendance['work_hours']}\not hours: {attendance['ot_hours']}"
            )
finally:
    # Release the connection even when a query or row conversion fails
    con.close()

In [5]:
# Sanity check: sizes of the recorded punch-in, punch-out and employee lists
print(*map(len, (punchIns, punchOuts, employees)))

526 526 169


In [6]:
import json

# Write each collected list to its own JSON file in the working directory,
# in the same order as before: employees, punch-ins, punch-outs.
for target_path, payload in (
    ("employees.json", employees),
    ("punchIns.json", punchIns),
    ("punchOuts.json", punchOuts),
):
    with open(target_path, "w") as file:
        json.dump(payload, file)


In [2]:
import pandas as pd

# One row per employee in `data`; days without a flagged record become empty cells.
df = pd.DataFrame(data)

# Write the table without the index column (file name kept exactly as before).
df.to_csv(path_or_buf="anamoly.csv", index=False)