In [1]:
import featuretools as ft
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import random

In [None]:
# Run the synthetic-data generator notebook before anything else.
# NOTE(review): EmployeeManager, CourseManager, LeaveManager and TimesheetManager,
# which are used further down, are not defined in this notebook; presumably they
# come from the notebook executed here — confirm.
%run ../../../data/raw/employee_generator.ipynb

In [None]:
class DynamicFeatureEngineering:
    """Build a featuretools EntitySet around employees and run Deep Feature Synthesis.

    Four dataframes are registered: ``employees`` (the parent) and three child
    tables, ``courses``, ``leaves`` and ``timesheets``, each linked to
    ``employees`` through the ``"Employee ID"`` column.

    Parameters
    ----------
    employee_data : pandas.DataFrame
        Must contain ``"Employee ID"``, ``"Start Date"`` and ``"Termination Date"``.
    course_data : pandas.DataFrame
        Must contain ``"Course ID"``, ``"Course Start Date"`` and ``"Employee ID"``.
    leave_data : pandas.DataFrame
        Must contain ``"Leave ID"``, ``"Leave Date"`` and ``"Employee ID"``.
    timesheet_data : pandas.DataFrame
        Must contain ``"Timesheet ID"``, ``"Timesheet Date"`` and ``"Employee ID"``.

    Attributes
    ----------
    es : featuretools.EntitySet
        The assembled entity set (id ``"AttritionFeatures"``).
    feature_defs : list or None
        Feature definitions from the most recent ``generate_features`` call;
        ``None`` until that method has been run.
    """

    def __init__(self, employee_data, course_data, leave_data, timesheet_data):
        self.employee_data = employee_data
        self.course_data = course_data
        self.leave_data = leave_data
        self.timesheet_data = timesheet_data
        self.feature_defs = None
        # Derive the label first so that "Attrition" is already a column of the
        # frame that gets registered as the "employees" dataframe below.
        self.add_attrition_feature()

        self.es = ft.EntitySet(id="AttritionFeatures")

        self.es = self.es.add_dataframe(dataframe_name="employees",
                                        dataframe=self.employee_data,
                                        index="Employee ID",
                                        time_index="Start Date")

        # (dataframe name, frame, index column, time-index column) for every
        # child table; each one is linked to employees via "Employee ID".
        child_tables = (
            ("courses", self.course_data, "Course ID", "Course Start Date"),
            ("leaves", self.leave_data, "Leave ID", "Leave Date"),
            ("timesheets", self.timesheet_data, "Timesheet ID", "Timesheet Date"),
        )
        for table_name, frame, index_column, time_column in child_tables:
            self.es = self.es.add_dataframe(dataframe_name=table_name,
                                            dataframe=frame,
                                            index=index_column,
                                            time_index=time_column)
            self.es = self.es.add_relationship("employees", "Employee ID",
                                               table_name, "Employee ID")

    def add_attrition_feature(self):
        """Attach a binary ``"Attrition"`` column to ``self.employee_data``.

        The value is 1 where ``"Termination Date"`` is present and 0 where it
        is missing. A new frame is built with ``assign`` and rebound to
        ``self.employee_data``, so the DataFrame object originally handed to
        the constructor is left untouched. Calling the method again simply
        recomputes the column.

        Raises
        ------
        KeyError
            If ``self.employee_data`` has no ``"Termination Date"`` column.
        """
        has_left = self.employee_data['Termination Date'].notna()
        # Explicit int64 keeps the dtype independent of the platform's default int.
        self.employee_data = self.employee_data.assign(Attrition=has_left.astype("int64"))

    def generate_features(self):
        """Run Deep Feature Synthesis with ``employees`` as the target dataframe.

        The feature definitions produced by ``ft.dfs`` are kept on
        ``self.feature_defs`` so they can be inspected or reused later.

        Returns
        -------
        pandas.DataFrame
            The feature matrix returned by ``ft.dfs``.
        """
        # NOTE(review): "Termination Date" and "Attrition" are both columns of the
        # employees frame, so they (and date features derived from the former) may
        # end up in the matrix — confirm they are dropped before modelling.
        # NOTE(review): no interesting values are set on this EntitySet in this
        # class; where_primitives may therefore have no effect — confirm.
        feature_matrix, feature_defs = ft.dfs(entityset=self.es,
                                            target_dataframe_name="employees",
                                            agg_primitives=["mean", "sum", "max", "min", "mode", "count", "trend", "std"],
                                            trans_primitives=["add_numeric", "multiply_numeric", "month", "weekday", "day", "year", "is_weekend"],
                                            where_primitives=["mean", "sum"],
                                            max_depth=3)
        self.feature_defs = feature_defs
        return feature_matrix

In [7]:
def dynamic_features(n_employees):
    """Generate synthetic HR tables for ``n_employees`` and return their feature matrix.

    An ``EmployeeManager`` is created first; course, leave and timesheet
    managers are then built from its ``employees`` attribute (the timesheet
    manager additionally receives the current time). The four resulting tables
    are passed to ``DynamicFeatureEngineering`` and the output of its
    ``generate_features`` method is returned.
    """
    # All managers are constructed before any table is extracted.
    staff = EmployeeManager(n_employees)
    training = CourseManager(staff.employees)
    absences = LeaveManager(staff.employees)
    hours = TimesheetManager(staff.employees, datetime.now())

    # Positional order matches the DynamicFeatureEngineering constructor:
    # employees, courses, leaves, timesheets.
    tables = (
        staff.get_employee_data(),
        training.get_course_data(),
        absences.get_leave_data(),
        hours.get_timesheet_data(),
    )

    return DynamicFeatureEngineering(*tables).generate_features()