In [None]:
import random
import numpy as np
from datetime import datetime, timedelta
import pandas as pd

In [None]:
class Employee:
    """A single synthetic employee record with randomly generated attributes.

    Every attribute is drawn independently from the ``random`` / ``numpy.random``
    generators, so seeding both makes the output reproducible (apart from the
    dependence on the current date).

    NOTE(review): ``length_of_service``, ``age`` and ``start_date`` are drawn
    independently of one another, so they are not guaranteed to be mutually
    consistent (e.g. a start date 35 years ago with 2 years of service).
    """

    business_units = [
        'Sales', 'IT', 'Finance', 'HR', 'Operations', 'R&D', 'Marketing',
        'Product Development', 'Customer Support', 'Logistics',
    ]
    departments = [
        'Software Development', 'Network Administration', 'Cybersecurity',
        'IT Support', 'Database Management', 'B2B Sales', 'B2C Sales',
        'International Sales', 'Channel Sales', 'Retail Sales',
    ]
    job_titles = [
        'Manager', 'Executive', 'Analyst', 'Clerk', 'Officer', 'Senior Manager',
        'Junior Executive', 'Lead Analyst', 'Senior Clerk', 'Chief Officer',
        'Principal Engineer', 'Data Scientist', 'Full-Stack Developer',
        'UI/UX Designer', 'Product Manager',
    ]
    marital_statuses = ['Single', 'Married', 'Divorced', 'Widowed']
    genders = ['Male', 'Female', 'Non-Binary']

    def __init__(self, emp_id, n_employees, employee_ids, attrition_rate):
        """Generate a random employee.

        Args:
            emp_id: Identifier stored on the record.
            n_employees: Total head count; accepted for interface
                compatibility but not used by the generator.
            employee_ids: Sequence of ids from which the current and previous
                manager are drawn.
            attrition_rate: Probability (0..1) that the employee is flagged
                as attrited.
        """
        self.emp_id = emp_id
        # An empty id pool has nobody to pick from; record "no manager"
        # instead of letting random.choice raise IndexError.
        self.manager_id = random.choice(employee_ids) if len(employee_ids) > 0 else None
        self.previous_manager_id = self._pick_previous_manager(employee_ids, self.manager_id)
        self.business_unit = random.choice(Employee.business_units)
        self.department = random.choice(Employee.departments)
        self.job_title = random.choice(Employee.job_titles)
        # numpy's randint excludes the upper bound: service is 0..29, age 18..64.
        self.length_of_service = np.random.randint(0, 30)
        self.age = np.random.randint(18, 65)
        # Start date lies anywhere in the last 40 * 365 days (inclusive).
        self.start_date = (datetime.now() - timedelta(days=random.randint(0, 40 * 365))).date()
        self.attrition = random.choices(
            [True, False], weights=[attrition_rate, 1 - attrition_rate], k=1
        )[0]
        self.termination_date = self._calculate_termination_date() if self.attrition else None
        self.marital_status = random.choice(Employee.marital_statuses)
        self.gender = random.choice(Employee.genders)
        # random.randint is inclusive on both ends, so each value is already
        # capped at length_of_service without an extra min().
        self.years_with_current_manager = random.randint(0, self.length_of_service)
        self.years_with_previous_manager = random.randint(0, self.length_of_service)
        self.years_since_last_promotion = random.randint(0, self.length_of_service)
        self.previous_job_title = random.choice(
            [title for title in Employee.job_titles if title != self.job_title]
        )

    @staticmethod
    def _pick_previous_manager(employee_ids, manager_id):
        """Return a random id different from ``manager_id``, or ``None`` if none exists.

        Uses rejection sampling so that no filtered copy of ``employee_ids``
        has to be built for every employee; the result is still uniform over
        the entries that differ from ``manager_id``.
        """
        # Guard first: without at least one differing id the loop below
        # could never terminate (and the old code raised IndexError).
        if not any(candidate != manager_id for candidate in employee_ids):
            return None
        while True:
            candidate = random.choice(employee_ids)
            if candidate != manager_id:
                return candidate

    def _calculate_termination_date(self):
        """Return a random date between the start date and today, or ``None``.

        NOTE(review): a fair coin decides whether a date is produced at all,
        so only about half of the employees flagged as attrited end up with
        a termination date. Confirm that this is intended.
        """
        if random.choice([True, False]):
            days_until_now = (datetime.now().date() - self.start_date).days
            return self.start_date + timedelta(days=random.randint(0, days_until_now))
        return None

    def to_dict(self):
        """Return the exported fields as a flat dict keyed by column label.

        The ``attrition`` flag itself is not part of the exported record.
        """
        return {
            'Employee ID': self.emp_id,
            'Manager ID': self.manager_id,
            'Previous Manager ID': self.previous_manager_id,
            'Business Unit': self.business_unit,
            'Department': self.department,
            'Job Title': self.job_title,
            'Length of Service': self.length_of_service,
            'Age': self.age,
            'Start Date': self.start_date,
            'Termination Date': self.termination_date,
            'Marital Status': self.marital_status,
            'Gender': self.gender,
            'Years with current manager': self.years_with_current_manager,
            'Years with previous manager': self.years_with_previous_manager,
            'Years since last promotion': self.years_since_last_promotion,
            'Previous Job Title': self.previous_job_title
        }

In [None]:
class EmployeeManager:
    """Creates and holds a population of synthetic ``Employee`` records."""

    def __init__(self, n_employees, attrition_rate=0.5):
        """Generate ``n_employees`` employees with ids ``1..n_employees``.

        Args:
            n_employees: Number of employees to create.
            attrition_rate: Forwarded unchanged to every ``Employee``.
        """
        ids = list(range(1, n_employees + 1))
        self.n_employees = n_employees
        self.employee_ids = ids
        # Every employee receives the full id list as its manager pool.
        self.employees = [
            Employee(emp_id, n_employees, ids, attrition_rate)
            for emp_id in ids
        ]

    def get_employee_data(self):
        """Return a DataFrame with one row per generated employee."""
        records = [employee.to_dict() for employee in self.employees]
        return pd.DataFrame(records)

In [None]:
class Course:
    """A randomly generated training-course record for one employee."""

    course_titles = [
        'ML Basics',
        'Advanced Data Analysis',
        'Software Development',
        'Leadership Training',
        'Statistics 101',
        'Deep Learning',
        'Data Engineering',
        'Time Series Analysis',
        'Natural Language Processing',
        'Convolutional Neural Networks',
        'Reinforcement Learning',
        'Unsupervised Learning Techniques',
        'Feature Engineering and Selection',
        'Model Deployment and Monitoring',
        'Bayesian Statistics and Inference',
        'Linear Algebra Refresher',
        'Optimization Techniques',
        'Probabilistic Graphical Models',
        'Stochastic Processes',
        'Agile and Scrum Methodologies',
        'Git and Version Control',
        'Design Patterns in Python',
        'Containerization with Docker',
        'Cloud Computing with AWS/Azure/GCP',
        'Big Data with Spark and Hadoop',
        'Cybersecurity Basics for Data Scientists',
        'Business Intelligence and Reporting Tools',
        'Ethics in AI and ML',
        'Product Management for Technical Leaders',
        'Calculus for Machine Learning',
        'Multivariate Statistics',
        'Mathematical Optimization',
        'Graph Theory and Applications',
        'Topology in Data Analysis',
        'Information Theory Basics',
        'Generative Adversarial Networks',
        'Transfer Learning Techniques',
        'Few-shot and Zero-shot Learning',
        'Neural Architecture Search',
        'Sequence to Sequence Models',
        'Transformer Architectures and BERT',
        'Test-Driven Development (TDD)',
        'Continuous Integration and Continuous Deployment (CI/CD)',
        'Microservices Architecture',
        'Full Stack Development with Python',
        'Asynchronous Programming in Python',
        'Functional Programming with Python',
        'Quantum Machine Learning',
        'Edge Computing and ML',
        'Machine Learning for IoT',
        'Medical Image Analysis with DL',
        'Audio and Speech Processing',
        'Anomaly Detection Techniques',
        'Visualizations with D3.js and Python',
        'Data Governance and Management',
        'Quantum Computing Basics',
        'Blockchain for Data Scientists',
        'Automated Machine Learning (AutoML)',
        'MLOps: ML Engineering Best Practices',
        'Bias and Fairness in AI',
        'Philosophy of AI and Machine Learning',
        'AI for Social Good',
        'TensorFlow 2.x and Keras Deep Dive',
        'PyTorch for Deep Learning',
        'Scikit-learn Advanced Techniques',
        'SQL and NoSQL Databases for Data Scientists',
        'Data Streaming with Kafka and Python',
        'FinTech and Algorithmic Trading',
        'ML in Healthcare and Bioinformatics',
        'Natural Language Understanding (NLU)',
        'AI in Gaming and Simulation',
        'ML for Geospatial Analysis',
    ]
    course_types = ['Online', 'Workshop', 'Seminar']

    def __init__(self, emp_id, start_date_employee, end_date_range):
        """Generate a random course taken by employee ``emp_id``.

        Args:
            emp_id: Identifier of the employee taking the course.
            start_date_employee: Earliest date on which the course may start.
            end_date_range: Latest date on which the course may start; also
                the cut-off used when deciding whether a completion date is
                recorded.

        Raises:
            ValueError: If ``end_date_range`` is earlier than
                ``start_date_employee`` (``random.randint`` rejects the
                resulting empty range).
        """
        self.emp_id = emp_id
        self.title = random.choice(Course.course_titles)
        # random.randint includes both bounds, so the upper bound has to be
        # 100 (not 101) to keep the rate inside 0.00..1.00.
        self.completion_rate = random.randint(0, 100) / 100
        self.type = random.choice(Course.course_types)
        window_days = (end_date_range - start_date_employee).days
        self.start_date = start_date_employee + timedelta(days=random.randint(0, window_days))
        self.end_date = self._calculate_end_date(end_date_range)

    def _calculate_end_date(self, end_date_range):
        """Return a completion date up to 365 days after the course start.

        Returns ``None`` when the drawn date is not strictly before
        ``end_date_range``.
        """
        course_end_date = self.start_date + timedelta(days=random.randint(0, 365))
        return course_end_date if course_end_date < end_date_range else None

    def to_dict(self):
        """Return the course as a flat dict keyed by column label."""
        return {
            'Employee ID': self.emp_id,
            'Course Title': self.title,
            'Course Completion Rate': self.completion_rate,
            'Type of Course': self.type,
            'Course Start Date': self.start_date,
            'Course Completion Date': self.end_date
        }

In [None]:
class CourseManager:
    """Generates random ``Course`` records for a collection of employees."""

    def __init__(self, employees):
        """Create between 1 and 5 courses for every employee.

        Args:
            employees: Iterable of objects exposing ``emp_id``,
                ``start_date`` and ``termination_date``.
        """
        self.courses = []
        for emp in employees:
            for _ in range(random.randint(1, 5)):
                # Courses are bounded by the termination date when there is
                # one, otherwise by today's date.
                window_end = emp.termination_date or datetime.now().date()
                self.courses.append(Course(emp.emp_id, emp.start_date, window_end))

    def get_course_data(self):
        """Return a DataFrame with one row per generated course."""
        records = [entry.to_dict() for entry in self.courses]
        return pd.DataFrame(records)

In [None]:
class Leave:
    """A randomly generated leave record for one employee."""

    leave_types = ['Sick Leave', 'Vacation', 'Unpaid Leave', 'Maternity Leave', 'Paternity Leave']

    def __init__(self, emp_id):
        """Draw random leave attributes for employee ``emp_id``.

        The values are drawn in a fixed order (frequency, type, duration,
        time since last leave) so results are reproducible under a seed.
        """
        self.emp_id = emp_id
        frequency = random.randint(1, 12)
        leave_type = random.choice(Leave.leave_types)
        duration = random.randint(1, 30)
        gap = random.randint(0, 365)
        self.frequency = frequency
        self.type = leave_type
        self.duration = duration
        self.time_since_last_leave = gap

    def to_dict(self):
        """Return the leave record as a flat dict keyed by column label."""
        record = {'Employee ID': self.emp_id}
        record['Leave Frequency'] = self.frequency
        record['Type of Leave'] = self.type
        record['Duration of Leave'] = self.duration
        record['Time Since Last Leave'] = self.time_since_last_leave
        return record

In [None]:
class LeaveManager:
    """Generates random ``Leave`` records for a collection of employees."""

    def __init__(self, employees):
        """Create between 1 and 10 leave records for every employee.

        Args:
            employees: Iterable of objects exposing ``emp_id``.
        """
        # The per-employee count is drawn once, before that employee's
        # records are generated.
        self.leaves = [
            Leave(emp.emp_id)
            for emp in employees
            for _ in range(random.randint(1, 10))
        ]

    def get_leave_data(self):
        """Return a DataFrame with one row per generated leave record."""
        records = [entry.to_dict() for entry in self.leaves]
        return pd.DataFrame(records)

In [None]:
class Timesheet:
    """A randomly generated daily timesheet entry for one employee."""

    def __init__(self, emp_id, current_date):
        """Draw a random timesheet entry within the 365 days before ``current_date``.

        Args:
            emp_id: Identifier of the employee the entry belongs to.
            current_date: Reference point; must support subtraction of a
                ``timedelta`` and expose ``.date()`` (e.g. a ``datetime``).
        """
        self.emp_id = emp_id
        days_back = random.randint(0, 365)
        logged_on = current_date - timedelta(days=days_back)
        self.date = logged_on.date()
        overtime = random.randint(0, 6)
        self.overtime_logged = overtime
        # Total time is a fixed 8 hours plus whatever overtime was drawn.
        self.time_logged = 8 + overtime

    def to_dict(self):
        """Return the entry as a flat dict keyed by column label."""
        record = {'Employee ID': self.emp_id}
        record['Timesheet Date'] = self.date
        record['Overtime Logged'] = self.overtime_logged
        record['Time Logged'] = self.time_logged
        return record

In [None]:
class TimesheetManager:
    """Generates random ``Timesheet`` entries for a collection of employees."""

    def __init__(self, employees, current_date):
        """Create between 200 and 250 timesheet entries for every employee.

        Args:
            employees: Iterable of objects exposing ``emp_id``.
            current_date: Reference point passed through to every
                ``Timesheet``.
        """
        self.timesheets = []
        for emp in employees:
            log_count = random.randint(200, 250)
            self.timesheets.extend(
                Timesheet(emp.emp_id, current_date) for _ in range(log_count)
            )

    def get_timesheet_data(self):
        """Return a DataFrame with one row per generated timesheet entry."""
        records = [entry.to_dict() for entry in self.timesheets]
        return pd.DataFrame(records)

In [None]:
def combine_employee_data(n_employees):
    """Generate all synthetic tables and join them into a single DataFrame.

    Employees, courses, leaves and timesheets are generated in that order,
    then the three detail tables are left-joined onto the employee table by
    ``'Employee ID'``.

    NOTE: each detail table holds several rows per employee, so the chained
    joins produce one output row for every (course, leave, timesheet)
    combination of an employee; the result grows multiplicatively.

    Args:
        n_employees: Number of employees to generate.

    Returns:
        The joined ``pandas.DataFrame``.
    """
    # Construction order is kept fixed because every manager consumes the
    # shared random number generator.
    staff = EmployeeManager(n_employees)
    course_source = CourseManager(staff.employees)
    leave_source = LeaveManager(staff.employees)
    timesheet_source = TimesheetManager(staff.employees, datetime.now())

    combined_data = staff.get_employee_data()
    detail_tables = (
        course_source.get_course_data(),
        leave_source.get_leave_data(),
        timesheet_source.get_timesheet_data(),
    )
    for detail in detail_tables:
        combined_data = combined_data.merge(detail, on='Employee ID', how='left')

    return combined_data