In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Walk the Kaggle input directory tree and print the full path of every file
# found. The sub-directory list yielded by os.walk is not needed, hence `_`.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the file name to get its path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from scipy.optimize import linear_sum_assignment

# Step 1: Load and preprocess data
def load_data():
    """Build the example healthcare-worker dataset.

    Returns:
        pandas.DataFrame: one row per worker with the columns ``worker_id``,
        ``role``, ``tasks`` (a list of task names per worker),
        ``availability_hours`` and ``experience_level``.
    """
    # Example dataset
    data = {
        'worker_id': [101, 102, 103, 104, 105],
        'role': ['Doctor', 'Nurse', 'Doctor', 'Lab Technician', 'Nurse'],
        'tasks': [
            ['Patient Checkup', 'Prescription'],
            ['Vitals Monitoring', 'Patient Care'],
            ['Patient Checkup', 'Diagnosis'],
            ['Sample Collection', 'Lab Testing'],
            ['Patient Care', 'Medication Administration'],
        ],
        'availability_hours': [8, 6, 4, 7, 5],
        'experience_level': ['Senior', 'Intermediate', 'Junior', 'Intermediate', 'Senior'],
    }

    return pd.DataFrame(data)

# Encode categorical features
def preprocess_data(df):
    """One-hot encode the categorical worker attributes.

    Args:
        df: DataFrame containing ``role`` and ``experience_level`` columns.

    Returns:
        A new DataFrame in which ``role`` and ``experience_level`` are replaced
        by dummy indicator columns; all other columns are carried over.
    """
    categorical_columns = ['role', 'experience_level']
    # drop_first=True omits the first level of each encoded column, so k
    # categories produce k-1 indicator columns.
    encoded = pd.get_dummies(df, columns=categorical_columns, drop_first=True)
    return encoded

# Step 2: Assign tasks and balance schedules
def balance_work_schedule(df, task_time=None):
    """Assign at most one task to each worker, minimising total task time.

    Every task listed by any worker becomes one column of a worker x task cost
    matrix (a task listed by several workers therefore appears several times).
    A worker may only receive a task whose name is in their own ``tasks`` list.
    The assignment is solved with ``scipy.optimize.linear_sum_assignment``.

    Args:
        df: DataFrame with a ``worker_id`` column and a ``tasks`` column whose
            entries are lists of task names.
        task_time: optional mapping of task name -> time required. When omitted,
            placeholder example durations for the healthcare tasks are used.

    Returns:
        pandas.DataFrame with columns ``worker_id``, ``tasks`` (the single
        assigned task name) and ``time_required``. Workers for whom no listed
        task is available are left out; the frame is empty (but keeps its
        columns) when nothing can be assigned.

    Raises:
        KeyError: if a listed task has no entry in ``task_time``.
    """
    if task_time is None:
        # Placeholder durations for example purposes only.
        task_time = {
            'Patient Checkup': 2,
            'Prescription': 1,
            'Vitals Monitoring': 1,
            'Patient Care': 3,
            'Diagnosis': 2,
            'Sample Collection': 1,
            'Lab Testing': 3,
            'Medication Administration': 2,
        }

    result_columns = ['worker_id', 'tasks', 'time_required']

    # One column per listed task occurrence, in worker order.
    tasks = [task for task_list in df['tasks'] for task in task_list]
    if not tasks:
        # No workers or no tasks: nothing to solve.
        return pd.DataFrame(columns=result_columns)

    missing = [task for task in dict.fromkeys(tasks) if task not in task_time]
    if missing:
        raise KeyError(f"No duration defined in task_time for: {missing}")

    durations = np.array([task_time[task] for task in tasks], dtype=float)

    # can_do[i, j] is True when task j's name is in worker i's task list.
    can_do = np.array(
        [[task in worker_tasks for task in tasks] for worker_tasks in df['tasks']],
        dtype=bool,
    )

    # Use a finite penalty instead of infinity for tasks outside a worker's
    # list: the solver raises on an infeasible matrix (e.g. a worker with an
    # empty task list). The penalty exceeds the sum of all durations, so the
    # solver always prefers one more allowed pairing over any penalised one.
    penalty = np.abs(durations).sum() + 1.0
    cost_matrix = np.where(can_do, durations, penalty)

    # Use linear_sum_assignment to assign tasks optimally
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    worker_ids = df['worker_id'].tolist()
    assignments = [
        {
            'worker_id': worker_ids[user_idx],
            'tasks': tasks[task_idx],
            'time_required': task_time[tasks[task_idx]],
        }
        for user_idx, task_idx in zip(row_ind, col_ind)
        if can_do[user_idx, task_idx]  # drop penalised (not allowed) pairings
    ]

    # Summarize daily schedule
    return pd.DataFrame(assignments, columns=result_columns)

# Main function
def main():
    """Run the demo: load the workers, encode them, and print a schedule."""
    # Raw example data
    workers = load_data()
    print("Initial Data:")
    print(workers.head())

    # One-hot encoded view of the same data
    encoded_workers = preprocess_data(workers)
    print("\nEncoded Data:")
    print(encoded_workers.head())

    # The heading is emitted before the scheduler runs; the schedule is built
    # from the raw (un-encoded) frame.
    print("\nBalanced Work Schedule:")
    balanced = balance_work_schedule(workers)
    print(balanced)

# Run the demo only when this code executes as the main module; importing it
# from elsewhere defines the functions without calling main().
if __name__ == "__main__":
    main()


Initial Data:
   user_id       role                     tasks  availability_hours  \
0        1   Frontend         [UI Fix, Bug Fix]                   8   
1        2    Backend         [API Development]                   6   
2        3   Frontend                  [UI Fix]                   4   
3        4    Backend          [Database Setup]                   7   
4        5  Fullstack  [Integration, Debugging]                   5   

  experience_level  
0     Intermediate  
1           Senior  
2           Junior  
3           Senior  
4     Intermediate  

Encoded Data:
   user_id                     tasks  availability_hours  role_Frontend  \
0        1         [UI Fix, Bug Fix]                   8           True   
1        2         [API Development]                   6          False   
2        3                  [UI Fix]                   4           True   
3        4          [Database Setup]                   7          False   
4        5  [Integration, Debugging]       