In [17]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from src.lfgp import LFGP
from collections import defaultdict

In [18]:
np.random.seed(0)

n_jobs = 50  # Number of job types
n_workers = 50  # Number of worker types
k = 6  # Dimension of the latent factor vector

# Initialize the latent ability estimate for each worker as a uniform probability distribution
# Generate the latent factor vector for jobs
jobs_latent_factors = np.random.randint(2, size=(n_jobs, k))
jobs_df = pd.DataFrame(jobs_latent_factors, columns=[f'Ability_{i}' for i in range(1, k+1)])
jobs_df['Job Type'] = [f'Job Type_{i}' for i in range(1, n_jobs+1)]

# Generate the latent factor vector for workers
workers_latent_factors = np.random.randint(2, size=(n_workers, k))
workers_df = pd.DataFrame(workers_latent_factors, columns=[f'Skill_{i}' for i in range(1, k+1)])
workers_df['Worker Type'] = [f'Worker Type_{i}' for i in range(1, n_workers+1)]

# Display the generated data
print("Jobs Database:")
print(jobs_df)
print("\nWorkers Database:")
print(workers_df)

Jobs Database:
    Ability_1  Ability_2  Ability_3  Ability_4  Ability_5  Ability_6  \
0           0          1          1          0          1          1   
1           1          1          1          1          1          0   
2           0          1          0          0          0          0   
3           0          1          0          1          1          0   
4           0          1          1          1          1          0   
5           1          0          1          0          1          1   
6           0          1          1          0          0          1   
7           0          1          1          1          1          1   
8           0          1          0          1          1          1   
9           1          0          1          0          0          1   
10          1          0          1          0          1          0   
11          0          0          0          0          1          1   
12          0          0          0          1   

In [19]:
# Initialize LFGP model instance
model = LFGP(lf_dim=6, n_worker_group=5, lambda1=1, lambda2=1)

# Here we need to extract the latent factor vectors from workers_df and jobs_df, and convert them into a format suitable for the LFGP model
workers_lf = workers_df.values
jobs_lf = jobs_df.values

# Directly set the model's worker and job latent factor vectors, as well as the corresponding group information
model.A = jobs_lf  # Assume A represents the latent factors of jobs
model.B = workers_lf  # Assume B represents the latent factors of workers

# Assume each worker and job is randomly assigned to a group
np.random.seed(42)
model.U = np.random.randint(0, 5, size=(n_jobs,))  # Job grouping
model.V = np.random.randint(0, 5, size=(n_workers,))  # Worker grouping

print("Job group information:", model.U)
print("Worker group information:", model.V)


Job group information: [3 4 2 4 4 1 2 2 2 4 3 2 4 1 3 1 3 4 0 3 1 4 3 0 0 2 2 1 3 3 2 3 3 0 2 4 2
 4 0 1 3 0 3 1 1 0 1 4 1 3]
Worker group information: [3 3 3 4 2 0 3 1 3 1 1 3 4 1 1 3 1 1 3 3 0 4 4 1 4 1 0 3 3 3 4 0 4 4 0 0 0
 0 3 2 2 0 2 2 0 2 4 1 1 0]


In [20]:
# Hypothetical group information for jobs and workers
jobs_group_info = np.array(model.U)
workers_group_info = np.array(model.V)

# Identify high-quality worker groups in each job group
def identify_high_quality_worker_groups(jobs_group_info, workers_group_info):
    # Build a mapping from job groups to worker groups
    job_to_workers_groups = defaultdict(lambda: defaultdict(int))
    
    for job_group, worker_group in zip(jobs_group_info, workers_group_info):
        job_to_workers_groups[job_group][worker_group] += 1
    
    high_quality_groups_for_jobs = {}
    
    for job_group, workers_groups in job_to_workers_groups.items():
        # Identify the worker group with the highest count in each job group as the high-quality group
        high_quality_group = max(workers_groups, key=workers_groups.get)
        high_quality_groups_for_jobs[job_group] = high_quality_group
    
    return high_quality_groups_for_jobs

# Execute the function to get the high-quality worker group for each job group
high_quality_groups = identify_high_quality_worker_groups(jobs_group_info, workers_group_info)

print("High-quality worker group for each job group:", high_quality_groups)

High-quality worker group for each job group: {3: 3, 4: 4, 2: 3, 1: 0, 0: 3}


In [21]:
matched_jobs = {}
for job_group, worker_group in high_quality_groups.items():
    jobs_in_group = [job_index for job_index, group in enumerate(jobs_group_info) if group == job_group]
    np.random.shuffle(jobs_in_group) 
    
    workers_in_group = [worker_index for worker_index, group in enumerate(workers_group_info) if group == worker_group]
    
    # Each worker is randomly matched to a job
    for worker_index in workers_in_group:
        if len(jobs_in_group) == 0:
            break  
        job_index = jobs_in_group.pop(0)  # Remove and get the first job in the list
        matched_jobs[worker_index] = job_index  

for worker_index, job_index in matched_jobs.items():
    print(f"worker {worker_index} is matched with job {job_index}")

worker 0 is matched with job 41
worker 1 is matched with job 23
worker 2 is matched with job 18
worker 6 is matched with job 33
worker 8 is matched with job 38
worker 11 is matched with job 24
worker 15 is matched with job 45
worker 18 is matched with job 7
worker 19 is matched with job 36
worker 27 is matched with job 26
worker 28 is matched with job 31
worker 29 is matched with job 28
worker 38 is matched with job 42
worker 3 is matched with job 21
worker 12 is matched with job 3
worker 21 is matched with job 9
worker 22 is matched with job 37
worker 24 is matched with job 35
worker 30 is matched with job 17
worker 32 is matched with job 12
worker 33 is matched with job 47
worker 46 is matched with job 1
worker 5 is matched with job 13
worker 20 is matched with job 46
worker 26 is matched with job 39
worker 31 is matched with job 15
worker 34 is matched with job 48
worker 35 is matched with job 44
worker 36 is matched with job 27
worker 37 is matched with job 20
worker 41 is matched 