### Importing libraries :

In [11]:
import pandas as pd
import numpy as np
import warnings 

warnings.filterwarnings("ignore")

In [269]:
from pulp import *

### Importing the csv files created: 

In [387]:
user_scores =  pd.read_csv("Job_scoring_users.csv")
user_preferences = pd.read_csv("user_preferences.csv")

In [389]:
user_preferences

Unnamed: 0,Candidate,Panchkula,Ambala,Faridabad,Gurgaon,Panipat
0,A,1,2,5,4,3
1,B,2,1,5,4,3
2,C,2,3,1,5,4
3,D,2,3,5,1,4
4,E,1,2,5,4,3
5,F,1,1,4,3,4
6,G,2,3,5,4,1
7,H,2,1,5,4,3
8,I,1,2,5,4,3
9,J,1,2,5,4,3


### Deciding the scoring method: 

In [390]:
users = list(user_scores.Candidate.values)
jobs = list( user_scores.columns[1:user_scores.shape[1]].values)
user_scores_array = user_scores.iloc[:,range(1,user_scores.shape[1])].values

In [391]:
print( "  Maximum user score is : " ,user_scores_array.max())

  Maximum user score is :  8


The maximum score provided for scoring the user (without considering the user preferences) is 58

If we build our user preferences, scoring such that the difference between any 2 levels is more than this maximum score, it'll ensure that user preferences are of foremost priority in job matching. That is, even if a user is scored very low then other location user will not be selected

In [392]:
max_score = 10

In [393]:
user_preferences = user_preferences.replace({
    1:5 * max_score, 
    2:4 * max_score,
    3:3 * max_score,
    4:2 * max_score,
    5:1 * max_score})
user_preferences_num_cols =  user_preferences.iloc[:,range(1,user_scores.shape[1])]

In [394]:
user_preferences_num_cols

Unnamed: 0,Panchkula,Ambala,Faridabad,Gurgaon,Panipat
0,50,40,10,20,30
1,40,50,10,20,30
2,40,30,50,10,20
3,40,30,10,50,20
4,50,40,10,20,30
5,50,50,20,30,20
6,40,30,10,20,50
7,40,50,10,20,30
8,50,40,10,20,30
9,50,40,10,20,30


In [292]:
#user_preferences = user_preferences.replace({1:1,2:0.1,3:0.01,4:0.001,5:0.0001})*10000
#user_preferences_num_cols =  user_preferences.iloc[:,range(1,user_scores.shape[1])]

In [395]:
user_preferences_0_array =  user_preferences_num_cols.values

### Decision variable: 

In [397]:
prob = LpProblem("Matching Jobs", LpMaximize)
y = LpVariable.dicts("pair", [(i,j)  for i in range(len(users)) for j in range(len(jobs)) ] ,cat='Binary')

### Maximization problem : 

In [398]:
prob += lpSum([ (user_preferences_0_array[i][j] + user_scores_array[i][j]) * y[(i,j)] for i in range(len(users)) for j in range(len(jobs)) ])

### Contraints:

In [399]:
## each person should be given only one job 

for i in range(len(users)):
    prob += lpSum(y[(i,j)] for j in range(len(jobs))) <= 1

## place constraints: 
for j in [0]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 4
                  
for j in [1]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 2     
                  
for j in [2]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 2     
                  
for j in [3]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 2     

for j in [4]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 3     
    
prob.solve()

1

### Creating a record of all the score values :

In [413]:
scores_array = user_preferences_0_array.copy()
scores_array[:,:] = 0

for i in range(len(users)):
    for j in range(len(jobs)):
        scores_array[i,j] = (user_preferences_0_array[i][j] + user_scores_array[i][j] ) 

## Presenting results with the cells highlighted 

##### Creating array of matched values: 

In [401]:
matches_array =  user_preferences_0_array.copy()
matches_array[:,:] = 0

for i in range(len(users)):
    for j in range(len(jobs)):
        matches_array[i,j] =  y[(i,j)].varValue 
        
matches_df =  pd.DataFrame(matches_array, columns =  user_scores.columns[range(1,user_scores.shape[1] )] )
matches_df['Candidate'] = user_scores["Candidate"]
matches_df =matches_df[user_scores.columns]
matches_df

Unnamed: 0,Candidate,Panchkula,Ambala,Faridabad,Gurgaon,Panipat
0,A,0,0,0,0,0
1,B,0,1,0,0,0
2,C,0,0,1,0,0
3,D,0,0,0,1,0
4,E,1,0,0,0,0
5,F,0,0,0,0,0
6,G,0,0,0,0,1
7,H,0,1,0,0,0
8,I,0,0,0,0,1
9,J,0,0,0,0,0


### Combining users preferences and scores :

In [402]:
results_df =  user_scores.copy()
for colnames  in results_df.columns[range(1,len(results_df.columns))]:
    results_df[colnames] = user_scores[colnames].astype('str') + "," + user_preferences[colnames].astype('str')

### Results with coloured tiles : 

In [403]:

style_df = (
        matches_df == 1                  # Compare DataFrames
).replace({
    True: 'background-color:yellow',  # True Styles
    False: ''                      # False Styles
})

results_df =  results_df.style.apply(lambda _: style_df, axis=None)
results_df

Unnamed: 0,Candidate,Panchkula,Ambala,Faridabad,Gurgaon,Panipat
0,A,250,240,210,220,230
1,B,440,450,410,420,430
2,C,540,530,550,510,520
3,D,240,230,210,250,220
4,E,650,640,610,620,630
5,F,250,250,220,230,220
6,G,440,430,410,420,450
7,H,840,850,810,820,830
8,I,450,440,410,420,430
9,J,350,340,310,320,330


### Caste constraints : 

Introducing A, F, J  as SC:  

In [404]:
user_SC_index =  [0,5,9]

Setting 2 as number of SC required : 

In [405]:
SC_required =  2

### Decision variable: 

In [406]:
prob = LpProblem("Matching Jobs", LpMaximize)
y = LpVariable.dicts("pair", [(i,j)  for i in range(len(users)) for j in range(len(jobs)) ] ,cat='Binary')

### Maximization problem : 

In [407]:
prob += lpSum([ (user_preferences_0_array[i][j] + user_scores_array[i][j]) * y[(i,j)] for i in range(len(users)) for j in range(len(jobs)) ])

### Contraints:

In [408]:
## each person should be given only one job 

for i in range(len(users)):
    prob += lpSum(y[(i,j)] for j in range(len(jobs))) <= 1

## place constraints: 
for j in [0]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 4
                  
for j in [1]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 2     
                  
for j in [2]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 2     
                  
for j in [3]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 2     

for j in [4]:
    prob += lpSum(y[(i,j)] for i in range(len(users))) == 3    
    
    
## SC constraints: 

prob += lpSum([  y[(i,j)] for i in user_SC_index for j in range(len(jobs))  ]) == SC_required
    
    
prob.solve()

1

In [409]:
matches_array =  user_preferences_0_array.copy()
matches_array[:,:] = 0

for i in range(len(users)):
    for j in range(len(jobs)):
        matches_array[i,j] =  y[(i,j)].varValue 
        
matches_df =  pd.DataFrame(matches_array, columns =  user_scores.columns[range(1,user_scores.shape[1] )] )
matches_df['Candidate'] = user_scores["Candidate"]
matches_df =matches_df[user_scores.columns]
matches_df

results_df =  user_scores.copy()
for colnames  in results_df.columns[range(1,len(results_df.columns))]:
    results_df[colnames] = user_scores[colnames].astype('str') + "," + user_preferences[colnames].astype('str')

    
style_df = (
        matches_df == 1                  # Compare DataFrames
).replace({
    True: 'background-color:yellow',  # True Styles
    False: ''                      # False Styles
})    
    

results_df =  results_df.style.apply(lambda _: style_df, axis=None)
results_df

Unnamed: 0,Candidate,Panchkula,Ambala,Faridabad,Gurgaon,Panipat
0,A,250,240,210,220,230
1,B,440,450,410,420,430
2,C,540,530,550,510,520
3,D,240,230,210,250,220
4,E,650,640,610,620,630
5,F,250,250,220,230,220
6,G,440,430,410,420,450
7,H,840,850,810,820,830
8,I,450,440,410,420,430
9,J,350,340,310,320,330
