In [124]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler

Importing and cleaning the data

In [118]:
# Raw CSV column name -> shorter name used throughout the notebook.
# Only these five columns are kept; every other column in the file is dropped.
column_renames = {
    'ScorerId': 'ScorerID',
    'AssessmentRunId': 'AssessmentID',
    'SpecifiefTime': 'IdealTime',      # (sic) this is the spelling the code expects in the CSV header
    'ScoringTime': 'Elapsed',
    'ConfirmStatusId': 'isConfirmed',
}
df = (
    pd.read_csv("Data/GradersPerformanceBhmnSfnd.csv")[list(column_renames)]
    .rename(columns=column_renames)
)

calculating speed

In [119]:
# Speed is the fraction of the ideal (specified) time that was NOT used:
#   > 0  -> the scorer finished faster than IdealTime
#   = 0  -> the scorer used exactly IdealTime
#   < 0  -> the scorer took longer than IdealTime
# A zero IdealTime would turn the ratio into +/-inf (or NaN for 0/0); a single
# inf then poisons the per-scorer mean and every mean/std/min/max computed from
# it later. Mask zeros to NaN so those rows are simply skipped by the
# NaN-aware aggregations downstream.
ideal_time = df['IdealTime'].where(df['IdealTime'] != 0)
df['Speed'] = 1 - df['Elapsed']/ideal_time

In [120]:
# One row per ScorerID, one column per distinct isConfirmed value; each cell is
# the number of (non-null) AssessmentID entries for that scorer/status pair.
# Scorer/status pairs that never occur come out as NaN and are replaced by 0.
df_pivot = df.pivot_table(
    index='ScorerID',
    columns='isConfirmed',
    values='AssessmentID',
    aggfunc='count',
).fillna(0)

Making a separate table for each scoring method

In [121]:
# Give each scoring method its own independent copy of the per-scorer counts so
# the columns added for one method never leak into the other (or into df_pivot).
original_df_pivot, robust_df_pivot = df_pivot.copy(), df_pivot.copy()

The first method is quite simple: we calculate each scorer's parameters, standardize them (z-score), and then min-max scale them to lie between 0 and 100

In [123]:
# Method 1: z-score each parameter, then min-max scale it onto [0, 100].

def _zscore(values):
    """Standardize a Series: subtract its mean, divide by its sample standard deviation."""
    return (values - values.mean()) / values.std()


def _min_max_0_100(values):
    """Linearly rescale a Series so that its minimum maps to 0 and its maximum to 100."""
    return (values - values.min()) / (values.max() - values.min()) * 100


# --- Speed: mean per-assessment speed of each scorer ---
original_df_pivot['Speed'] = df.groupby('ScorerID')['Speed'].mean()
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# the selected column: the chained in-place form raises a FutureWarning on
# pandas 2.x and does nothing under Copy-on-Write, which would leave NaNs here.
original_df_pivot['Speed'] = original_df_pivot['Speed'].fillna(0)
original_df_pivot['Speed_zscore'] = _zscore(original_df_pivot['Speed'])
original_df_pivot['Speed_zscore_scaled'] = _min_max_0_100(original_df_pivot['Speed_zscore'])

# --- Accuracy: share of status-1 assessments among status-0 and status-1 ones, in percent ---
# (presumably isConfirmed == 1 means "confirmed" -- verify against the data dictionary)
# No z-score is needed because this ratio is already on a 0-100 scale.
original_df_pivot['Accuracy'] = df_pivot[1]/(df_pivot[0]+df_pivot[1]) * 100

# --- Experience: z-score of the number of status-1 assessments per scorer ---
SumConfirms_mean = df_pivot[1].mean()
SumConfirms_std = df_pivot[1].std()
original_df_pivot['Experience'] = (df_pivot[1]-SumConfirms_mean)/SumConfirms_std
original_df_pivot['Experience_zscore_scaled'] = _min_max_0_100(original_df_pivot['Experience'])

The second method is a bit more involved: because the data contain many outliers, we use the RobustScaler, clip the scaled values to [-1, 1], and then map them linearly onto the range 0 to 100

In [132]:
# Method 2: robust-scale each parameter, clip outliers, map onto [0, 100].

def _clip_to_0_100(scaled):
    """Clip robust-scaled values to [-1, 1] and map them linearly onto [0, 100].

    -1 (and anything below) becomes 0, 0 becomes 50, +1 (and anything above)
    becomes 100. The input array's shape is preserved.
    """
    return 50 + np.clip(scaled, -1, 1) * 50


# RobustScaler with default settings centers on the median and scales by the
# interquartile range, so extreme scorers do not dominate the scaling.
# fit_transform refits the scaler on each call, so one instance serves both columns.
scaler = RobustScaler()

# --- Speed: mean per-assessment speed of each scorer ---
robust_df_pivot['Speed'] = df.groupby('ScorerID')['Speed'].mean()
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# the selected column: the chained in-place form raises a FutureWarning on
# pandas 2.x and does nothing under Copy-on-Write, and any NaN left behind
# would propagate through the scaler into the final score.
robust_df_pivot['Speed'] = robust_df_pivot['Speed'].fillna(0)
speed_fit_scaled = _clip_to_0_100(scaler.fit_transform(robust_df_pivot[['Speed']]))
robust_df_pivot['Speed_robust_scaled'] = speed_fit_scaled

# --- Accuracy: share of status-1 assessments among status-0 and status-1 ones, in percent ---
robust_df_pivot['Accuracy'] = df_pivot[1]/(df_pivot[0]+df_pivot[1]) * 100

# --- Experience: robust-scaled number of status-1 assessments per scorer ---
experience_fit_scaled = _clip_to_0_100(scaler.fit_transform(robust_df_pivot[[1]]))
robust_df_pivot['Experience_robust_scaled'] = experience_fit_scaled

Now we assign a weight to each parameter

In [139]:
# Relative importance of each parameter in the final score, listed from the
# heaviest to the lightest. The three weights add up to 1.
Accuracy_weight = 0.6
Speed_weight = 0.3
Experience_weight = 0.1

Finally, we calculate the final score of the second (robust) method as the weighted sum of its parameters

In [136]:
# Final score of the robust method: weighted sum of the three 0-100 parameters.
robust_df_pivot['FinalScore'] = (
    robust_df_pivot['Speed_robust_scaled'] * Speed_weight
    + robust_df_pivot['Accuracy'] * Accuracy_weight
    + robust_df_pivot['Experience_robust_scaled'] * Experience_weight
)
# Summary statistics of every column, shown as the cell output.
robust_df_pivot.describe()

isConfirmed,0,1,Speed,Speed_robust_scaled,Accuracy,Experience_robust_scaled,FinalScore,FinalScore_Alireza
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,5.43625,454.3825,-1.864347,41.62021,98.30355,63.19599,77.787792,73.198247
std,21.182642,1550.772671,5.513103,27.498584,7.414903,23.833865,11.075352,12.921112
min,0.0,0.0,-110.6,0.0,0.0,41.332471,4.133247,6.199871
25%,0.0,6.0,-1.978571,15.258316,99.192508,42.884864,68.582937,61.563518
50%,0.0,33.5,-0.582925,50.0,100.0,50.0,80.231364,75.466853
75%,2.0,199.25,0.030034,65.258316,100.0,92.884864,87.050121,84.35962
max,277.0,21865.0,0.825,85.047312,100.0,100.0,94.248759,93.290219


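Now we calculate the final score of the first (z-score) method in the same way, as the weighted sum of its scaled parameters. Because every parameter lies between 0 and 100 and the weights sum to 1, the final score also lies between 0 and 100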

In [138]:
# Final score of the z-score method: weighted sum of the three 0-100 parameters.
original_df_pivot['FinalScore'] = (
    original_df_pivot['Speed_zscore_scaled'] * Speed_weight
    + original_df_pivot['Accuracy'] * Accuracy_weight
    + original_df_pivot['Experience_zscore_scaled'] * Experience_weight
)
# Summary statistics of every column, shown as the cell output.
original_df_pivot.describe()

isConfirmed,0,1,Speed,Speed_zscore,Speed_zscore_scaled,Accuracy,Experience,Experience_zscore_scaled,FinalScore,FinalScore_Alireza
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,5.43625,454.3825,-1.864347,-5.721118000000001e-17,97.586406,98.30355,2.7755579999999997e-19,2.078127,88.465865,83.618736
std,21.182642,1550.772671,5.513103,1.0,4.947815,7.414903,1.0,7.092489,4.777389,4.290579
min,0.0,0.0,-110.6,-19.72313,0.0,0.0,-0.2930039,0.0,28.756563,33.549323
25%,0.0,6.0,-1.978571,-0.02071877,97.483894,99.192508,-0.2891349,0.027441,88.683817,83.656846
50%,0.0,33.5,-0.582925,0.2324321,98.736437,100.0,-0.2714018,0.153213,89.405365,84.360086
75%,2.0,199.25,0.030034,0.3436143,99.286546,100.0,-0.1645196,0.911274,89.768291,84.768124
max,277.0,21865.0,0.825,0.48781,100.0,100.0,13.80642,100.0,99.552301,99.607637
