In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

### Importing and cleaning data

In [2]:
# Load the raw grader-performance export, keep only the columns used below,
# and rename them to the names the rest of the notebook relies on.
# Keys must match the CSV header exactly (including the 'SpecifiefTime' spelling).
column_names = {
    'ScorerId': 'ScorerID',
    'ScorerReviewId': 'ScorerReviewID',
    'SpecifiefTime': 'IdealTime',
    'ScoringTime': 'Elapsed',
    'ConfirmStatusId': 'isConfirmed',
}
df = (
    pd.read_csv("GradersPerformanceBhmnSfnd.csv")
    [list(column_names)]
    .rename(columns=column_names)
)

### Calculating speed

In [3]:
# Per-review speed: the fraction of IdealTime left unused. Positive when the
# review took less than IdealTime, negative when it took longer.
df['Speed'] = 1 - df['Elapsed'].div(df['IdealTime'])

In [4]:
# One row per scorer, one column per isConfirmed value. Each cell holds the
# count of non-null ScorerReviewID values for that pair, or 0 where the
# pivot has no entry.
df_pivot = pd.pivot_table(
    df,
    index='ScorerID',
    columns='isConfirmed',
    values='ScorerReviewID',
    aggfunc='count',
).fillna(0)

In [5]:
# Mean per-review speed of each scorer, aligned onto the pivot by ScorerID.
df_pivot['Speed'] = df.groupby('ScorerID')['Speed'].mean()

# Scorers without a usable mean speed, or whose mean speed is negative, get 0.
# The result is assigned back to the column rather than calling an inplace
# method on the selection `df_pivot['Speed']`: that chained form is not
# guaranteed to modify df_pivot and does nothing under pandas Copy-on-Write.
# clip(lower=0) is the vectorised equivalent of mapping non-positive values to 0.
df_pivot['Speed'] = df_pivot['Speed'].fillna(0).clip(lower=0)

# Standardise the speeds, then min-max rescale the z-scores onto 0-100.
speed = df_pivot['Speed']
df_pivot['Speed_zscore'] = (speed - speed.mean()) / speed.std()
speed_z = df_pivot['Speed_zscore']
df_pivot['Speed_zscore_scaled'] = (speed_z - speed_z.min()) / (speed_z.max() - speed_z.min()) * 100

# Last expression of the cell: summary statistics of the z-scores.
df_pivot['Speed_zscore'].describe()

count    8.000000e+02
mean    -2.244038e-16
std      1.000000e+00
min     -4.871093e-01
25%     -4.871093e-01
50%     -4.871093e-01
75%     -3.043772e-01
max      4.532301e+00
Name: Speed_zscore, dtype: float64

In [6]:
# Accuracy: column 1 as a percentage of columns 0 and 1 combined, i.e. the
# share of a scorer's reviews (among isConfirmed 0 or 1) whose isConfirmed is 1.
df_pivot['Accuracy'] = df_pivot[1].div(df_pivot[0].add(df_pivot[1])).mul(100)

# Standardise accuracy across scorers.
accuracy = df_pivot['Accuracy']
Accuracy_mean = accuracy.mean()
Accuracy_std = accuracy.std()
df_pivot['Accuracy_zscore'] = accuracy.sub(Accuracy_mean).div(Accuracy_std)

#### Skewness and kurtosis of the accuracy z-scores

In [7]:
# Shape of the accuracy z-score distribution: skewness first, then kurtosis.
accuracy_z = df_pivot['Accuracy_zscore']
print(accuracy_z.skew(), accuracy_z.kurt())

-9.092329637772492 100.0027942624753


In [8]:
# Summary statistics (count, mean, std, quartiles, extremes) of the accuracy z-scores.
df_pivot.loc[:, 'Accuracy_zscore'].describe()

count    8.000000e+02
mean    -1.523431e-14
std      1.000000e+00
min     -1.325756e+01
25%      1.198881e-01
50%      2.287893e-01
75%      2.287893e-01
max      2.287893e-01
Name: Accuracy_zscore, dtype: float64

### Calculating experience: z-score of each scorer's confirmed-review count, rescaled to 0–100

In [9]:
# Experience is derived from pivot column 1: the per-scorer count of reviews
# whose isConfirmed value is 1.
confirmed_counts = df_pivot[1]
NofAllConfirms_mean = confirmed_counts.mean()
NofAllConfirms_std = confirmed_counts.std()

# Standardise the counts, then min-max rescale the z-scores onto 0-100.
df_pivot['Experience_zscore'] = confirmed_counts.sub(NofAllConfirms_mean).div(NofAllConfirms_std)
experience_z = df_pivot['Experience_zscore']
z_low, z_high = experience_z.min(), experience_z.max()
df_pivot['Experience_zscore_scaled'] = (experience_z - z_low) / (z_high - z_low) * 100

# Last expression of the cell: summary statistics of the z-scores.
experience_z.describe()

count    8.000000e+02
mean     2.775558e-19
std      1.000000e+00
min     -2.930039e-01
25%     -2.891349e-01
50%     -2.714018e-01
75%     -1.645196e-01
max      1.380642e+01
Name: Experience_zscore, dtype: float64

### Now we give weight to each parameter

In [10]:
# Relative contribution of each component to the final score.
Accuracy_weight, Experience_weight, Speed_weight = 0.5, 0.35, 0.15

### Finally, we calculate the final score as the weighted sum of the three parameters

In [11]:
# Weighted sum of the three components. Accuracy enters as its raw percentage,
# while experience and speed enter as their 0-100 rescaled z-scores.
accuracy_part = df_pivot['Accuracy'].mul(Accuracy_weight)
experience_part = df_pivot['Experience_zscore_scaled'].mul(Experience_weight)
speed_part = df_pivot['Speed_zscore_scaled'].mul(Speed_weight)
df_pivot['FinalScore'] = accuracy_part.add(experience_part).add(speed_part)

# Last expression of the cell: summary statistics of the final score.
df_pivot['FinalScore'].describe()

count    800.000000
mean      51.334796
std        6.091534
min        0.000000
25%       50.003201
50%       50.025871
75%       51.213075
max       95.769094
Name: FinalScore, dtype: float64