Load the dataset 

In [77]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

advisorskpi=pd.read_csv("multi_employee_dataset.csv")

#Display the first few rows

print(advisorskpi.head())

        Week Employee_ID    AHT  Schedule_Adherence    ACW  RONA  \
0   1/1/2024      EMP001  16.26               93.08  0.140     2   
1   1/8/2024      EMP001  18.01               96.17  1.272     0   
2  1/15/2024      EMP001  17.78               86.93  1.476     0   
3  1/22/2024      EMP001  13.32               98.28  1.497     2   
4  1/29/2024      EMP001   7.93               85.78  1.311     0   

   Evaluation_Completed  Efficiency  Compliance  Logging  Professionalism  \
0                  True        85.0        95.0     95.0             95.0   
1                  True        95.0        95.0     90.0            100.0   
2                 False         NaN         NaN      NaN              NaN   
3                 False         NaN         NaN      NaN              NaN   
4                 False         NaN         NaN      NaN              NaN   

   Exceptions  Knowledge  Guidance  QS_Adoption_Score Performance_Level  \
0       100.0       95.0     100.0               95.0

Clean the dataset 

In [27]:
#Handle Missing Evaluation Scores for days with no evaluations

#Define the evaluation related colums
eval_cols=["QS_Adoption_Score", "Professionalism", "Logging", "Compliance", "Knowledge", "Efficiency", "Exceptions", "Guidance"]

#Replace NaN with "No Evaluation"
advisorskpi[eval_cols]=advisorskpi[eval_cols].fillna("No Evaluation")

In [28]:
#print Data types 

print(advisorskpi.dtypes)

Week                     object
Employee_ID              object
AHT                     float64
Schedule_Adherence      float64
ACW                     float64
RONA                      int64
Evaluation_Completed       bool
Efficiency               object
Compliance               object
Logging                  object
Professionalism          object
Exceptions               object
Knowledge                object
Guidance                 object
QS_Adoption_Score        object
Performance_Level        object
Needs_Coaching           object
dtype: object


In [29]:
#convert numeric columns to proper data 

numeric_cols=[ "AHT", "Schedule_Adherence", "ACW","RONA","Efficiency", "Compliance", "Logging", "Professionalism", "Exceptions", "Knowledge", "Guidance","QS_Adoption_Score"]

# Convert numeric columns to numeric type
advisorskpi[numeric_cols]=advisorskpi[numeric_cols].apply(pd.to_numeric, errors="coerce")



In [30]:
#remove the duplicate 

advisorskpi_no_duplicates=advisorskpi.drop_duplicates()


In [31]:
#identify categorial columns 

categorical_cols = advisorskpi.select_dtypes(include='object').columns

print("Categorical columns:", categorical_cols)

Categorical columns: Index(['Week', 'Employee_ID', 'Performance_Level', 'Needs_Coaching'], dtype='object')


Save the cleaned data 

In [33]:
#Save cleaned Dataset to a CSV file

advisorskpi.to_csv("cleaned_advisorskpi.csv", index=False)

In [34]:
#Load the cleaned CSV file 

import pandas as pd 

cleaned_advisorskpi=pd.read_csv("cleaned_advisorskpi.csv")

Create target variables Needs Coaching 

In [36]:

#Make a copy of the dataset
cleaned_advisorskpi_target=cleaned_advisorskpi.copy()

#Create a default colum with value 0 (No coaching need)
cleaned_advisorskpi_target["NEEDS_COACHING"]=0

#identify rows that have actual evaluations 
evaluated_rows=cleaned_advisorskpi_target["Professionalism"]!='No Evaluation'

#Convert evaluations columns to numeric for those rows only 
cleaned_advisorskpi_target.loc[evaluated_rows,[ "AHT", "Schedule_Adherence", "ACW","RONA","Efficiency", "Compliance", "Logging", "Professionalism", "Exceptions", "Knowledge", "Guidance","QS_Adoption_Score"]]=(
 cleaned_advisorskpi_target.loc[evaluated_rows,[ "AHT", "Schedule_Adherence", "ACW","RONA","Efficiency", "Compliance", "Logging", "Professionalism", "Exceptions", "Knowledge", "Guidance","QS_Adoption_Score"]].astype(float))

#Apply coaching rule- flag as 1 if any score <90
# Fixed: Using .loc[] properly with row and column selectors
cleaned_advisorskpi_target.loc[
    evaluated_rows & (cleaned_advisorskpi_target["QS_Adoption_Score"]<90), "NEEDS_COACHING"] = 1
cleaned_advisorskpi_target.loc[
    evaluated_rows & (cleaned_advisorskpi_target["QS_Adoption_Score"]>90), "NEEDS_COACHING"] = 0


#Preview the result 
cleaned_advisorskpi_target[["QS_Adoption_Score", "NEEDS_COACHING"]].head()


Unnamed: 0,QS_Adoption_Score,NEEDS_COACHING
0,95.0,0
1,95.0,0
2,,0
3,,0
4,,0


In [37]:

# proper DataFrame assignment
cleaned_advisorskpi_target.loc[
    evaluated_rows & (cleaned_advisorskpi_target["QS_Adoption_Score"]<90), "NEEDS_COACHING"] = 1
cleaned_advisorskpi_target.loc[
    evaluated_rows & (cleaned_advisorskpi_target["QS_Adoption_Score"]>90), "NEEDS_COACHING"] = 0

#Preview the result 
cleaned_advisorskpi_target[["QS_Adoption_Score", "NEEDS_COACHING"]].head()

#Print the result
cleaned_advisorskpi_target[["QS_Adoption_Score", "NEEDS_COACHING"]]

Unnamed: 0,QS_Adoption_Score,NEEDS_COACHING
0,95.00,0
1,95.00,0
2,,0
3,,0
4,,0
...,...,...
2595,83.29,1
2596,90.00,0
2597,,0
2598,90.71,0


Train the model to find which Kpis are contributing to low QS

In [65]:
#Select input features and target variables 

features=["AHT", "ACW", "Schedule_Adherence", "RONA"]

#Indepedent variables
x=cleaned_advisorskpi_target[features]

#Target :1 = Low QS, 0= High QS
y=(cleaned_advisorskpi_target["QS_Adoption_Score"]<90).astype(int)

Handle Missing Data

In [67]:
x=x.dropna()
y=y.loc[x.index]

Scale the Features

In [79]:
scaler= StandardScaler()
x_scaled= scaler.fit_transform(x)

Split into Training and Test Sets

In [83]:
x_train, x_test, y_train, y_test=train_test_split(x_scaled, y, test_size=0.3, random_state=42)

Train the Model

In [85]:
model=RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

Make Predictions and Evaluate 

In [87]:
y_pred=model.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       736
           1       0.00      0.00      0.00        44

    accuracy                           0.94       780
   macro avg       0.47      0.50      0.48       780
weighted avg       0.89      0.94      0.91       780



Check feature Importance 

In [89]:
feature_importance=pd.Series(model.feature_importances_, index=features)
print(feature_importance.sort_values(ascending=False))

AHT                   0.340691
Schedule_Adherence    0.329232
ACW                   0.278857
RONA                  0.051220
dtype: float64
