In [217]:
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

def take_a_look (data_to_look):
    print(data_to_look.info())
    print()
    print(data_to_look.describe())

def correlation_columns (data, col_1, col_2):
    corr_value = data[col_1].corr(data[col_2])
    return round(corr_value, 4) * 100

def get_subset(data, column, threshold, condition):
    """
    Parameters:
    data: data to filter -> Dtype: data frame
    column: filter based on this column -> Dtyle: string
    threshold: value above/below you want to filter the data -> Dtype: float
    condition: takes two possible values (greater than or equal to / less than) -> Dtype: string
    """
    if condition == '>=':
        mask = data[column] >= threshold
        data_mask = data[mask]

    else:
        mask = data[column] < threshold
        data_mask = data[mask]

    return data_mask

In [2]:
data = pd.read_csv('csr_analysis.csv')
data.head()

Unnamed: 0,Employee_ID,Employee_Name,Total_Tickets_Score,Avg_Resolution_Time,Customer_Satisfaction,Escalation_Avoidance,FCR_Rate,Attendance
0,1,Zoro Roronoa,95,20,98,90,92,98
1,2,Alice Johnson,88,25,92,85,88,95
2,3,Bob Smith,70,45,85,80,75,90
3,4,Carol Lee,92,18,95,93,90,97
4,5,David Kim,65,55,80,78,70,88


In [3]:
take_a_look(data)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Employee_ID            20 non-null     int64 
 1   Employee_Name          20 non-null     object
 2   Total_Tickets_Score    20 non-null     int64 
 3   Avg_Resolution_Time    20 non-null     int64 
 4   Customer_Satisfaction  20 non-null     int64 
 5   Escalation_Avoidance   20 non-null     int64 
 6   FCR_Rate               20 non-null     int64 
 7   Attendance             20 non-null     int64 
dtypes: int64(7), object(1)
memory usage: 1.4+ KB
None

       Employee_ID  Total_Tickets_Score  Avg_Resolution_Time  \
count     20.00000            20.000000            20.000000   
mean      10.50000            79.700000            34.400000   
std        5.91608            10.498621            11.618497   
min        1.00000            60.000000            18.000000   
25%        5

In [231]:
numerical_columns = ['Total_Tickets_Score', 'Avg_Resolution_Time', 'Customer_Satisfaction', 'Escalation_Avoidance', 'FCR_Rate', 'Attendance']
data_numerical = data[numerical_columns]
data_numerical.head()

Unnamed: 0,Total_Tickets_Score,Avg_Resolution_Time,Customer_Satisfaction,Escalation_Avoidance,FCR_Rate,Attendance
0,95,20,98,90,92,98
1,88,25,92,85,88,95
2,70,45,85,80,75,90
3,92,18,95,93,90,97
4,65,55,80,78,70,88


In [5]:
data_numerical.corr()

Unnamed: 0,Total_Tickets_Score,Avg_Resolution_Time,Customer_Satisfaction,Escalation_Avoidance,FCR_Rate,Attendance
Total_Tickets_Score,1.0,-0.975843,0.973955,0.944499,0.941324,0.976304
Avg_Resolution_Time,-0.975843,1.0,-0.971302,-0.945582,-0.958456,-0.959381
Customer_Satisfaction,0.973955,-0.971302,1.0,0.928021,0.953567,0.965796
Escalation_Avoidance,0.944499,-0.945582,0.928021,1.0,0.937274,0.943588
FCR_Rate,0.941324,-0.958456,0.953567,0.937274,1.0,0.929471
Attendance,0.976304,-0.959381,0.965796,0.943588,0.929471,1.0


In [6]:
data['Productivity_Score'] = (0.4 * data['Total_Tickets_Score']) + (0.2 * data['Avg_Resolution_Time']) + (0.15 * data['Customer_Satisfaction']) +0.1 * (data['Escalation_Avoidance']) + (0.1 * data['FCR_Rate']) + (0.05 * data['Attendance'])

data.head()

Unnamed: 0,Employee_ID,Employee_Name,Total_Tickets_Score,Avg_Resolution_Time,Customer_Satisfaction,Escalation_Avoidance,FCR_Rate,Attendance,Productivity_Score
0,1,Zoro Roronoa,95,20,98,90,92,98,79.8
1,2,Alice Johnson,88,25,92,85,88,95,76.05
2,3,Bob Smith,70,45,85,80,75,90,69.75
3,4,Carol Lee,92,18,95,93,90,97,77.8
4,5,David Kim,65,55,80,78,70,88,68.2


In [15]:
# low attendance < 92.5, high >= 92.5

mask_high = data_numerical['Attendance'] >= 92.5
data_high_attendance = data_numerical[mask_high]

correlation_high_attendance = correlation_columns(data_high_attendance, 'Attendance', 'FCR_Rate')

print("The correlation between high attendance and FCR is: ", correlation_high_attendance)

The correlation between high attendance and FCR is:  0.8489734267324621


In [17]:
# low attendance < 92.5, high >= 92.5
mask_low = np.invert(mask_high)
data_low_attendance = data_numerical[mask_low]

correlation_low_attendance = correlation_columns(data_low_attendance, 'Attendance', 'FCR_Rate')

print("The correlation between low attendance and FCR is: ", correlation_low_attendance)

The correlation between low attendance and FCR is:  0.76470468009636


In [19]:
diff_corr = round((correlation_high_attendance - correlation_low_attendance), 2) * 100
print("Low attendance affected FCR by a factor of", diff_corr)

Low attendance affected FCR by a factor of 8.0


In [21]:
# does attendance correlate with avg resolution time
resolution_correlation_attendance = correlation_columns(data, 'Attendance', 'Avg_Resolution_Time')
resolution_correlation_high_attendance = correlation_columns(data_high_attendance, 'Attendance', 'Avg_Resolution_Time')
resolution_correlation_low_attendance = correlation_columns(data_low_attendance, 'Attendance', 'Avg_Resolution_Time')

print("The correlation between Attendance and Average Resolution Time is: ", resolution_correlation_attendance)
print("The correlation between High Attendance and Average Resolution Time is: ", resolution_correlation_high_attendance)
print("The correlation between Low Attendance and Average Resolution Time is: ", resolution_correlation_low_attendance)

diff_attendance_resolution_corr = round((resolution_correlation_high_attendance - resolution_correlation_low_attendance), 2) * 100

print("Low attendance affected Average Resolution Time by a factor of", diff_attendance_resolution_corr)

The correlation between Attendance and Average Resolution Time is:  -0.9593806989609532
The correlation between High Attendance and Average Resolution Time is:  -0.9188482839419769
The correlation between Low Attendance and Average Resolution Time is:  -0.8123587320037841
Low attendance affected Average Resolution Time by a factor of -11.0


In [49]:
mask_high_satis = data['Customer_Satisfaction'] >= 89.5
mask_low_satis = data['Customer_Satisfaction'] < 89.5

data_high_satisfaction = data[mask_high_satis]
data_low_satisfaction = data[mask_low_satis]

In [197]:
data_high_satis_high_attend = get_subset(data_high_satisfaction, 'Attendance', 92.5, '>=')
data_high_satis_low_attend = get_subset(data_high_satisfaction, 'Attendance', 92.5, '<')

data_low_satis_high_attend = get_subset(data_low_satisfaction, 'Attendance', 92.5, '>=')
data_low_satis_low_attend = get_subset(data_low_satisfaction, 'Attendance', 92.5, '<')

In [199]:
# throws error bc only one item
# correlation_columns(data_high_satis_low_attend, 'Customer_Satisfaction', 'Attendance')

#  throws error bc one item
# correlation_columns(data_low_satis_high_attend, 'Customer_Satisfaction', 'Attendance')

print(correlation_columns(data_high_satis_high_attend, 'Customer_Satisfaction', 'Attendance'))
print(correlation_columns(data_low_satis_low_attend, 'Customer_Satisfaction', 'Attendance'))

96.64
77.2


In [219]:
data_to_look = [data_high_satis_high_attend, data_high_satis_low_attend, data_low_satis_high_attend, data_low_satis_low_attend]
analysis = ['Employees with high satisfaction and high attendance', 'Employees with high satisfaction and low attendance', 'Employees with low satisfaction and high attendance', 'Employees with low satisfaction and low attendance']
idx = 0

for i in data_to_look:
    corr_value = correlation_columns(i, column_1, column_2)
    print("The correlation value for",analysis[idx],"is",corr_value)
    idx += 1

The correlation value for Employees with high satisfaction and high attendance is 96.64
The correlation value for Employees with high satisfaction and low attendance is nan
The correlation value for Employees with low satisfaction and high attendance is nan
The correlation value for Employees with low satisfaction and low attendance is 77.2


In [249]:
data['Efficiency'] = (data['Attendance'] * data['FCR_Rate'])/(data['Avg_Resolution_Time'])

print("Efficiency and Total Tickets:",correlation_columns(data, 'Efficiency', 'Total_Tickets_Score'))
print("Efficiency and Escalation Avoidance:",correlation_columns(data, 'Efficiency', 'Escalation_Avoidance'))
print("Efficiency and Customer Satisfaction",correlation_columns(data, 'Efficiency', 'Customer_Satisfaction'))

Efficiency and Total Tickets: 94.43
Efficiency and Escalation Avoidance: 89.78
Efficiency and Customer Satisfaction 95.00999999999999
