# Call Center Dataset Analysis

In [100]:
# Importing Libraries
import pandas as pd
data = pd.read_excel("Call-Center-Dataset.xlsx")

*Dataset Overview*

In [154]:
# Total rows in the Dataset
data.shape[0]

5000

In [102]:
print('The name of columns in the Dataset : ')
data.columns

The name of columns in the Dataset : 


Index(['Call Id', 'Agent', 'Date', 'Time', 'Topic', 'Answered (Y/N)',
       'Resolved', 'Speed of answer in seconds', 'AvgTalkDuration',
       'Satisfaction rating'],
      dtype='object')

In [104]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Call Id                     5000 non-null   object 
 1   Agent                       5000 non-null   object 
 2   Date                        5000 non-null   object 
 3   Time                        5000 non-null   object 
 4   Topic                       5000 non-null   object 
 5   Answered (Y/N)              5000 non-null   object 
 6   Resolved                    5000 non-null   object 
 7   Speed of answer in seconds  4054 non-null   float64
 8   AvgTalkDuration             4054 non-null   object 
 9   Satisfaction rating         4054 non-null   float64
dtypes: float64(2), object(8)
memory usage: 390.8+ KB


In [105]:
data.head(5)

Unnamed: 0,Call Id,Agent,Date,Time,Topic,Answered (Y/N),Resolved,Speed of answer in seconds,AvgTalkDuration,Satisfaction rating
0,ID0001,Diane,2021-01-01,09:12:58,Contract related,Y,Y,109.0,00:02:23,3.0
1,ID0002,Becky,2021-01-01,09:12:58,Technical Support,Y,N,70.0,00:04:02,3.0
2,ID0003,Stewart,2021-01-01,09:47:31,Contract related,Y,Y,10.0,00:02:11,3.0
3,ID0004,Greg,2021-01-01,09:47:31,Contract related,Y,Y,53.0,00:00:37,2.0
4,ID0005,Becky,2021-01-01,10:00:29,Payment related,Y,Y,95.0,00:01:00,3.0


## Data Cleaning  and Handling

*Duplicates Removal*

In [106]:
data.duplicated().sum()

np.int64(0)

*Handling Missing values*

In [107]:
data.isnull().sum()

Call Id                         0
Agent                           0
Date                            0
Time                            0
Topic                           0
Answered (Y/N)                  0
Resolved                        0
Speed of answer in seconds    946
AvgTalkDuration               946
Satisfaction rating           946
dtype: int64

In [108]:
data['Speed of answer in seconds'] = data['Speed of answer in seconds'].fillna(data['Speed of answer in seconds'].mean())

In [109]:
data['AvgTalkDuration'] = data['AvgTalkDuration'].astype(str)
data['AvgTalkDuration'] = pd.to_timedelta(data['AvgTalkDuration'])
data['AvgTalkDuration'] = data['AvgTalkDuration'].dt.total_seconds() / 60

In [110]:
data['AvgTalkDuration'] = data['AvgTalkDuration'].fillna(data['AvgTalkDuration'].mean())
data['AvgTalkDuration'] = data['AvgTalkDuration'].round(2)

In [111]:
data['Satisfaction rating'] = data['Satisfaction rating'].fillna(data['Satisfaction rating'].mean())
data['Satisfaction rating'] = data['Satisfaction rating'].round(2)

In [112]:
data.isnull().sum()

Call Id                       0
Agent                         0
Date                          0
Time                          0
Topic                         0
Answered (Y/N)                0
Resolved                      0
Speed of answer in seconds    0
AvgTalkDuration               0
Satisfaction rating           0
dtype: int64

*Data Type Conversion*

In [113]:
data.sample(5)

Unnamed: 0,Call Id,Agent,Date,Time,Topic,Answered (Y/N),Resolved,Speed of answer in seconds,AvgTalkDuration,Satisfaction rating
3777,ID3778,Joe,2021-03-07,13:53:46,Technical Support,N,N,67.52072,3.75,3.4
124,ID0125,Martha,2021-01-03,10:37:55,Technical Support,N,N,67.52072,3.75,3.4
3044,ID3045,Diane,2021-02-22,17:19:41,Admin Support,Y,Y,38.0,3.8,3.0
3359,ID3360,Dan,2021-02-28,13:09:07,Technical Support,Y,Y,65.0,2.25,5.0
3041,ID3042,Becky,2021-02-22,17:03:50,Admin Support,N,N,67.52072,3.75,3.4


In [114]:
data['Speed of answer in seconds'] = pd.to_timedelta(data['Speed of answer in seconds'], unit = 's').dt.total_seconds()
data['Speed of answer in seconds'] = data['Speed of answer in seconds'].round(2)

In [115]:
data['Date'] = pd.to_datetime(data['Date'])

In [116]:
data['Time'] = pd.to_datetime(data['Time'], format='%H:%M:%S')
data['Time'].dtypes

dtype('<M8[ns]')

In [117]:
data.dtypes

Call Id                               object
Agent                                 object
Date                          datetime64[ns]
Time                          datetime64[ns]
Topic                                 object
Answered (Y/N)                        object
Resolved                              object
Speed of answer in seconds           float64
AvgTalkDuration                      float64
Satisfaction rating                  float64
dtype: object

*Standardizing Formats*

In [118]:
data['Answered (Y/N)'] = data['Answered (Y/N)'].replace({'Y':'Yes','N':'No'})

In [119]:
data['Resolved'].unique()

array(['Y', 'N'], dtype=object)

In [120]:
data['Resolved'] = data['Resolved'].replace({'Y':'Yes','N':'No'})

In [121]:
data.sample(5)

Unnamed: 0,Call Id,Agent,Date,Time,Topic,Answered (Y/N),Resolved,Speed of answer in seconds,AvgTalkDuration,Satisfaction rating
190,ID0191,Becky,2021-01-04,1900-01-01 11:18:14,Contract related,Yes,Yes,45.0,6.52,5.0
419,ID0420,Dan,2021-01-08,1900-01-01 12:12:58,Streaming,Yes,Yes,112.0,3.68,2.0
4180,ID4181,Martha,2021-03-15,1900-01-01 10:13:26,Contract related,Yes,Yes,13.0,3.08,3.0
4309,ID4310,Dan,2021-03-17,1900-01-01 16:20:38,Streaming,No,No,67.52,3.75,3.4
2681,ID2682,Jim,2021-02-16,1900-01-01 14:54:14,Admin Support,Yes,Yes,118.0,4.88,4.0


## Data Aggregation

In [122]:
data.groupby('Agent')[['Speed of answer in seconds','AvgTalkDuration','Satisfaction rating']].mean()

Unnamed: 0_level_0,Speed of answer in seconds,AvgTalkDuration,Satisfaction rating
Agent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Becky,65.726276,3.681807,3.376545
Dan,67.324171,3.835292,3.439179
Diane,66.528657,3.670332,3.404107
Greg,68.263526,3.774054,3.403526
Jim,66.572973,3.791862,3.394895
Joe,70.350219,3.737892,3.343339
Martha,69.105768,3.732978,3.457053
Stewart,66.42543,3.766701,3.400344


In [123]:
data.groupby('Topic')['Answered (Y/N)'].count()

Topic
Admin Support         976
Contract related      976
Payment related      1007
Streaming            1022
Technical Support    1019
Name: Answered (Y/N), dtype: int64

In [124]:
call_count = data.groupby('Agent')['Call Id']
print('The maximum number of calls in the dataset : ',call_count.count().max())

The maximum number of calls in the dataset :  666


In [125]:
print('The maximum number of calls in the dataset : ',call_count.count().min())

The maximum number of calls in the dataset :  582


In [126]:
print('The minimum time taken for a agent call : ',data['Time'].max(),' minutes')

The minimum time taken for a agent call :  1900-01-01 18:00:00  minutes


In [127]:
print('The minimum time taken for a agent call : ',data['Time'].min(),' minutes')

The minimum time taken for a agent call :  1900-01-01 09:00:00  minutes


In [128]:
print('The Average satisfaction rating for each agent : ')
data.groupby('Topic')['Call Id'].count().round(2)

The Average satisfaction rating for each agent : 


Topic
Admin Support         976
Contract related      976
Payment related      1007
Streaming            1022
Technical Support    1019
Name: Call Id, dtype: int64

*Reshaping Data*

In [129]:
import numpy as np

In [130]:
data['Answered (Y/N)'] = data['Answered (Y/N)'].replace({'Yes':1,'No':0})
data['Resolved'] = data['Resolved'].replace({'Yes':1,'No':0})

  data['Answered (Y/N)'] = data['Answered (Y/N)'].replace({'Yes':1,'No':0})
  data['Resolved'] = data['Resolved'].replace({'Yes':1,'No':0})


In [131]:
data.pivot_table(index = ['Agent'], columns = ['Topic'], values = ['Answered (Y/N)','Resolved'], aggfunc = np.sum)

  data.pivot_table(index = ['Agent'], columns = ['Topic'], values = ['Answered (Y/N)','Resolved'], aggfunc = np.sum)


Unnamed: 0_level_0,Answered (Y/N),Answered (Y/N),Answered (Y/N),Answered (Y/N),Answered (Y/N),Resolved,Resolved,Resolved,Resolved,Resolved
Topic,Admin Support,Contract related,Payment related,Streaming,Technical Support,Admin Support,Contract related,Payment related,Streaming,Technical Support
Agent,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Becky,100,87,112,119,99,90,78,99,105,90
Dan,87,101,106,114,115,80,91,93,101,106
Diane,108,92,97,108,96,103,82,85,96,86
Greg,105,90,106,111,90,94,83,99,100,79
Jim,116,121,94,102,103,106,110,81,91,97
Joe,93,84,100,105,102,87,75,91,92,91
Martha,92,107,108,105,102,83,98,99,87,94
Stewart,94,107,95,83,98,80,92,82,77,93


## Metrics

*1. Call Handling Efficieny*

- *Total Calls Handled = Count of calls answered (Y)*

In [132]:
total_calls = data[data['Answered (Y/N)'] == 1].value_counts()
print('The Total Number of calls attended by agents : ',total_calls.count())

The Total Number of calls attended by agents :  4054


- *Answer Rate (%) = (Calls Answered / Total Calls) * 100*

In [133]:
total_calls = len(data)
calls_answered = data['Answered (Y/N)'].eq(1).sum()
answer_rate = ( calls_answered / total_calls ) * 100
print(f'The Answer rate in the Call Center Dataset : {answer_rate}%')

The Answer rate in the Call Center Dataset : 81.08%


- *Resolution Rate (%) = (Calls Resolved / Calls Answered) * 100*

In [134]:
call_resolved = data['Resolved'].eq(1).sum()
resolution_rate = ( call_resolved / calls_answered ) * 100
print(f'The Resolution rate in the Call Center Dataset : {resolution_rate.round(2)}%')

The Resolution rate in the Call Center Dataset : 89.94%


- *Average Speed of Answer (ASA) = Mean of "Speed of Answer in seconds"*

In [147]:
avg_speed_of_answer = data['Speed of answer in seconds'].mean().round(2)
print(f'The Average Speed of answer for calls is : {avg_speed_of_answer}%')

The Average Speed of answer for calls is : 67.52%


- *Abandonment Rate (%) = [(Total Calls - Answered Calls) / Total Calls] * 100*

In [136]:
abandment_rate =  ( total_calls - calls_answered ) / total_calls  * 100
print(f'The Abandment rate in the Call Center Dataset is : {abandment_rate}%')

The Abandment rate in the Call Center Dataset is : 18.92%


*2. Agent Performance*

- *Agent Workload = Calls handled per agent*

In [137]:
agent_workload = data.groupby('Agent')['Call Id'].count()

print('Calls Handled per agent : ')
for i, (agent_name, handled_calls) in enumerate(agent_workload.items()):
    print(f"{i+1}. {agent_name} have handled {handled_calls} calls.")

Calls Handled per agent : 
1. Becky have handled 631 calls.
2. Dan have handled 633 calls.
3. Diane have handled 633 calls.
4. Greg have handled 624 calls.
5. Jim have handled 666 calls.
6. Joe have handled 593 calls.
7. Martha have handled 638 calls.
8. Stewart have handled 582 calls.


- *Average Talk Duration = Mean of "AvgTalkDuration"*

In [148]:
avg_talk_duration = data['AvgTalkDuration'].mean().round(2)
print(f'The Average Call Duration is : {avg_talk_duration}')

The Average Call Duration is : 3.75


- *First Call Resolution (FCR) Rate = (Calls Resolved on First Attempt / Total Calls Answered) * 100*

In [139]:
unique_calls = data['Call Id'].nunique()
first_call_resolution_rate = ( unique_calls / total_calls ) * 100
print(f'The First Call Resolution (FCR) rate is : {first_call_resolution_rate}%')

The First Call Resolution (FCR) rate is : 100.0%


- *Satisfaction Score per Agent = Avg. "Satisfaction rating" per agent*

In [149]:
agent_satisfaction_per_agent = data.groupby('Agent')['Satisfaction rating'].mean().round(2)

print('Average satisfaction per agent : ')
for i, (agent_name, handled_calls) in enumerate(agent_satisfaction_per_agent.items()):
    print(f"{i+1}. {agent_name}'s Average Satisfaction rate : {handled_calls} calls.")

Average satisfaction per agent : 
1. Becky's Average Satisfaction rate : 3.38 calls.
2. Dan's Average Satisfaction rate : 3.44 calls.
3. Diane's Average Satisfaction rate : 3.4 calls.
4. Greg's Average Satisfaction rate : 3.4 calls.
5. Jim's Average Satisfaction rate : 3.39 calls.
6. Joe's Average Satisfaction rate : 3.34 calls.
7. Martha's Average Satisfaction rate : 3.46 calls.
8. Stewart's Average Satisfaction rate : 3.4 calls.


- *Calls Handled per Hour = (Total Calls Handled by Agent / Total Work Hours)*

In [141]:
data['hour'] = data['Time'].dt.hour 


In [151]:
call_agent_per_hour = data.groupby('Agent')['hour'].sum()

print('Call handled per hour : ')
for i, (agent_name, handled_calls) in enumerate(call_agent_per_hour.items()):
    print(f"{i+1}. {agent_name} have handled per hour : {handled_calls} calls.")

Call handled per hour : 
1. Becky have handled per hour : 8301 calls.
2. Dan have handled per hour : 8256 calls.
3. Diane have handled per hour : 8283 calls.
4. Greg have handled per hour : 8093 calls.
5. Jim have handled per hour : 8706 calls.
6. Joe have handled per hour : 7696 calls.
7. Martha have handled per hour : 8202 calls.
8. Stewart have handled per hour : 7622 calls.


*3. Customer Satisfaction*

- *Customer Satisfaction Score (CSAT) = Average "Satisfaction rating"*

In [152]:
customer_satisfaction_rate = data['Satisfaction rating'].mean().round(2)
print(f'The Customer Average Satisfaction rate is : {customer_satisfaction_rate}')

The Customer Average Satisfaction rate is : 3.4


- *Call Abandonment Impact = Correlation between "Speed of answer" and "Abandonment rate"*

In [144]:
import seaborn as sns
import matplotlib.pyplot as plt

In [145]:
"""correlation = data['Speed of answer in seconds'].corr(data['Abandonment_Rate'])
print(f"Correlation between Speed of Answer and Abandonment Rate: {correlation:.2f}")

# Scatter Plot with Regression Line
plt.figure(figsize=(8, 5))
sns.regplot(x=data['Speed of answer in seconds'], y=data['Abandonment_Rate'])
plt.xlabel("Speed of Answer (Seconds)")
plt.ylabel("Abandonment Rate (%)")
plt.title(f"Call Abandonment Impact (Correlation = {correlation:.2f})")
plt.show()
"""

'correlation = data[\'Speed of answer in seconds\'].corr(data[\'Abandonment_Rate\'])\nprint(f"Correlation between Speed of Answer and Abandonment Rate: {correlation:.2f}")\n\n# Scatter Plot with Regression Line\nplt.figure(figsize=(8, 5))\nsns.regplot(x=data[\'Speed of answer in seconds\'], y=data[\'Abandonment_Rate\'])\nplt.xlabel("Speed of Answer (Seconds)")\nplt.ylabel("Abandonment Rate (%)")\nplt.title(f"Call Abandonment Impact (Correlation = {correlation:.2f})")\nplt.show()\n'

- *Wait Time vs. Satisfaction = Analysis of how "Speed of Answer" affects "Satisfaction rating"*

In [153]:
data['Satisfaction rating'] = data['Satisfaction rating'].astype(float)

# Correlation
correlation = data['Speed of answer in seconds'].corr(data['Satisfaction rating'])
print(f"Correlation between Speed of Answer and Satisfaction Rating: {correlation:.2f}")

Correlation between Speed of Answer and Satisfaction Rating: 0.00


*4. Time-Based Analysis*

- *Peak Call Hours = Time slots with the highest number of calls*
- *Daily/Weekly Trends = Calls answered, resolution rate, and satisfaction trends over time*
- *Seasonality Analysis = Identifying trends in call volume based on specific months, days, or holidays*