# I- Data gathering and exploration

### This data represents the daily report of the call center, operating for 8 hours a day over 5 days each week.


In [1]:
#https://github.com/Afefjebali/Afef-check-points/blob/9460e897fa3bfc42c4ebf599ed2329180d801906/Call%20center%20reports.csv

#### 1- Received Calls: This denotes the daily count of calls received. Answered Calls: This reflects the number of calls successfully handled per day. 

#### 2- Pickup Ratio: Calculated as Answered Calls divided by Received Calls per day, this ratio indicates the effectiveness in addressing incoming calls.

#### 3- Service Required: This represents the percentage of service commitments agreed upon with stakeholders to be achieved.

#### 4- Dunning Letters: This signifies the number of reminders sent to clients on that particular day.

#### 5- Staff per Day: This indicates the number of agents handling calls on a daily basis.

### Step 1 : Import the dataset

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
# Location of the CSV export on the author's machine.
# NOTE(review): this is an absolute, user-specific Windows path, so the cell
# only runs on a machine where this exact file exists.
file_path=r"C:\Users\afef.jebali\OneDrive - TUI\Desktop\Training Course\Exercice 01\Call center reports.csv"
# Load the call-center report into a DataFrame.
df=pd.read_csv(file_path)
# Show the (rows, columns) size of the loaded frame.
df.shape

(700, 7)

### Step 2 : Explore the dataset

In [3]:
# Preview the first 10 rows of the dataset (head() returns rows from the top)
df.head(10)

Unnamed: 0,date,Received calls,Answered calls2,Picked up Ratio,Service required,Dunning letters,Staff per day
0,"Thursday, February 4, 2021",216.0,91.5,42%,70%,0.0,0.0
1,"Friday, February 5, 2021",214.5,126.0,59%,70%,0.0,0.0
2,"Monday, February 8, 2021",171.0,112.5,66%,70%,0.0,0.0
3,"Tuesday, February 9, 2021",163.5,105.0,64%,70%,0.0,0.0
4,"Wednesday, February 10, 2021",132.0,117.0,89%,70%,0.0,0.0
5,"Thursday, February 11, 2021",138.0,106.5,77%,70%,0.0,0.0
6,"Friday, February 12, 2021",144.0,97.5,68%,70%,0.0,0.0
7,"Thursday, February 18, 2021",148.5,103.5,70%,70%,0.0,0.0
8,"Friday, February 19, 2021",123.0,79.5,65%,70%,0.0,0.0
9,"Monday, February 22, 2021",217.5,111.0,51%,70%,0.0,0.0


In [4]:
# Inspect each column's dtype. The percentage columns ('Picked up Ratio',
# 'Service required') come back as object, i.e. strings such as '42%'.
df.dtypes

date                 object
Received calls      float64
Answered calls2     float64
Picked up Ratio      object
Service required     object
Dunning letters     float64
Staff per day       float64
dtype: object

In [5]:
# Summarise non-null counts, dtypes and memory usage for every column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   date              700 non-null    object 
 1   Received calls    700 non-null    float64
 2   Answered calls2   700 non-null    float64
 3   Picked up Ratio   700 non-null    object 
 4   Service required  700 non-null    object 
 5   Dunning letters   700 non-null    float64
 6   Staff per day     700 non-null    float64
dtypes: float64(4), object(3)
memory usage: 38.4+ KB


In [6]:
# List the column labels. The output shows the date column is stored as
# 'date ' (with a trailing space), which matters when selecting it by name.
df.columns

Index(['date ', 'Received calls', 'Answered calls2', 'Picked up Ratio',
       'Service required', 'Dunning letters', 'Staff per day'],
      dtype='object')

### Step 3 : preprocess the dataset

##### Because there are zero values in the data, the steps below calculate the mean of the 'Dunning letters' and 'Staff per day' columns and replace the zero values with the result.

###### Handling Missing Values

In [7]:
# Fill any NaN in 'Staff per day' with the column mean.
# The result is assigned back to the column rather than calling an in-place
# method on `df[...]`: that chained form works on an intermediate Series,
# raises a FutureWarning in pandas 2.x and no longer updates `df` once
# Copy-on-Write is active.
# df.info() reports 700 non-null values for this column, so on this dataset
# the fill is only a safeguard.
df['Staff per day'] = df['Staff per day'].fillna(df['Staff per day'].mean())
print("Mean:", df['Staff per day'].mean())

Mean: 9.495514285714286


In [8]:
# Fill any NaN in 'Dunning letters' with the column mean.
# The result is assigned back to the column rather than calling an in-place
# method on `df[...]`: that chained form works on an intermediate Series,
# raises a FutureWarning in pandas 2.x and no longer updates `df` once
# Copy-on-Write is active.
# df.info() reports 700 non-null values for this column, so on this dataset
# the fill is only a safeguard.
df['Dunning letters'] = df['Dunning letters'].fillna(df['Dunning letters'].mean())
print("Mean:", df['Dunning letters'].mean())

Mean: 843.7692857142857


#### Replace zero values (treated as missing) with the column mean

In [9]:
# Treat zeros in 'Dunning letters' as missing and replace them with the
# column mean.
# The mean is computed here instead of being pasted in as a literal, so the
# cell stays correct if the input data changes. On a fresh top-to-bottom run
# it is the same value the earlier cell printed (the zero rows are included
# in that mean).
# The result is assigned back to the column because a chained
# `df[...].replace(..., inplace=True)` acts on an intermediate Series and does
# not update `df` under Copy-on-Write.
# Re-running is safe: once the zeros are gone there is nothing left to replace.
df['Dunning letters'] = df['Dunning letters'].replace(0, df['Dunning letters'].mean())
print(df)

                            date   Received calls  Answered calls2  \
0      Thursday, February 4, 2021           216.0             91.5   
1        Friday, February 5, 2021           214.5            126.0   
2        Monday, February 8, 2021           171.0            112.5   
3       Tuesday, February 9, 2021           163.5            105.0   
4    Wednesday, February 10, 2021           132.0            117.0   
..                            ...             ...              ...   
695      Friday, November 3, 2023           774.0            762.0   
696      Monday, November 6, 2023           901.5            892.5   
697     Tuesday, November 7, 2023           715.5            709.5   
698   Wednesday, November 8, 2023           699.0            688.5   
699    Thursday, November 9, 2023           703.5            703.5   

    Picked up Ratio Service required  Dunning letters  Staff per day  
0               42%              70%       843.769286          0.000  
1               5

In [10]:
# Treat zeros in 'Staff per day' as missing and replace them with the
# column mean.
# The mean is computed here instead of being pasted in as a literal, so the
# cell stays correct if the input data changes. On a fresh top-to-bottom run
# it is the same value the earlier cell printed (the zero rows are included
# in that mean).
# The result is assigned back to the column because a chained
# `df[...].replace(..., inplace=True)` acts on an intermediate Series and does
# not update `df` under Copy-on-Write.
# Re-running is safe: once the zeros are gone there is nothing left to replace.
df['Staff per day'] = df['Staff per day'].replace(0, df['Staff per day'].mean())
print(df)

                            date   Received calls  Answered calls2  \
0      Thursday, February 4, 2021           216.0             91.5   
1        Friday, February 5, 2021           214.5            126.0   
2        Monday, February 8, 2021           171.0            112.5   
3       Tuesday, February 9, 2021           163.5            105.0   
4    Wednesday, February 10, 2021           132.0            117.0   
..                            ...             ...              ...   
695      Friday, November 3, 2023           774.0            762.0   
696      Monday, November 6, 2023           901.5            892.5   
697     Tuesday, November 7, 2023           715.5            709.5   
698   Wednesday, November 8, 2023           699.0            688.5   
699    Thursday, November 9, 2023           703.5            703.5   

    Picked up Ratio Service required  Dunning letters  Staff per day  
0               42%              70%       843.769286       9.495514  
1               5

In [11]:
# Re-check the first 10 rows after the zero values have been replaced.
df.head(10)

Unnamed: 0,date,Received calls,Answered calls2,Picked up Ratio,Service required,Dunning letters,Staff per day
0,"Thursday, February 4, 2021",216.0,91.5,42%,70%,843.769286,9.495514
1,"Friday, February 5, 2021",214.5,126.0,59%,70%,843.769286,9.495514
2,"Monday, February 8, 2021",171.0,112.5,66%,70%,843.769286,9.495514
3,"Tuesday, February 9, 2021",163.5,105.0,64%,70%,843.769286,9.495514
4,"Wednesday, February 10, 2021",132.0,117.0,89%,70%,843.769286,9.495514
5,"Thursday, February 11, 2021",138.0,106.5,77%,70%,843.769286,9.495514
6,"Friday, February 12, 2021",144.0,97.5,68%,70%,843.769286,9.495514
7,"Thursday, February 18, 2021",148.5,103.5,70%,70%,843.769286,9.495514
8,"Friday, February 19, 2021",123.0,79.5,65%,70%,843.769286,9.495514
9,"Monday, February 22, 2021",217.5,111.0,51%,70%,843.769286,9.495514


###### Round the float values in the "Dunning letters" and "Staff per day" columns

In [12]:
# Round both imputed columns to the nearest whole number in a single
# multi-column assignment, then display the resulting frame.
df[['Dunning letters', 'Staff per day']] = df[['Dunning letters', 'Staff per day']].round()
df

Unnamed: 0,date,Received calls,Answered calls2,Picked up Ratio,Service required,Dunning letters,Staff per day
0,"Thursday, February 4, 2021",216.0,91.5,42%,70%,844.0,9.0
1,"Friday, February 5, 2021",214.5,126.0,59%,70%,844.0,9.0
2,"Monday, February 8, 2021",171.0,112.5,66%,70%,844.0,9.0
3,"Tuesday, February 9, 2021",163.5,105.0,64%,70%,844.0,9.0
4,"Wednesday, February 10, 2021",132.0,117.0,89%,70%,844.0,9.0
...,...,...,...,...,...,...,...
695,"Friday, November 3, 2023",774.0,762.0,98%,70%,1030.0,44.0
696,"Monday, November 6, 2023",901.5,892.5,99%,70%,1776.0,40.0
697,"Tuesday, November 7, 2023",715.5,709.5,99%,70%,1324.0,34.0
698,"Wednesday, November 8, 2023",699.0,688.5,98%,70%,956.0,31.0
