# Call Center Data Cleaning Notebook

In [None]:

import pandas as pd
import numpy as np

# Read the raw call-center workbook (sheet "Sheet1") into a DataFrame
file_path = "01 Call-Center-Dataset.xlsx"
df = pd.read_excel(io=file_path, sheet_name="Sheet1")

# Show the first rows as the cell output
df.head()


In [None]:

# Build one Datetime column from the stringified Date and Time columns;
# combinations that cannot be parsed become NaT (errors="coerce")
date_text = df["Date"].astype(str)
time_text = df["Time"].astype(str)
df["Datetime"] = pd.to_datetime(date_text + " " + time_text, errors="coerce")

# Encode the Y/N flags as 1/0; any value other than "Y" or "N" maps to NaN
yes_no_to_int = {"Y": 1, "N": 0}
df["Answered"] = df["Answered (Y/N)"].map(yes_no_to_int)
df["Resolved"] = df["Resolved"].map(yes_no_to_int)
# Convert AvgTalkDuration (HH:MM:SS) into seconds
def duration_to_seconds(x):
    """Convert an ``HH:MM:SS`` duration string into a total number of seconds.

    Args:
        x: Duration text made of exactly three colon-separated integer
            fields (hours, minutes, seconds), e.g. ``"00:02:23"``.

    Returns:
        The duration as an integer number of seconds, or ``np.nan`` when
        ``x`` cannot be parsed in that format (for example ``"nan"``,
        ``"12:30"`` or ``None``).
    """
    try:
        h, m, s = map(int, x.split(":"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: x has no .split (None, float NaN, ...).
        # TypeError: x.split rejects a str separator (e.g. bytes input).
        # ValueError: wrong number of fields or a non-integer field.
        # Anything else (KeyboardInterrupt, programming errors) propagates
        # instead of being silently turned into NaN.
        return np.nan
    return h * 3600 + m * 60 + s

# Derive the talk time in seconds from the stringified AvgTalkDuration values
talk_duration_text = df["AvgTalkDuration"].astype(str)
df["TalkDurationSeconds"] = talk_duration_text.apply(duration_to_seconds)

# Normalise column labels: trim surrounding whitespace, spaces become underscores
df.columns = list(map(lambda label: label.strip().replace(" ", "_"), df.columns))

# Build the cleaned frame without the raw source columns
df_cleaned = df.drop(
    ["Date", "Time", "Answered_(Y/N)", "AvgTalkDuration"],
    axis="columns",
)

# Show the first rows of the cleaned frame as the cell output
df_cleaned.head()


In [None]:

# Write the cleaned frame to CSV, leaving out the row index
df_cleaned.to_csv(path_or_buf="cleaned_call_center.csv", index=False)
print("Cleaned dataset saved as cleaned_call_center.csv")
