In [8]:
!pip install faker




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import pandas as pd
import random
from faker import Faker
from datetime import timedelta
import numpy as np

# Seed every random source used below. Faker draws from its own shared random
# generator, so seeding the stdlib `random` module and NumPy alone would leave
# the Faker-generated fields (dates, UUIDs) different on every run.
fake = Faker()
Faker.seed(42)
random.seed(42)
np.random.seed(42)

num_tickets = 5000
categories = ['Billing', 'Fraud', 'Card Reissue', 'Travel Booking', 'Dispute']

# Build one dict per synthetic ticket and create the DataFrame once at the end.
data = []

for _ in range(num_tickets):
    # The date window is relative to the day the cell is run ('-1y' .. '-10d'),
    # so the absolute dates still shift with the run date even when seeded.
    created_at = fake.date_between(start_date='-1y', end_date='-10d')
    resolution_days = random.randint(1, 15)
    resolved_at = created_at + timedelta(days=resolution_days)

    entry = {
        'ticket_id': fake.uuid4(),
        'customer_id': fake.uuid4(),
        'agent_id': f"AGENT{random.randint(100, 150)}",
        'created_at': created_at,
        'resolved_at': resolved_at,
        'resolution_days': resolution_days,
        'category': random.choice(categories),
        'satisfaction_score': random.randint(1, 10),
    }
    data.append(entry)

# Ticket-level frame consumed by the later cells.
df_perf = pd.DataFrame(data)


In [10]:
# Thresholds for the two derived flags, named so they are not repeated as
# bare magic numbers.
SLA_DAYS = 7              # a ticket taking longer than this many days breaches the SLA
MIN_INCENTIVE_SCORE = 8   # minimum satisfaction score for incentive eligibility

df_perf['sla_breached'] = df_perf['resolution_days'] > SLA_DAYS

# A ticket is incentive-eligible when it was resolved within the SLA AND the
# satisfaction score meets the minimum. Computed as a vectorized boolean mask
# instead of a row-wise apply: same values, no Python call per row, and the
# result stays a boolean column even for an empty frame.
df_perf['incentive_eligible'] = (
    (df_perf['resolution_days'] <= SLA_DAYS)
    & (df_perf['satisfaction_score'] >= MIN_INCENTIVE_SCORE)
)

print("Sample rows:")
print(df_perf.head())


Sample rows:
                              ticket_id                           customer_id  \
0  70fedbf6-9287-471e-943e-abfeee1673c5  0b196f3e-14ae-4e7d-abec-9308e5e9acfd   
1  6eb0fad6-9365-4759-9dfd-393917187807  1fff1b28-3d57-4276-87c6-a7c01ef1bf7b   
2  02defa67-5933-44ed-bcc3-018765021293  b965625d-af7a-432a-96fc-ea094697d256   
3  8d553ded-cfef-435e-aa59-cabfa9497366  98582654-f8cb-4759-8325-5772988b0826   
4  3e929629-6a0a-4a3c-a305-41b52c5f27ec  997329ef-f14b-4779-ada2-f55390e380eb   

   agent_id  created_at resolved_at  resolution_days category  \
0  AGENT107  2024-10-19  2024-10-30               11  Billing   
1  AGENT114  2025-02-04  2025-02-08                4    Fraud   
2  AGENT147  2024-09-11  2024-09-22               11  Dispute   
3  AGENT127  2025-02-19  2025-03-01               10  Billing   
4  AGENT113  2025-07-25  2025-07-27                2    Fraud   

   satisfaction_score  sla_breached  incentive_eligible  
0                   5          True               F

In [11]:
# Write the ticket-level frame to CSV without the row index, then report
# where it was saved.
perf_csv_path = "../data/intermediate data/agent_performance.csv"
df_perf.to_csv(perf_csv_path, index=False)
print("Saved: " + perf_csv_path)


Saved: ../data/intermediate data/agent_performance.csv


In [12]:
import pandas as pd

# Reload the ticket-level data from disk and convert the two date columns,
# which come back from the CSV as text, into datetimes.
df_perf = pd.read_csv("../data/intermediate data/agent_performance.csv")
for date_col in ('created_at', 'resolved_at'):
    df_perf[date_col] = pd.to_datetime(df_perf[date_col])

# One row per agent: ticket volume, average resolution time and satisfaction,
# and counts of SLA breaches / incentive-eligible tickets (sum of booleans).
kpi_spec = {
    'total_tickets': pd.NamedAgg(column='ticket_id', aggfunc='count'),
    'avg_resolution_days': pd.NamedAgg(column='resolution_days', aggfunc='mean'),
    'avg_satisfaction': pd.NamedAgg(column='satisfaction_score', aggfunc='mean'),
    'sla_breaches': pd.NamedAgg(column='sla_breached', aggfunc='sum'),
    'incentive_eligible_count': pd.NamedAgg(column='incentive_eligible', aggfunc='sum'),
}
tickets_by_agent = df_perf.groupby('agent_id')
agent_kpis = tickets_by_agent.agg(**kpi_spec).reset_index()

# Share of each agent's tickets that were incentive-eligible, to two decimals.
eligible_share = agent_kpis['incentive_eligible_count'] / agent_kpis['total_tickets']
agent_kpis['incentive_rate'] = eligible_share.round(2)

# NOTE: head() shows the first five rows in agent_id order (groupby sorts its
# keys by default); it is not a ranking by any metric.
print("Top 5 agent performance metrics:")
print(agent_kpis.head())


Top 5 agent performance metrics:
   agent_id  total_tickets  avg_resolution_days  avg_satisfaction  \
0  AGENT100             86             8.581395          5.627907   
1  AGENT101             89             8.471910          5.460674   
2  AGENT102            105             7.857143          5.742857   
3  AGENT103             86             7.720930          5.534884   
4  AGENT104             90             7.588889          5.533333   

   sla_breaches  incentive_eligible_count  incentive_rate  
0            49                        10            0.12  
1            54                        12            0.13  
2            54                        17            0.16  
3            45                        13            0.15  
4            47                        13            0.14  


In [13]:
def _rounded_mean(values):
    """Return the mean of ``values`` rounded to two decimal places."""
    return round(values.mean(), 2)


# One row per issue category: ticket volume, average resolution time, the
# fraction of tickets that breached the SLA (mean of the boolean flag, rounded
# to two decimals) and average satisfaction.
category_spec = {
    'total_tickets': pd.NamedAgg(column='ticket_id', aggfunc='count'),
    'avg_resolution_days': pd.NamedAgg(column='resolution_days', aggfunc='mean'),
    'sla_breach_rate': pd.NamedAgg(column='sla_breached', aggfunc=_rounded_mean),
    'avg_satisfaction': pd.NamedAgg(column='satisfaction_score', aggfunc='mean'),
}
tickets_by_category = df_perf.groupby('category')
category_summary = tickets_by_category.agg(**category_spec).reset_index()

print("Issue category performance summary:")
print(category_summary)


Issue category performance summary:
         category  total_tickets  avg_resolution_days  sla_breach_rate  \
0         Billing           1028             8.013619             0.53   
1    Card Reissue            992             8.024194             0.54   
2         Dispute            973             8.133607             0.54   
3           Fraud            985             7.899492             0.52   
4  Travel Booking           1022             8.045988             0.54   

   avg_satisfaction  
0          5.431907  
1          5.666331  
2          5.565262  
3          5.323858  
4          5.490215  


In [14]:
# Write both summary tables to ../outputs as CSV files without the row index.
exports = {
    "../outputs/agent_performance_kpis.csv": agent_kpis,
    "../outputs/category_performance_summary.csv": category_summary,
}
for out_path, frame in exports.items():
    frame.to_csv(out_path, index=False)

print("Exported: agent_performance_kpis.csv and category_performance_summary.csv")


Exported: agent_performance_kpis.csv and category_performance_summary.csv
