In [1]:
from faker import Faker
import random
from datetime import datetime, timedelta
import json
import pandas as pd

In [9]:


def generate_call_center_data(num_records=1000, seed=None):
    """
    Generates a list of dictionaries for a call center interaction dataset.

    Agents are drawn from a pool of 20 and customers from a pool of 500, both
    created once per call, so the same people recur across interactions.

    Args:
        num_records: Number of interaction records to generate.
        seed: Optional seed for reproducible output. When given, the Faker
            instance and a private ``random.Random`` are both seeded with it.
            When ``None`` (the default) the Faker instance is left unseeded
            and the global ``random`` module is used.

    Returns:
        A list of ``num_records`` dicts with the keys ``interaction_id``,
        ``timestamp`` (``isoformat()`` string), ``agent_id``, ``agent_name``,
        ``customer_id``, ``customer_name``, ``call_duration_seconds``,
        ``issue_category``, ``resolution_status`` and ``call_summary``.

    Note:
        A later cell in this notebook defines a function with the same name;
        once that cell has run, this version is shadowed.
        NOTE(review): timestamps come from ``fake.date_time_this_year()``,
        which presumably depends on the current date, so seeded runs on
        different days may still differ in that field -- confirm against the
        Faker documentation.
    """
    fake = Faker()  # Pass a locale such as 'en_IN' to Faker() for Indian-specific data
    if seed is None:
        # The random module itself offers the choice/random/randint calls used below.
        rng = random
    else:
        fake.seed_instance(seed)
        rng = random.Random(seed)

    # Pre-generate lists of agents, customers, and common issues for consistency
    agents = [{"id": fake.uuid4(), "name": fake.name()} for _ in range(20)]
    customers = [{"id": fake.uuid4(), "name": fake.name()} for _ in range(500)]

    common_issues = [
        "Billing Inquiry", "Technical Support", "Product Information",
        "Order Status", "Complaint Resolution", "Account Activation",
        "Subscription Cancellation", "Service Downgrade", "Login Issue"
    ]

    records = []
    for _ in range(num_records):
        # Select random agent and customer
        agent = rng.choice(agents)
        customer = rng.choice(customers)

        # Determine issue and resolution status (resolved when the draw is below 0.8)
        issue = rng.choice(common_issues)
        resolution_status = "Resolved" if rng.random() < 0.8 else "Unresolved"

        # Generate the call start time and a duration of 1 to 20 minutes
        call_start = fake.date_time_this_year()
        call_duration_seconds = rng.randint(60, 1200)

        # Create the interaction record
        records.append({
            "interaction_id": fake.unique.uuid4(),
            "timestamp": call_start.isoformat(),
            "agent_id": agent["id"],
            "agent_name": agent["name"],
            "customer_id": customer["id"],
            "customer_name": customer["name"],
            "call_duration_seconds": call_duration_seconds,
            "issue_category": issue,
            "resolution_status": resolution_status,
            "call_summary": fake.text(max_nb_chars=150)  # A short, fake summary
        })

    return records


In [10]:
# Build the dataset with the default record count and report how many records came back.
call_data = generate_call_center_data()
print(f"Generated {len(call_data)} call center records.")


Generated 1000 call center records.


* SAVE AS JSON

In [11]:
import json
# Write the generated records as indented JSON. The encoding is passed
# explicitly so the file does not depend on the platform's default encoding.
with open('call_center_data.json', 'w', encoding='utf-8') as f:
    json.dump(call_data, f, indent=4)

* SAVE AS CSV

In [12]:
import pandas as pd
# Tabulate the generated records and export them to CSV without the row index.
df = pd.DataFrame(data=call_data)
df.to_csv(path_or_buf='call_center_data.csv', index=False)

In [13]:
from faker import Faker
import random
from datetime import datetime, timedelta


In [23]:
from faker import Faker
import random
from datetime import datetime, timedelta

def _build_conversation(fake, rng, agent_name, issue, resolved):
    """Return one call's dialogue as a list of {"speaker", "text"} dicts."""
    # Opening
    turns = [
        {"speaker": "agent", "text": f"Hello, thank you for calling. My name is {agent_name}. How may I help you today?"},
        {"speaker": "customer", "text": f"Hi, I'm calling about a {issue.lower()}."},
    ]

    # Main dialogue: 2 to 6 customer/agent exchanges of Faker placeholder text
    for _ in range(rng.randint(2, 6)):
        customer_response = fake.text(max_nb_chars=100)
        agent_response = fake.text(max_nb_chars=100)
        turns.append({"speaker": "customer", "text": customer_response})
        turns.append({"speaker": "agent", "text": agent_response})

    # Closing: the agent's last line depends on whether the call was resolved
    if resolved:
        turns.append({"speaker": "agent", "text": "Thank you for your patience. The issue has been resolved. Is there anything else I can assist you with?"})
    else:
        turns.append({"speaker": "agent", "text": "I'm sorry, I'm unable to resolve this issue right now. I'll escalate it for you."})

    turns.append({"speaker": "customer", "text": "Thank you for your help. Goodbye."})
    return turns


def generate_call_center_data(num_records=1000, seed=None):
    """
    Generates a list of dictionaries for a call center interaction dataset
    with full conversations between agents and customers.

    Agents are drawn from a pool of 20 and customers from a pool of 500, both
    created once per call, so the same people recur across interactions.

    Args:
        num_records: Number of interaction records to generate.
        seed: Optional seed for reproducible output. When given, the Faker
            instance and a private ``random.Random`` are both seeded with it.
            When ``None`` (the default) the Faker instance is left unseeded
            and the global ``random`` module is used.

    Returns:
        A list of ``num_records`` dicts with the keys ``interaction_id``,
        ``timestamp`` (``isoformat()`` string), ``agent_id``, ``agent_name``,
        ``customer_id``, ``customer_name``, ``call_duration_seconds``,
        ``issue_category``, ``resolution_status`` and ``conversation`` (a list
        of ``{"speaker", "text"}`` dicts). There is no ``call_summary`` key.

    Note:
        This notebook defines a function with the same name in an earlier
        cell; running this cell replaces that definition.
        NOTE(review): timestamps come from ``fake.date_time_this_year()``,
        which presumably depends on the current date, so seeded runs on
        different days may still differ in that field -- confirm against the
        Faker documentation.
    """
    fake = Faker()
    if seed is None:
        # The random module itself offers the choice/random/randint calls used below.
        rng = random
    else:
        fake.seed_instance(seed)
        rng = random.Random(seed)

    # Pre-generate lists of agents, customers, and common issues for consistency
    agents = [{"id": fake.uuid4(), "name": fake.name()} for _ in range(20)]
    customers = [{"id": fake.uuid4(), "name": fake.name()} for _ in range(500)]

    common_issues = [
        "Billing Inquiry", "Technical Support", "Product Information",
        "Order Status", "Complaint Resolution", "Account Activation",
        "Subscription Cancellation", "Service Downgrade", "Login Issue"
    ]

    records = []
    for _ in range(num_records):
        agent = rng.choice(agents)
        customer = rng.choice(customers)
        issue = rng.choice(common_issues)
        # Resolved when the draw is below 0.8
        resolution_status = "Resolved" if rng.random() < 0.8 else "Unresolved"

        call_start = fake.date_time_this_year()
        call_duration_seconds = rng.randint(60, 1200)

        conversation = _build_conversation(
            fake, rng, agent["name"], issue, resolution_status == "Resolved"
        )

        # Create the interaction record without the 'call_summary' field
        records.append({
            "interaction_id": fake.unique.uuid4(),
            "timestamp": call_start.isoformat(),
            "agent_id": agent["id"],
            "agent_name": agent["name"],
            "customer_id": customer["id"],
            "customer_name": customer["name"],
            "call_duration_seconds": call_duration_seconds,
            "issue_category": issue,
            "resolution_status": resolution_status,
            "conversation": conversation,  # The conversation field
        })

    return records


In [25]:

# Regenerate the dataset; this rebinds call_data to the records returned by the
# conversation-producing generate_call_center_data defined in the cell above.
call_data = generate_call_center_data()
print(f"Generated {len(call_data)} call center records with conversations.")

# Preview the first three records
import pprint
pprint.pprint(call_data[0:3])



Generated 1000 call center records with conversations.
[{'agent_id': '6d1a634c-df88-4731-a5ea-330c68ab1669',
  'agent_name': 'Janet Howell MD',
  'call_duration_seconds': 295,
  'conversation': [{'speaker': 'agent',
                    'text': 'Hello, thank you for calling. My name is Janet '
                            'Howell MD. How may I help you today?'},
                   {'speaker': 'customer',
                    'text': "Hi, I'm calling about a technical support."},
                   {'speaker': 'customer',
                    'text': 'Eat notice effect reality cultural tell. Well '
                            'sport available staff find your organization '
                            'law.'},
                   {'speaker': 'agent',
                    'text': 'Building region learn know can on. Front '
                            'Republican street material wonder blue around '
                            'movie.'},
                   {'speaker': 'customer',
               