TASK 1

Created 200 tickets with weighted categories, random resolution times, and escalation flags, then added some realistic corruption (None, strings, negative numbers).


In [14]:
import random
# Seed the module-level generator so the random draws made in the cells below
# repeat on every run (as long as the cells run in the same order).
random.seed(1)

In [15]:

# Synthetic ticket fields (no corruption yet). With a fixed seed the values
# depend on the exact sequence of calls into `random`, so the order
# sample -> sample -> choices -> randint (per ticket) -> random (per ticket)
# is kept deliberately.
NUM_TICKETS = 200

ticket_ids = random.sample(range(1, 1001), NUM_TICKETS)
customer_ids = random.sample(range(101, 1001), NUM_TICKETS)
problem_categories = ('technical', 'billing', 'login', 'complaint')
category = random.choices(problem_categories, weights=[0.5, 0.2, 0.2, 0.1], k=NUM_TICKETS)

# Inclusive (low, high) bounds passed to random.randint for each category.
resolution_ranges = {
    'technical': (45, 120),
    'billing': (20, 60),
    'login': (10, 30),
    'complaint': (15, 90),
}

resolution_minutes = [random.randint(*resolution_ranges[cat]) for cat in category]

escalated_list = []

for res, cat in zip(resolution_minutes, category):
    # Base escalation probability grows with the resolution time.
    if res > 90:
        prob = 0.4
    elif res > 45:
        prob = 0.2
    else:
        prob = 0.05

    # Technical tickets get an additional 10 percentage points.
    if cat == 'technical':
        prob += 0.1

    # Every resolution time is an int at this stage, so each ticket draws
    # exactly one random number here.
    escalated_list.append(random.random() < prob)

Above, I created the values for each ticket field, without any corruption yet.

In [16]:
# Assemble one dict per ticket from the parallel field lists. zip() walks the
# lists together, so the number of tickets comes from the data rather than a
# hard-coded size.
support_tickets = [
    {
        'ticket_id': ticket_id,
        'customer_id': customer_id,
        'category': cat,
        'resolution_minutes': minutes,
        'escalated': escalated,
    }
    for ticket_id, customer_id, cat, minutes, escalated in zip(
        ticket_ids, customer_ids, category, resolution_minutes, escalated_list
    )
]

print(support_tickets)

[{'ticket_id': 138, 'customer_id': 917, 'category': 'billing', 'resolution_minutes': 35, 'escalated': False}, {'ticket_id': 583, 'customer_id': 972, 'category': 'complaint', 'resolution_minutes': 63, 'escalated': False}, {'ticket_id': 868, 'customer_id': 937, 'category': 'technical', 'resolution_minutes': 100, 'escalated': False}, {'ticket_id': 822, 'customer_id': 362, 'category': 'technical', 'resolution_minutes': 95, 'escalated': True}, {'ticket_id': 783, 'customer_id': 134, 'category': 'billing', 'resolution_minutes': 30, 'escalated': False}, {'ticket_id': 65, 'customer_id': 962, 'category': 'technical', 'resolution_minutes': 86, 'escalated': True}, {'ticket_id': 262, 'customer_id': 790, 'category': 'login', 'resolution_minutes': 24, 'escalated': False}, {'ticket_id': 121, 'customer_id': 173, 'category': 'technical', 'resolution_minutes': 61, 'escalated': False}, {'ticket_id': 508, 'customer_id': 186, 'category': 'technical', 'resolution_minutes': 107, 'escalated': True}, {'ticket_i

Above, the support tickets are created without any corruption.

In [17]:
# Copy every ticket dict, not just the outer list: list.copy() is shallow, so
# assigning into its elements would also overwrite the dicts that
# support_tickets holds and the clean data would be lost.
corrupt_tickets = [ticket.copy() for ticket in support_tickets]

for ticket in corrupt_tickets:
    # Each branch draws its own random number, so a later threshold only
    # applies to tickets that were not picked by an earlier branch.
    if random.random() < 0.1:
        ticket['resolution_minutes'] = None

    elif random.random() < 0.06:
        ticket['resolution_minutes'] = "unknown"

    elif random.random() < 0.085 and isinstance(ticket['resolution_minutes'], int):
        ticket['resolution_minutes'] *= -1

for t in corrupt_tickets[:5]:
    print(t)

print(len(corrupt_tickets))

{'ticket_id': 138, 'customer_id': 917, 'category': 'billing', 'resolution_minutes': 35, 'escalated': False}
{'ticket_id': 583, 'customer_id': 972, 'category': 'complaint', 'resolution_minutes': 63, 'escalated': False}
{'ticket_id': 868, 'customer_id': 937, 'category': 'technical', 'resolution_minutes': -100, 'escalated': False}
{'ticket_id': 822, 'customer_id': 362, 'category': 'technical', 'resolution_minutes': None, 'escalated': True}
{'ticket_id': 783, 'customer_id': 134, 'category': 'billing', 'resolution_minutes': 30, 'escalated': False}
200


Above, I created a corrupted version of the (previously clean) list of support tickets.

TASK 2

Checked for missing or malformed `resolution_minutes` values and counted them.

In [18]:
def check_res(tickets):
    """Locate tickets whose ``resolution_minutes`` value is corrupted.

    Args:
        tickets: Iterable of ticket dicts, each with a ``'resolution_minutes'`` key.

    Returns:
        Dict with the keys ``'None'``, ``'String'`` and ``'Negative'``; each
        maps to the list of positions (in iteration order) whose value is
        ``None``, a ``str``, or a negative ``int`` respectively. Values that
        match none of these are not recorded.
    """
    missing, textual, negative = [], [], []

    for position, entry in enumerate(tickets):
        minutes = entry['resolution_minutes']

        # Pick the list this position belongs to; skip values that look fine.
        if minutes is None:
            bucket = missing
        elif isinstance(minutes, str):
            bucket = textual
        elif isinstance(minutes, int) and minutes < 0:
            bucket = negative
        else:
            continue

        bucket.append(position)

    return {'None': missing, 'String': textual, 'Negative': negative}

In [19]:
def num_corr(corrupted_index):
    """Count the recorded positions for each corruption kind.

    Args:
        corrupted_index: Mapping with the keys ``'None'``, ``'String'`` and
            ``'Negative'``, each holding a sized collection of positions.

    Returns:
        Dict with the same three keys mapped to the length of each collection.
    """
    return {kind: len(corrupted_index[kind]) for kind in ('None', 'String', 'Negative')}

In [20]:
# Find the corrupted positions once, then reuse them for the per-kind counts.
indices = check_res(corrupt_tickets)
print(indices)
print(num_corr(indices))

{'None': [3, 6, 7, 43, 46, 81, 82, 86, 89, 92, 108, 116, 124, 133, 148, 149, 153, 156, 167, 186, 194], 'String': [9, 61, 73, 91, 121, 122, 123, 183], 'Negative': [2, 18, 20, 35, 83, 88, 101, 136, 139, 182]}
{'None': 21, 'String': 8, 'Negative': 10}


TASK 3

Fixed negative numbers, replaced missing values with category defaults, dropped string values, and normalized category names.

In [21]:
# For reference, the valid categories (defined in Task 1) are:
# ('technical', 'billing', 'login', 'complaint')

def clean_tickets(raw_records):
    """Return cleaned copies of the usable tickets in ``raw_records``.

    The input records are never modified. For each record:

    * ``category`` is converted to ``str``, stripped and lower-cased; records
      whose category has no entry in the defaults table are dropped.
    * A ``resolution_minutes`` of ``None`` is replaced by the category default.
    * A negative integer ``resolution_minutes`` is replaced by its absolute
      value.
    * Records whose ``resolution_minutes`` is a string, a boolean, or any
      other non-integer value are dropped.

    Args:
        raw_records: Iterable of ticket dicts with at least the keys
            ``'category'`` and ``'resolution_minutes'``.

    Returns:
        A new list of new dicts, in input order.

    Raises:
        KeyError: If a record lacks ``'category'`` or ``'resolution_minutes'``.
    """
    # Roughly the midpoint of each category's generated resolution range
    # (rounded to a multiple of ten). This table is also the single source of
    # truth for valid categories, so every accepted category is guaranteed to
    # have a default and the function does not rely on notebook globals.
    default_resolution = {
        'technical': 80,
        'billing': 40,
        'login': 20,
        'complaint': 50
    }

    cleaned = []

    for record in raw_records:
        new_record = record.copy()
        cat = str(new_record['category']).strip().lower()

        if cat not in default_resolution:
            continue  # unknown category

        new_record['category'] = cat
        res = new_record['resolution_minutes']

        if res is None:
            # Missing value: fall back to the category default.
            new_record['resolution_minutes'] = default_resolution[cat]
        elif isinstance(res, bool) or not isinstance(res, int):
            # bool is a subclass of int, so it has to be rejected explicitly;
            # strings and every other non-integer type are dropped as well.
            continue
        elif res < 0:
            # Sign-flipped value: restore the magnitude.
            new_record['resolution_minutes'] = abs(res)

        cleaned.append(new_record)

    return cleaned

In [22]:
# Clean the corrupted tickets and compare the record counts before and after.
new_tickets = clean_tickets(corrupt_tickets)
print(f'Before Cleaning: {len(corrupt_tickets)}\nAfter Cleaning: {len(new_tickets)}\n')
print(new_tickets[:5])


Before Cleaning: 200
After Cleaning: 192

[{'ticket_id': 138, 'customer_id': 917, 'category': 'billing', 'resolution_minutes': 35, 'escalated': False}, {'ticket_id': 583, 'customer_id': 972, 'category': 'complaint', 'resolution_minutes': 63, 'escalated': False}, {'ticket_id': 868, 'customer_id': 937, 'category': 'technical', 'resolution_minutes': 100, 'escalated': False}, {'ticket_id': 822, 'customer_id': 362, 'category': 'technical', 'resolution_minutes': 80, 'escalated': True}, {'ticket_id': 783, 'customer_id': 134, 'category': 'billing', 'resolution_minutes': 30, 'escalated': False}]


TASK 4

Computed the average resolution time per category, ticket counts per customer, and overall and per-category escalation rates, then validated the totals with assertions.

In [23]:
def avg_resolution_by_category(tickets):
    """Compute the mean ``resolution_minutes`` for every category.

    Args:
        tickets: Iterable of ticket dicts with ``'category'`` and numeric
            ``'resolution_minutes'`` entries.

    Returns:
        Dict mapping each category (in order of first appearance) to the
        average of its ``resolution_minutes`` values. Empty input gives ``{}``.
    """
    running = {}  # category -> [sum of minutes, number of tickets]

    for entry in tickets:
        name = entry['category']
        minutes = entry['resolution_minutes']

        bucket = running.setdefault(name, [0, 0])
        bucket[0] += minutes
        bucket[1] += 1

    return {name: total / count for name, (total, count) in running.items()}


avg_res = avg_resolution_by_category(new_tickets)

# Per-category ticket tally, used only for the sanity check below.
category_counts = {}
for t in new_tickets:
    category_counts[t['category']] = category_counts.get(t['category'], 0) + 1

# The per-category counts must add up to the number of cleaned tickets.
assert sum(category_counts.values()) == len(new_tickets)

# Bare expression so the averages are shown as the cell's output.
avg_res


{'billing': 39.921052631578945,
 'complaint': 61.3,
 'technical': 86.68888888888888,
 'login': 19.681818181818183}

In [24]:
def tickets_per_customer(records):
    """Count how many tickets each customer has.

    Args:
        records: Iterable of ticket dicts, each with a ``'customer_id'`` key.

    Returns:
        Dict mapping every ``customer_id`` (in order of first appearance) to
        the number of records carrying it. Empty input gives ``{}``.
    """
    tally = {}

    for entry in records:
        owner = entry['customer_id']
        if owner in tally:
            tally[owner] += 1
        else:
            tally[owner] = 1

    return tally

customer_summary = tickets_per_customer(new_tickets)
# Sanity check: the per-customer counts must add up to the number of tickets.
assert sum(customer_summary.values()) == len(new_tickets)

print(customer_summary)

{917: 1, 972: 1, 937: 1, 362: 1, 134: 1, 962: 1, 790: 1, 173: 1, 186: 1, 118: 1, 564: 1, 115: 1, 873: 1, 874: 1, 388: 1, 356: 1, 376: 1, 213: 1, 1000: 1, 740: 1, 290: 1, 453: 1, 398: 1, 172: 1, 272: 1, 264: 1, 997: 1, 641: 1, 273: 1, 773: 1, 380: 1, 764: 1, 829: 1, 402: 1, 566: 1, 820: 1, 430: 1, 609: 1, 586: 1, 217: 1, 125: 1, 420: 1, 496: 1, 452: 1, 532: 1, 916: 1, 293: 1, 365: 1, 212: 1, 360: 1, 848: 1, 623: 1, 315: 1, 721: 1, 543: 1, 998: 1, 122: 1, 331: 1, 119: 1, 507: 1, 137: 1, 837: 1, 265: 1, 557: 1, 822: 1, 619: 1, 795: 1, 537: 1, 658: 1, 326: 1, 746: 1, 812: 1, 629: 1, 562: 1, 329: 1, 637: 1, 765: 1, 132: 1, 505: 1, 792: 1, 690: 1, 429: 1, 776: 1, 747: 1, 931: 1, 161: 1, 856: 1, 406: 1, 318: 1, 149: 1, 414: 1, 993: 1, 179: 1, 418: 1, 910: 1, 862: 1, 263: 1, 527: 1, 679: 1, 359: 1, 234: 1, 109: 1, 675: 1, 139: 1, 705: 1, 323: 1, 684: 1, 572: 1, 276: 1, 821: 1, 738: 1, 622: 1, 893: 1, 488: 1, 306: 1, 456: 1, 202: 1, 544: 1, 706: 1, 299: 1, 605: 1, 207: 1, 782: 1, 500: 1, 404: 1

In [25]:
def escalation_summary(records):
    """Compute the overall and per-category escalation rates.

    Args:
        records: Sized collection of ticket dicts with ``'category'`` and
            truthy/falsy ``'escalated'`` entries.

    Returns:
        Dict with two keys: ``'overall_rate'`` is the escalated share of all
        records (``0`` when there are none), and ``'by_category'`` maps each
        category, in order of first appearance, to its escalated share.
    """
    ticket_count = len(records)
    escalated_overall = 0
    per_category = {}  # category -> [tickets seen, tickets escalated]

    for entry in records:
        name = entry['category']
        flagged = entry['escalated']

        bucket = per_category.setdefault(name, [0, 0])
        bucket[0] += 1
        if flagged:
            bucket[1] += 1
            escalated_overall += 1

    # Guard the division so an empty collection reports a rate of 0.
    if ticket_count > 0:
        overall = escalated_overall / ticket_count
    else:
        overall = 0

    return {
        'overall_rate': overall,
        'by_category': {name: hits / seen for name, (seen, hits) in per_category.items()}
    }


# Compute the overall and per-category escalation rates for the cleaned tickets.
rates = escalation_summary(new_tickets)
print(f"Overall rate: {rates['overall_rate']}\nBy category: {rates['by_category']}")


Overall rate: 0.22916666666666666
By category: {'billing': 0.13157894736842105, 'complaint': 0.2, 'technical': 0.35555555555555557, 'login': 0.06818181818181818}


TASK 5

Combined the summaries into one dictionary.

In [26]:
# Bundle the three Task 4 summaries under descriptive keys.
final_report = {
    'average_resolution_by_category': avg_res,
    'tickets_per_customer': customer_summary,
    'escalation_rates': rates
}

# Compact view: the overall rate is formatted as a percentage and only the
# first five customer counts are shown.
print("Final Report (compact view):")
print(f"Average resolution by category: {final_report['average_resolution_by_category']}")
print(f"Overall escalation rate: {final_report['escalation_rates']['overall_rate']:.2%}")
print(f"Escalation by category: {final_report['escalation_rates']['by_category']}")
print(f"Number of tickets per customer (sample 5): {dict(list(final_report['tickets_per_customer'].items())[:5])}")


Final Report (compact view):
Average resolution by category: {'billing': 39.921052631578945, 'complaint': 61.3, 'technical': 86.68888888888888, 'login': 19.681818181818183}
Overall escalation rate: 22.92%
Escalation by category: {'billing': 0.13157894736842105, 'complaint': 0.2, 'technical': 0.35555555555555557, 'login': 0.06818181818181818}
Number of tickets per customer (sample 5): {917: 1, 972: 1, 937: 1, 362: 1, 134: 1}
