In [81]:
import random
random.seed(42)
categories = ["technical", "billing", "general", "account"]
weights = [5, 3, 1, 2]  

raw_tickets = []

for i in range(1, 201):
    ticket = {}

    ticket["ticket_id"] = i
    ticket["category"] = random.choices(categories, weights=weights, k=1)[0]
    resolution = random.randint(5, 180)

    if random.random() < 0.05:
        resolution = None
    elif random.random() < 0.05:
        resolution = "unknown"
    elif random.random() < 0.02:
        resolution = -10

    ticket["resolution_minutes"] = resolution

    ticket["escalated"] = random.choice([True, False])

    raw_tickets.append(ticket)

print(raw_tickets[:10])

print("\nTotal tickets:", len(raw_tickets))

[{'ticket_id': 1, 'category': 'billing', 'resolution_minutes': 11, 'escalated': True}, {'ticket_id': 2, 'category': 'billing', 'resolution_minutes': 144, 'escalated': True}, {'ticket_id': 3, 'category': 'technical', 'resolution_minutes': None, 'escalated': True}, {'ticket_id': 4, 'category': 'billing', 'resolution_minutes': 144, 'escalated': True}, {'ticket_id': 5, 'category': 'general', 'resolution_minutes': 45, 'escalated': False}, {'ticket_id': 6, 'category': 'technical', 'resolution_minutes': 102, 'escalated': True}, {'ticket_id': 7, 'category': 'general', 'resolution_minutes': 142, 'escalated': False}, {'ticket_id': 8, 'category': 'account', 'resolution_minutes': 163, 'escalated': True}, {'ticket_id': 9, 'category': 'technical', 'resolution_minutes': 63, 'escalated': True}, {'ticket_id': 10, 'category': 'technical', 'resolution_minutes': 121, 'escalated': True}]

Total tickets: 200


In [82]:
def check_keys(records):
    required_keys = {"ticket_id", "category", "resolution_minutes", "escalated"}
    bad_records = []
    for record in records:
        if set(record.keys()) != required_keys:
            bad_records.append(record)
    return bad_records

def check_resolution(records):
    bad_records = []
    for record in records:
        res = record["resolution_minutes"]
        if not isinstance(res, (int, float)) or res < 0:
            bad_records.append(record)
    return bad_records

bad_records_keys = check_keys(raw_tickets)
print(bad_records_keys[:10])
print(f"Len of the bad records keys: {len(bad_records_keys)}")

print("====================")

bad_records_resolution_minutes = check_resolution(raw_tickets)
print(bad_records_resolution_minutes[:10])
print(f"Len of the bad records resolution_minutes: {len(bad_records_resolution_minutes)}")


[]
Len of the bad records keys: 0
[{'ticket_id': 3, 'category': 'technical', 'resolution_minutes': None, 'escalated': True}, {'ticket_id': 13, 'category': 'billing', 'resolution_minutes': 'unknown', 'escalated': False}, {'ticket_id': 21, 'category': 'technical', 'resolution_minutes': 'unknown', 'escalated': False}, {'ticket_id': 24, 'category': 'billing', 'resolution_minutes': None, 'escalated': False}, {'ticket_id': 30, 'category': 'technical', 'resolution_minutes': None, 'escalated': True}, {'ticket_id': 62, 'category': 'technical', 'resolution_minutes': 'unknown', 'escalated': False}, {'ticket_id': 68, 'category': 'technical', 'resolution_minutes': 'unknown', 'escalated': False}, {'ticket_id': 88, 'category': 'account', 'resolution_minutes': 'unknown', 'escalated': False}, {'ticket_id': 90, 'category': 'billing', 'resolution_minutes': 'unknown', 'escalated': False}, {'ticket_id': 92, 'category': 'account', 'resolution_minutes': 'unknown', 'escalated': False}]
Len of the bad records 

In [83]:
def clean_dataset(raw_records):
    cleaned_records = []

    for record in raw_records:
        res = record["resolution_minutes"]
        if not isinstance(res, (int, float)) or res < 0:
            res = 60

        category = record.get("category", "").strip().title()

        cleaned_ticket = record.copy()
        cleaned_ticket["category"] = category
        cleaned_ticket["resolution_minutes"] = res

        cleaned_records.append(cleaned_ticket)

    return cleaned_records

cleaned = clean_dataset(raw_tickets)
print(cleaned[:5])


[{'ticket_id': 1, 'category': 'Billing', 'resolution_minutes': 11, 'escalated': True}, {'ticket_id': 2, 'category': 'Billing', 'resolution_minutes': 144, 'escalated': True}, {'ticket_id': 3, 'category': 'Technical', 'resolution_minutes': 60, 'escalated': True}, {'ticket_id': 4, 'category': 'Billing', 'resolution_minutes': 144, 'escalated': True}, {'ticket_id': 5, 'category': 'General', 'resolution_minutes': 45, 'escalated': False}]


In [85]:
cleaned = clean_dataset(raw_tickets)
def average_resolution(cleaned_records):
    category_sum = {}
    category_count = {}

    for record in cleaned_records:
        cat = record["category"]
        res = record["resolution_minutes"]

        category_sum[cat] = category_sum.get(cat, 0) + res
        category_count[cat] = category_count.get(cat, 0) + 1

    average_resolution_time_per_category = {}

    for cat in category_sum:
        average_resolution_time_per_category[cat] = (
            category_sum[cat] / category_count[cat]
        )

    return average_resolution_time_per_category

print(average_resolution(cleaned))

print("==================================================================")
print("==================================================================")
print("==================================================================")


def escalation_rate(cleaned_records):
    true_esca = {}
    total_counts = {}

    for record in cleaned_records:
        cat = record["category"]
        esca = record["escalated"]

        true_esca[cat] = true_esca.get(cat, 0) + esca
        total_counts[cat] = total_counts.get(cat, 0) + 1

    escalation_rates = {}

    for cat in total_counts:
        escalation_rates[cat] = true_esca[cat] / total_counts[cat]

    overall_true = sum(true_esca.values())
    overall_total = sum(total_counts.values())
    escalation_rates["overall"] = overall_true / overall_total

    return escalation_rates

print(escalation_rate(cleaned))

print("==================================================================")
print("==================================================================")
print("==================================================================")


#We don,t have customer key so i added a new key about customer
customers = [f"customer_{i}" for i in range(1, 51)]
for record in raw_tickets:
    record["customer_id"] = random.choice(customers)

def tickets_per_customer(cleaned_records):
    counts = {}
    for record in cleaned_records:
        customer = record["customer_id"]  
        counts[customer] = counts.get(customer, 0) + 1
    return counts


print(sum(tickets_per_customer(cleaned).values()) == len(cleaned))
print(tickets_per_customer(cleaned))

{'Billing': 100.8360655737705, 'Technical': 89.44186046511628, 'General': 86.0, 'Account': 95.0952380952381}
{'Billing': 0.5245901639344263, 'Technical': 0.5697674418604651, 'General': 0.7272727272727273, 'Account': 0.42857142857142855, 'overall': 0.535}
True
{'customer_5': 4, 'customer_6': 8, 'customer_28': 10, 'customer_7': 4, 'customer_48': 7, 'customer_24': 4, 'customer_9': 3, 'customer_36': 7, 'customer_4': 5, 'customer_38': 3, 'customer_22': 5, 'customer_43': 8, 'customer_8': 6, 'customer_27': 4, 'customer_23': 6, 'customer_49': 6, 'customer_47': 2, 'customer_19': 4, 'customer_39': 5, 'customer_20': 6, 'customer_37': 6, 'customer_33': 3, 'customer_14': 5, 'customer_10': 4, 'customer_31': 1, 'customer_15': 5, 'customer_18': 5, 'customer_50': 4, 'customer_40': 4, 'customer_44': 4, 'customer_42': 1, 'customer_2': 5, 'customer_45': 3, 'customer_12': 3, 'customer_1': 3, 'customer_26': 7, 'customer_41': 2, 'customer_34': 3, 'customer_25': 2, 'customer_30': 3, 'customer_11': 4, 'custome

In [86]:
def generate_final_report(cleaned_records):
    report = {
        "average_resolution_per_category": average_resolution(cleaned_records),
        "escalation_rate": escalation_rate(cleaned_records),
        "tickets_per_customer": tickets_per_customer(cleaned_records)
    }
    return report

cleaned = clean_dataset(raw_tickets)

final_report = generate_final_report(cleaned)

#I used it for more readable ooutput
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(final_report)

{   'average_resolution_per_category': {   'Account': 95.0952380952381,
                                           'Billing': 100.8360655737705,
                                           'General': 86.0,
                                           'Technical': 89.44186046511628},
    'escalation_rate': {   'Account': 0.42857142857142855,
                           'Billing': 0.5245901639344263,
                           'General': 0.7272727272727273,
                           'Technical': 0.5697674418604651,
                           'overall': 0.535},
    'tickets_per_customer': {   'customer_10': 9,
                                'customer_11': 3,
                                'customer_12': 5,
                                'customer_13': 4,
                                'customer_14': 1,
                                'customer_15': 1,
                                'customer_16': 2,
                                'customer_17': 2,
                                'custo

### Final Report

- Technical tickets generally take longer to resolve and are escalated more often.  
- Some customers submit multiple tickets, indicating recurring support needs.  
- Billing and Account tickets are resolved faster but still require monitoring for escalations.
