# Task 1


In [1]:
import random
from datetime import datetime , timedelta

In [2]:
random.seed(11)

In [3]:
def generate_support_tickets(num_entry = 200):
    categories = ["Login Error" , "App Crash" , "Refund" , "Bug Report" , "Slow Performance"]
    category_weights = [0.2, 0.1 ,0.4, 0.1 ,0.2 ]

    raw_dataset = []

    for i in range(num_entry):
        ticket_id = 10000 + i*random.randint(1,10)
        customer_id = random.randint(1000, 10000)
        category = random.choices(categories, weights=category_weights)[0]
        escalated = random.choice([True, False])

        day_range = random.randint(1, 31)
        time = datetime.now() -timedelta(days=day_range)

        error_change = random.random() #I add %10 chance for malformed data
        if error_change < 0.05:
            res_min = None
        elif error_change < 0.10:
            res_min = "Error"
        else:
            res_min = random.randint(15, 1440)

        dataset_entry = {
            "ticket_id": ticket_id,
            "customer_id": customer_id,
            "category": category,
            "resolution_minutes": res_min,
            "escalated": escalated,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
        }

        raw_dataset.append(dataset_entry)
    return raw_dataset


In [4]:
logs = generate_support_tickets()
for data in logs[:5]:
    print(data)

{'ticket_id': 10000, 'customer_id': 8629, 'category': 'Refund', 'resolution_minutes': 989, 'escalated': True, 'timestamp': '2026-02-12 20:57:31'}
{'ticket_id': 10010, 'customer_id': 4050, 'category': 'Login Error', 'resolution_minutes': 'Error', 'escalated': False, 'timestamp': '2026-02-13 20:57:31'}
{'ticket_id': 10002, 'customer_id': 7490, 'category': 'Slow Performance', 'resolution_minutes': None, 'escalated': True, 'timestamp': '2026-01-29 20:57:31'}
{'ticket_id': 10027, 'customer_id': 2034, 'category': 'Login Error', 'resolution_minutes': 76, 'escalated': True, 'timestamp': '2026-01-20 20:57:31'}
{'ticket_id': 10032, 'customer_id': 6345, 'category': 'Refund', 'resolution_minutes': 617, 'escalated': True, 'timestamp': '2026-02-01 20:57:31'}


# Task 2

In [5]:
def keyValidation(logs):
    expected_keys = {"ticket_id", "customer_id", "category", "resolution_minutes", "escalated", "timestamp"}
    return [ data for data in logs if expected_keys.issubset(data.keys()) ]

def validationForResolutionTime(logs):
    return [ data for data in logs
             if isinstance(data.get("resolution_minutes"), int)
             ]


# Task 3

In [6]:
def normalizeCategory(logs):
    return [
        {
            **log,
            "category": str(log.get("category")).strip().lower(),
        }
        for log in logs
    ]
logs_with_key = keyValidation(logs)

logs_with_true_resolution_time = validationForResolutionTime(logs_with_key)

clean_logs = normalizeCategory(logs_with_true_resolution_time)
print(len(logs),len(clean_logs),clean_logs[:5])


200 176 [{'ticket_id': 10000, 'customer_id': 8629, 'category': 'refund', 'resolution_minutes': 989, 'escalated': True, 'timestamp': '2026-02-12 20:57:31'}, {'ticket_id': 10027, 'customer_id': 2034, 'category': 'login error', 'resolution_minutes': 76, 'escalated': True, 'timestamp': '2026-01-20 20:57:31'}, {'ticket_id': 10032, 'customer_id': 6345, 'category': 'refund', 'resolution_minutes': 617, 'escalated': True, 'timestamp': '2026-02-01 20:57:31'}, {'ticket_id': 10040, 'customer_id': 1075, 'category': 'refund', 'resolution_minutes': 1143, 'escalated': False, 'timestamp': '2026-01-28 20:57:31'}, {'ticket_id': 10012, 'customer_id': 5161, 'category': 'refund', 'resolution_minutes': 158, 'escalated': True, 'timestamp': '2026-02-01 20:57:31'}]


# Task 5

In [7]:
%run m1-02-summary-functions.py

def generate_summary_report(logs):
    report = {
        "average_resolution_time": findAverageForPerCategory(logs),
        "escalation_rates": findEscalationRate(logs),
        "customer_ticket_counts": findTicketsPerCustomer(logs)
    }
    return report

In [8]:
final_report = generate_summary_report(clean_logs)

print(f"Total Records Analyzed: {len(clean_logs)}")

print("\n1. Average resolution time for per category:")
print(final_report["average_resolution_time"])

print("\n2. Escalation Rates for per category and overall:")
print(final_report["escalation_rates"])

print("\n3. Top 5 Customers by Ticket Count:")
sorted_customers = sorted(final_report["customer_ticket_counts"].items(), key=lambda x: x[1], reverse=True)[:5]
print(dict(sorted_customers))

Total Records Analyzed: 176

1. Average resolution time for per category:
{'refund': 693.81, 'login error': 683.2, 'app crash': 830.36, 'slow performance': 824.82, 'bug report': 696.19}

2. Escalation Rates for per category and overall:
{'refund': 0.55, 'login error': 0.44, 'app crash': 0.36, 'slow performance': 0.44, 'bug report': 0.43, 'overall': 0.4772727272727273}

3. Top 5 Customers by Ticket Count:
{2218: 2, 8629: 1, 2034: 1, 6345: 1, 1075: 1}
