In [1]:
import numpy as np

### Task 1: Generate synthetic ticket records

In [2]:
# Build a synthetic airline-ticket dataset. Every field except the noise term
# is a deterministic function of the ticket number `i` and `seed_value`.
seed_value = 704
n = 320
rng = np.random.default_rng(seed_value)

routes = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]
days_list = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
classes = ["economy", "premium", "business"]

tickets = []
for i in range(1, n + 1):
    # Cycle through routes, weekdays and classes based on the ticket number.
    offset = i + seed_value
    route_index = offset % 5
    day_index = offset % 7
    class_index = (2 * i + seed_value) % 3
    days_to_departure = (3 * i + seed_value) % 60 + 1

    # Price = base (drops 1.5 per day until departure) + route surcharge
    #         + class surcharge + Gaussian noise (mean 0, std 25).
    base = 120 + days_to_departure * -1.5
    route_adj = (140, 220, 60, 180, 80)[route_index]
    class_adj = (0, 80, 220)[class_index]
    # The noise is drawn on every iteration, including the ones whose price
    # is overwritten below, so the random stream stays aligned with `i`.
    noise = rng.normal(0, 25)
    price_usd = round(base + route_adj + class_adj + noise, 2)

    # Irregular records: every 28th ticket gets an empty-string price and
    # every 45th a negated price (presumably to give the cleaning step
    # something to remove).
    if i % 28 == 0:
        price_usd = ""
    elif i % 45 == 0:
        price_usd = -price_usd

    # Every 37th ticket carries its class label in upper case.
    label = classes[class_index]
    ticket_class = label.upper() if i % 37 == 0 else label

    tickets.append(
        {
            "ticket_id": f"T{seed_value}-{i:04d}",
            "route": routes[route_index],
            "day": days_list[day_index],
            "days_to_departure": days_to_departure,
            "class": ticket_class,
            "price_usd": price_usd,
        }
    )

print(len(tickets))
print(tickets[:5])


320
[{'ticket_id': 'T704-0001', 'route': 'NYC-LAX', 'day': 'Sat', 'days_to_departure': 48, 'class': 'premium', 'price_usd': 265.12}, {'ticket_id': 'T704-0002', 'route': 'LHR-JFK', 'day': 'Sun', 'days_to_departure': 51, 'class': 'economy', 'price_usd': 239.99}, {'ticket_id': 'T704-0003', 'route': 'SFO-SEA', 'day': 'Mon', 'days_to_departure': 54, 'class': 'business', 'price_usd': 328.17}, {'ticket_id': 'T704-0004', 'route': 'DXB-SIN', 'day': 'Tue', 'days_to_departure': 57, 'class': 'premium', 'price_usd': 350.05}, {'ticket_id': 'T704-0005', 'route': 'MAD-ROM', 'day': 'Wed', 'days_to_departure': 60, 'class': 'economy', 'price_usd': 98.1}]


### Task 2: Clean the tickets

In [3]:
# Keep only tickets whose price is a positive number, with the class label
# normalised to lower case.
cleaned_tickets = []

for t in tickets:
    price = t["price_usd"]

    # Rejects the empty-string prices (not numeric) and the negated prices.
    if isinstance(price, (int, float)) and price > 0:
        # Append a normalised *copy*: assigning to t["class"] would also
        # rewrite the dict held in the raw `tickets` list, so re-running or
        # inspecting the raw data later would no longer show the original
        # labels. Overriding an existing key keeps the key order unchanged.
        cleaned_tickets.append({**t, "class": t["class"].lower()})

print(len(cleaned_tickets))
print(cleaned_tickets[:2])

302
[{'ticket_id': 'T704-0001', 'route': 'NYC-LAX', 'day': 'Sat', 'days_to_departure': 48, 'class': 'premium', 'price_usd': 265.12}, {'ticket_id': 'T704-0002', 'route': 'LHR-JFK', 'day': 'Sun', 'days_to_departure': 51, 'class': 'economy', 'price_usd': 239.99}]


### Task 3: Price statistics and daily totals

In [4]:
# Price and weekday of each cleaned ticket as two parallel NumPy arrays.
prices = np.array([t["price_usd"] for t in cleaned_tickets])
days = np.array([t["day"] for t in cleaned_tickets])

mean_price = np.mean(prices)
# np.std uses ddof=0 by default, i.e. the population standard deviation.
std_price = np.std(prices)

# Distinct weekday labels; np.unique returns them sorted, so the dict below
# is ordered alphabetically rather than Mon..Sun.
unique_days = np.unique(days)

# Sum of prices per weekday, selected with a boolean mask over `days`.
# Built directly as a dict so `daily_totals` never holds a list first.
daily_totals = {d: np.sum(prices[days == d]) for d in unique_days}

# Sanity check: the per-day totals must add up to the overall total. The two
# sums are accumulated in different orders, so compare with an absolute
# tolerance instead of testing rounded floats for equality.
assert np.isclose(sum(daily_totals.values()), np.sum(prices), rtol=0.0, atol=1e-6)

### Task 4: High-price tickets (90th percentile)

In [5]:
# Cut-off: the 90th percentile of the cleaned prices.
threshold = np.percentile(prices, 90)

# Select every price at or above the cut-off (prices equal to it are kept).
at_or_above = prices >= threshold
high_price = prices[at_or_above]

high_price_count = high_price.size

# Sanity check on the selection.
assert (high_price >= threshold).all()

### Task 5: Summary report

In [6]:
# Collect the headline numbers from the previous steps into one summary dict:
# raw vs. cleaned ticket counts, price mean/std, per-day totals and the
# number of tickets at or above the 90th-percentile price.
report = dict(
    total_tickets=len(tickets),
    cleaned_tickets=len(cleaned_tickets),
    mean_price=mean_price,
    std_price=std_price,
    daily_totals=daily_totals,
    high_price_count=high_price_count,
)

print(report)

{'total_tickets': 320, 'cleaned_tickets': 302, 'mean_price': np.float64(308.26695364238407), 'std_price': np.float64(114.5970244201458), 'daily_totals': {np.str_('Fri'): np.float64(10185.4), np.str_('Mon'): np.float64(14027.33), np.str_('Sat'): np.float64(14115.080000000002), np.str_('Sun'): np.float64(13725.689999999999), np.str_('Thu'): np.float64(13712.900000000001), np.str_('Tue'): np.float64(13803.9), np.str_('Wed'): np.float64(13526.320000000002)}, 'high_price_count': 31}
