TASK 1

In [1]:
import numpy as np

# --- Task 1: build a reproducible synthetic ticket data set ---------------
seed_value = 604
n = 320
rng = np.random.default_rng(seed_value)

routes = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
classes = ["economy", "premium", "business"]

# Price adjustments, aligned index-for-index with `routes` and `classes`.
_route_adjustments = [140, 220, 60, 180, 80]
_class_adjustments = [0, 80, 220]


def _make_ticket(i):
    """Return the ticket record for 1-based position ``i``.

    Route, weekday, class and days-to-departure are derived deterministically
    from ``i`` and ``seed_value``; the only random component is one normal
    draw (mean 0, sd 25) taken from the shared ``rng``.
    """
    offset = i + seed_value
    route_index = offset % len(routes)
    day_index = offset % len(days)
    class_index = (2 * i + seed_value) % len(classes)
    days_to_departure = (3 * i + seed_value) % 60 + 1

    # Exactly one draw per record, even when the price is blanked below,
    # so the random stream stays aligned with the record index.
    noise = rng.normal(0, 25)
    base = 120 + days_to_departure * -1.5
    price = round(
        base + _route_adjustments[route_index] + _class_adjustments[class_index] + noise,
        2,
    )

    # Every 28th record gets an empty-string price; every 45th (that is not
    # also a 28th) gets its price negated.
    if i % 28 == 0:
        price = ""
    elif i % 45 == 0:
        price = -price

    # Every 37th record carries an upper-cased class label.
    label = classes[class_index]
    ticket_class = label.upper() if i % 37 == 0 else label

    return {
        "ticket_id": f"T{seed_value}-{i:04d}",
        "route": routes[route_index],
        "day": days[day_index],
        "days_to_departure": days_to_departure,
        "class": ticket_class,
        "price_usd": price,
    }


# Records are generated in index order so the RNG is consumed sequentially.
tickets = [_make_ticket(i) for i in range(1, n + 1)]

print("Total records:", len(tickets))
print("First 5 records:")
for record in tickets[:5]:
    print(record)

Total records: 320
First 5 records:
{'ticket_id': 'T604-0001', 'route': 'NYC-LAX', 'day': 'Thu', 'days_to_departure': 8, 'class': 'economy', 'price_usd': 205.62}
{'ticket_id': 'T604-0002', 'route': 'LHR-JFK', 'day': 'Fri', 'days_to_departure': 11, 'class': 'business', 'price_usd': 545.33}
{'ticket_id': 'T604-0003', 'route': 'SFO-SEA', 'day': 'Sat', 'days_to_departure': 14, 'class': 'premium', 'price_usd': 241.04}
{'ticket_id': 'T604-0004', 'route': 'DXB-SIN', 'day': 'Sun', 'days_to_departure': 17, 'class': 'economy', 'price_usd': 343.17}
{'ticket_id': 'T604-0005', 'route': 'MAD-ROM', 'day': 'Mon', 'days_to_departure': 20, 'class': 'business', 'price_usd': 393.9}


TASK 2 

In [2]:
# --- Task 2: drop unusable prices and normalise the class label -----------
# A record is kept only when its price is a real number (int or float) that
# is not negative. Kept records are copied, with "class" lower-cased; the
# dicts in `tickets` are left untouched.
cleaned_tickets = [
    {**ticket, "class": ticket["class"].lower()}
    for ticket in tickets
    if isinstance(ticket["price_usd"], (int, float)) and ticket["price_usd"] >= 0
]

print("Cleaned records:", len(cleaned_tickets))

# Sanity check: True would mean a non-numeric or negative price slipped
# through the filter above.
invalid_check = not all(
    isinstance(ticket["price_usd"], (int, float)) and not (ticket["price_usd"] < 0)
    for ticket in cleaned_tickets
)

print("Invalid prices remaining:", invalid_check)

print("Two cleaned records:")
print(cleaned_tickets[:2])

Cleaned records: 302
Invalid prices remaining: False
Two cleaned records:
[{'ticket_id': 'T604-0001', 'route': 'NYC-LAX', 'day': 'Thu', 'days_to_departure': 8, 'class': 'economy', 'price_usd': 205.62}, {'ticket_id': 'T604-0002', 'route': 'LHR-JFK', 'day': 'Fri', 'days_to_departure': 11, 'class': 'business', 'price_usd': 545.33}]


TASK 3

In [3]:
# --- Task 3: overall price statistics and revenue per weekday -------------
# dtypes are pinned so that an empty `cleaned_tickets` still produces arrays
# that support the numeric / string comparisons used below.
prices = np.array([t["price_usd"] for t in cleaned_tickets], dtype=float)
days_arr = np.array([t["day"] for t in cleaned_tickets], dtype=str)

# Converted to built-in floats so later cells can store and print them
# without NumPy scalar wrappers. `std()` uses NumPy's default ddof=0
# (population standard deviation).
mean_price = float(prices.mean())
std_price = float(prices.std())

unique_days = np.array(["Mon","Tue","Wed","Thu","Fri","Sat","Sun"])

# Iterating a NumPy string array yields np.str_ items and the reductions
# yield np.float64 / np.int64. Keys and values are converted to plain
# str / float / int so the dicts print cleanly, e.g. {'Mon': 14073.91}
# instead of {np.str_('Mon'): np.float64(14073.91)}.
daily_totals = {
    str(d): float(prices[days_arr == d].sum())
    for d in unique_days
}

daily_counts = {
    str(d): int((days_arr == d).sum())
    for d in unique_days
}

print("Mean price:", round(mean_price,2))
print("Std price:", round(std_price,2))

# The per-day sums and the overall sum add the same numbers in a different
# order, so they can differ by floating-point noise. Compare with a
# half-cent tolerance rather than exact equality of rounded values.
total_revenue = float(prices.sum())
print("Revenue check:",
      abs(sum(daily_totals.values()) - total_revenue) <= 0.005)

Mean price: 311.53
Std price: 115.64
Revenue check: True


TASK 4

In [4]:
# --- Task 4: tickets priced at or above the 90th percentile ---------------
threshold = np.percentile(prices, 90)

# Boolean mask selecting every price that reaches the threshold (inclusive).
at_or_above = prices >= threshold
high_prices = prices[at_or_above]

print("Threshold:", round(threshold,2))
print("High price count:", len(high_prices))

# Confirms that no selected price lies below the threshold.
selection_ok = np.all(high_prices >= threshold)
print("Validation:", selection_ok)

Threshold: 475.26
High price count: 31
Validation: True


TASK 5

In [8]:
# --- Task 5: assemble and print the final summary -------------------------
# Every value is converted to a built-in type (str / int / float) so the
# report prints as plain numbers and strings rather than NumPy scalar reprs
# such as np.str_('Mon') or np.float64(14073.91).
report = {
    "total_tickets": len(tickets),
    "cleaned_tickets": len(cleaned_tickets),
    "mean_price": round(float(mean_price), 2),
    "std_price": round(float(std_price), 2),
    "daily_totals": {str(d): round(float(v), 2) for d, v in daily_totals.items()},
    "high_price_count": len(high_prices)
}

print("--- FINAL REPORT ---")
for key, value in report.items():
    print(key, ":",  value)

# Validate against the unrounded per-day totals. The rounded display values
# each carry up to half a cent of rounding error, which could add up across
# the days and trip an exact-equality test. A half-cent tolerance absorbs
# floating-point summation-order noise while still catching a missing cent.
unrounded_total = sum(float(v) for v in daily_totals.values())
revenue_check = abs(unrounded_total - float(prices.sum())) <= 0.005
print("Validation: Daily totals match overall revenue:", revenue_check)

--- FINAL REPORT ---
total_tickets : 320
cleaned_tickets : 302
mean_price : 311.53
std_price : 115.64
daily_totals : {np.str_('Mon'): np.float64(14073.91), np.str_('Tue'): np.float64(13749.93), np.str_('Wed'): np.float64(10448.65), np.str_('Thu'): np.float64(13808.47), np.str_('Fri'): np.float64(14376.36), np.str_('Sat'): np.float64(13713.33), np.str_('Sun'): np.float64(13910.8)}
high_price_count : 31
Validation: Daily totals match overall revenue: True
