In [None]:
#task1
import numpy as np
# Seed used both for the random generator and for the index formulas in the
# ticket-generation loop.
seed_value = 808
rng = np.random.default_rng(seed_value)

# Number of synthetic ticket records to generate.
n = 320

# Lookup tables that the generator indexes into.
routes = [
    "NYC-LAX",
    "LHR-JFK",
    "SFO-SEA",
    "DXB-SIN",
    "MAD-ROM",
]
days_of_week = [
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    "Sun",
]
classes = [
    "economy",
    "premium",
    "business",
]

In [None]:
# Additive price components, positionally aligned with `routes` and `classes`.
route_premiums = (140, 220, 60, 180, 80)
class_premiums = (0, 80, 220)
ticket_fields = ("ticket_id", "route", "day", "days_to_departure", "class", "price_usd")

tickets = []
for seq in range(1, n + 1):
    # Every attribute index is a pure function of the sequence number and seed.
    offset = seq + seed_value
    route_pos = offset % 5
    day_pos = offset % 7
    class_pos = (2 * seq + seed_value) % 3
    lead_days = (3 * seq + seed_value) % 60 + 1  # always within 1..60

    # Price = base that falls by 1.5 per day of lead time
    #         + route premium + class premium + Gaussian noise (sigma 25).
    lead_time_base = 120 + lead_days * -1.5
    jitter = rng.normal(0, 25)
    fare = round(
        lead_time_base + route_premiums[route_pos] + class_premiums[class_pos] + jitter,
        2,
    )
    cabin = classes[class_pos]

    # Corrupt a deterministic subset of records; these are the cases the
    # cleaning cell later normalises or drops.
    if seq % 37 == 0:
        cabin = cabin.upper()  # inconsistent casing
    if seq % 28 == 0:
        fare = ""  # missing price
    elif seq % 45 == 0:
        fare = -fare  # negative price (only reachable when the price was not blanked)

    record_values = (
        f"T{seed_value}-{seq:04d}",
        routes[route_pos],
        days_of_week[day_pos],
        lead_days,
        cabin,
        fare,
    )
    tickets.append(dict(zip(ticket_fields, record_values)))




In [None]:
# Sanity check: how many records were generated, plus the first five of them.
print("Total records:", len(tickets))
first_five = tickets[:5]
print(first_five)

In [None]:
#task2

In [None]:
def _price_is_usable(value):
    """Return True when `value` is a non-blank int/float that is not negative."""
    return value != "" and isinstance(value, (int, float)) and not value < 0


# Keep only records with a usable price; each kept record is a shallow copy
# whose "class" label is lower-cased, so the raw `tickets` list is untouched.
cleaned_tickets = [
    {**record, "class": record["class"].lower()}
    for record in tickets
    if _price_is_usable(record["price_usd"])
]

# Re-scan the cleaned data for blank or negative prices; expected to be empty.
invalid_check = list(
    filter(
        lambda record: record["price_usd"] == "" or record["price_usd"] < 0,
        cleaned_tickets,
    )
)



In [None]:
# Summary of the cleaning step: surviving rows, leftover invalid rows, and a sample.
print("Cleaned records:", len(cleaned_tickets))
print("Invalid records remaining:", len(invalid_check))
print(f"Sample cleaned records: {cleaned_tickets[:2]}")

In [None]:
#task3

In [None]:
# Weekday labels to aggregate over, and the dict the next cell fills with totals.
unique_days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
daily_totals = dict()

# Parallel arrays: element k of `days` and `prices` comes from the same ticket.
days = np.array([record["day"] for record in cleaned_tickets])
prices = np.array([record["price_usd"] for record in cleaned_tickets])

# Summary statistics over all cleaned prices (std uses NumPy's default ddof=0).
mean_p = prices.mean()
std_p = prices.std()

In [None]:
# Total price per weekday, using a boolean mask over the aligned `days` and
# `prices` arrays. Each total is converted to a built-in float so that printing
# the dict does not show NumPy scalar wrappers (NumPy >= 2 renders them as
# `np.float64(...)`) and so the values match the built-in types used in the report.
for d in unique_days:
    day_mask = (days == d)
    daily_totals[d] = float(np.sum(prices[day_mask]))

# Cross-check: the per-day totals must add up to the overall total.
# `np.isclose` tolerates the floating-point rounding caused by summing in a
# different order; the result is stored as a plain bool.
total_rev = float(np.sum(prices))
sum_of_daily = sum(daily_totals.values())
is_valid_sum = bool(np.isclose(total_rev, sum_of_daily))

In [None]:
# Report the summary statistics, the per-day totals and the sum cross-check.
print("Mean: {:.2f}, Std Dev: {:.2f}".format(mean_p, std_p))
print("Daily Totals:", daily_totals)
print("Sum validation passed:", is_valid_sum)

In [None]:
#task4

In [None]:
# A ticket is "high price" when its price is at or above the 90th percentile.
threshold = np.percentile(prices, 90)
high_price_mask = threshold <= prices

# Number of such tickets, and the cheapest of them (used for the validation print).
high_price_count = high_price_mask.sum()
min_high_price = prices[high_price_mask].min()

In [None]:
# Show the cut-off, how many tickets meet it, and confirm none fall below it.
print("90th Percentile Threshold: {:.2f}".format(threshold))
print("Count of high-price tickets:", high_price_count)
print("Validation (Min high price >= Threshold):", min_high_price >= threshold)

In [None]:
#task5

In [None]:
# Final summary of the pipeline. Every numeric value is converted to a built-in
# int/float before rounding, so the report contains no NumPy scalar types.
# The daily totals are cast as well: `round()` on an `np.float64` returns
# another `np.float64`, which would otherwise leak into the report.
report = {
    "total_tickets": len(tickets),            # raw records, including dirty ones
    "cleaned_tickets": len(cleaned_tickets),  # records that survived cleaning
    "mean_price": round(float(mean_p), 2),
    "std_price": round(float(std_p), 2),
    "daily_totals": {day: round(float(total), 2) for day, total in daily_totals.items()},
    "high_price_count": int(high_price_count),
}



In [None]:
# Print each report entry as "<label>: <value>", in the report's own order.
report_lines = (
    ("Total tickets", "total_tickets"),
    ("Cleaned tickets", "cleaned_tickets"),
    ("Mean price", "mean_price"),
    ("Std price", "std_price"),
    ("Daily totals", "daily_totals"),
    ("High price count", "high_price_count"),
)
for label, key in report_lines:
    print(f"{label}: {report[key]}")

# Repeat the per-day vs. overall total cross-check alongside the report.
print("Validation: {:.2f} == {:.2f} -> {}".format(sum_of_daily, total_rev, is_valid_sum))