In [None]:
#Task1
import numpy as np

# Synthetic ticket generator.
# Every categorical field cycles with the ticket number, the price is a linear
# function of days-to-departure plus Gaussian noise, and a handful of records
# are deliberately corrupted (blank price / upper-cased class) so that the
# cleaning step has something to do.
n = 320
seed = 2703
rng = np.random.default_rng(seed)

routes = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
classes = ["economy", "premium", "business"]

tickets = []

for ticket_no in range(1, n + 1):
    # Days to departure cycles through 1..60.
    dtd = ticket_no % 60 + 1

    # Exactly one normal draw per ticket, even when the price is blanked
    # below, so the random stream stays aligned with the ticket number.
    price = (120 - dtd) + rng.normal(0, 25)
    if ticket_no % 28 == 0:
        # Injected defect: missing price represented as an empty string.
        price = ""

    ticket_class = classes[ticket_no % len(classes)]
    if ticket_no % 37 == 0:
        # Injected defect: inconsistent capitalisation of the class label.
        ticket_class = ticket_class.upper()

    tickets.append({
        "ticket_id": f"T{seed}-{ticket_no:04d}",
        "route": routes[ticket_no % len(routes)],
        "day": days[ticket_no % len(days)],
        "days_to_departure": dtd,
        "class": ticket_class,
        "price_usd": price,
    })

print("Total:", len(tickets))
print(tickets[:5])

In [None]:
#Task2
# Keep only tickets whose price is a real, non-negative number, and store a
# lower-cased class label on a copy so the raw `tickets` list is left untouched.
cleaned_tickets = [
    {**raw, "class": raw["class"].lower()}
    for raw in tickets
    if isinstance(raw["price_usd"], (int, float)) and raw["price_usd"] >= 0
]

print(f"Cleaned Records: {len(cleaned_tickets)}")

# Sanity check: re-scan the cleaned list for anything that should have been dropped.
still_invalid = [
    rec
    for rec in cleaned_tickets
    if not isinstance(rec['price_usd'], (int, float)) or rec['price_usd'] < 0
]
invalid_count = len(still_invalid)
print(f"Invalid prices remaining: {invalid_count}")


In [None]:
#Task3
# Price statistics and per-weekday revenue.
# Explicit dtypes: a stray non-numeric price raises here instead of silently
# producing a string array, and an empty input still gives a string array
# that can be compared element-wise against a day name.
prices = np.array([t["price_usd"] for t in cleaned_tickets], dtype=float)
days_arr = np.array([t["day"] for t in cleaned_tickets], dtype=str)

mean_p = np.mean(prices)
std_p = np.std(prices)  # population standard deviation (ddof=0)


unique_days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
daily_totals = {}
# Unrounded per-day revenue, kept separately so the consistency check below
# is not affected by rounding each day to cents first.
raw_daily_revenue = {}

for d in unique_days:
    mask = (days_arr == d)
    raw_daily_revenue[d] = np.sum(prices[mask])
    daily_totals[d] = {
        # Native Python numbers keep the report dict clean to print/serialise.
        "revenue": float(np.round(raw_daily_revenue[d], 2)),
        "count": int(np.sum(mask))
    }

# Compare the unrounded totals with a floating-point tolerance. Summing
# already-rounded daily figures and testing with `==` can be off by a cent
# and report a spurious mismatch.
raw_total = np.sum(prices)
raw_sum_daily = sum(raw_daily_revenue.values())
is_consistent = bool(np.isclose(raw_total, raw_sum_daily))

total_rev = np.round(raw_total, 2)
sum_daily = np.round(raw_sum_daily, 2)
print(f"Consistency Check: Total {total_rev} == Sum Daily {sum_daily} -> {is_consistent}")

In [None]:
#Task4
# Flag the tickets priced at or above the 90th percentile of cleaned prices.
threshold = np.percentile(prices, 90)
is_top_decile = prices >= threshold
high_prices = prices[is_top_decile]
high_price_count = len(high_prices)

print(
    f"90th Percentile Threshold: {threshold:.2f}",
    f"Number of High-Price Tickets: {high_price_count}",
    sep="\n",
)

In [None]:
#Task5
# Assemble the summary from the values computed in the earlier cells.
report = {
    "total_tickets": len(tickets),
    "cleaned_tickets": len(cleaned_tickets),
    "mean_price": round(mean_p, 2),
    "std_price": round(std_p, 2),
    "daily_totals": daily_totals,
    "high_price_count": high_price_count
}

print("--- AIRLINE PRICING REPORT ---")
for key, value in report.items():
    # The nested per-day dict gets its own formatted section below.
    if key != "daily_totals":
        print(f"{key.replace('_', ' ').title()}: {value}")

print("\nDaily Breakdown:")
for day, stats in report["daily_totals"].items():
    # Revenue is money: always show two decimals (e.g. $3456.10, not $3456.1).
    print(f"  {day}: ${stats['revenue']:.2f} ({stats['count']} tickets)")

# Actually verify the claims before printing them, rather than asserting
# them unconditionally.
prices_ok = all(
    isinstance(t["price_usd"], (int, float)) and t["price_usd"] >= 0
    for t in cleaned_tickets
)
classes_ok = all(t["class"] == t["class"].lower() for t in cleaned_tickets)

if prices_ok and classes_ok:
    print("\nValidation: All cleaned prices are non-negative and class labels are normalized.")
else:
    print(f"\nValidation FAILED: prices non-negative={prices_ok}, class labels normalized={classes_ok}")