Task1

In [5]:
import numpy as np
# Notebook-wide configuration: dataset size, seed, and the shared random generator.
n = 320            # number of synthetic tickets to generate
seed_value = 1211  # seeds the generator and is also embedded in ticket ids
rng = np.random.default_rng(seed_value)

In [6]:
# Build `n` synthetic ticket records. Every field is a deterministic function of
# the 1-based index `i` and `seed_value`; only the price noise comes from `rng`
# (exactly one draw per ticket, in index order).
tickets = []
a = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]  # route codes
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
classes = ["economy", "premium", "business"]
route_adjustments = [140, 220, 60, 180, 80]  # added to the price, aligned with `a`
class_adjustments = [0, 80, 220]             # added to the price, aligned with `classes`

for i in range(1, n + 1):
    # The route / class index selects both the label and its price adjustment.
    route_idx = (i + seed_value) % len(a)
    class_idx = (i * 2 + seed_value) % len(classes)
    days_to_departure = 1 + ((i * 3 + seed_value) % 60)

    base = 120 + (days_to_departure * -1.5)
    noise = rng.normal(0, 25)

    record = {
        "ticket_id": f"T{seed_value}-{i:04d}",
        "route": a[route_idx],
        "day": days[(i + seed_value) % len(days)],
        "days_to_departure": days_to_departure,
        "class": classes[class_idx],
        "price_usd": round(
            base + route_adjustments[route_idx] + class_adjustments[class_idx] + noise, 2
        ),
    }

    # Periodic corruption of the otherwise clean record:
    if i % 28 == 0:
        # every 28th ticket has its price replaced by an empty string
        record["price_usd"] = ""
    if i % 45 == 0:
        # every 45th ticket has its price sign flipped; `* -1` (rather than
        # unary minus) also accepts the "" sentinel without raising
        record["price_usd"] = record["price_usd"] * -1
    if i % 37 == 0:
        # every 37th ticket has its class label upper-cased
        record["class"] = record["class"].upper()

    tickets.append(record)

print("Total records:", len(tickets))

print("First five tickets:")
for ticket in tickets[:5]:
    print(ticket)

Total records: 320
First five tickets:
{'ticket_id': 'T1211-0001', 'route': 'SFO-SEA', 'day': 'Tue', 'days_to_departure': 15, 'class': 'premium', 'price_usd': 190.68}
{'ticket_id': 'T1211-0002', 'route': 'DXB-SIN', 'day': 'Wed', 'days_to_departure': 18, 'class': 'economy', 'price_usd': 277.66}
{'ticket_id': 'T1211-0003', 'route': 'MAD-ROM', 'day': 'Thu', 'days_to_departure': 21, 'class': 'business', 'price_usd': 452.27}
{'ticket_id': 'T1211-0004', 'route': 'NYC-LAX', 'day': 'Fri', 'days_to_departure': 24, 'class': 'premium', 'price_usd': 322.21}
{'ticket_id': 'T1211-0005', 'route': 'LHR-JFK', 'day': 'Sat', 'days_to_departure': 27, 'class': 'economy', 'price_usd': 334.37}


Task2

In [7]:
# Clean the raw tickets: drop records whose price is missing ("" sentinel) or
# negative, and normalise the class label to lower case.
# Each kept record is a *copy*, so the raw `tickets` list is left untouched and
# this cell can be re-run without changing its input.
cleaned_tickets = []
for item in tickets:
    price = item["price_usd"]
    # Test for the "" sentinel first: comparing a str with 0 raises TypeError.
    if price == "" or price < 0:
        continue
    # {**item, ...} keeps the original key order while replacing "class".
    cleaned_tickets.append({**item, "class": item["class"].lower()})

print("Cleaned count:", len(cleaned_tickets))
# Validation: sentinel check comes before the numeric comparison so a leftover
# "" yields False instead of raising TypeError.
print(all(ticket["price_usd"] != "" and ticket["price_usd"] >= 0 for ticket in cleaned_tickets))
assert all(ticket["class"] == ticket["class"].lower() for ticket in cleaned_tickets), \
    "class labels must be lower case after cleaning"
for x in cleaned_tickets[:2]:
    print(x)

Cleaned count: 302
True
{'ticket_id': 'T1211-0001', 'route': 'SFO-SEA', 'day': 'Tue', 'days_to_departure': 15, 'class': 'premium', 'price_usd': 190.68}
{'ticket_id': 'T1211-0002', 'route': 'DXB-SIN', 'day': 'Wed', 'days_to_departure': 18, 'class': 'economy', 'price_usd': 277.66}


Task3

In [8]:
# Extract the price and weekday columns of the cleaned tickets as NumPy arrays.
# Each array is built in a single pass: np.append returns a fresh copy of the
# whole array on every call, so growing an array inside a loop is quadratic.
prices = np.array([x["price_usd"] for x in cleaned_tickets], dtype=float)
# NOTE: this rebinds `days` (previously the weekday-name list used by the ticket
# generator) to a per-ticket array of weekday labels; later cells read this array.
days = np.array([x["day"] for x in cleaned_tickets])

# Summary statistics; ndarray.std() uses NumPy's default ddof=0 (population std).
prices_mean = prices.mean()
prices_std = prices.std()
print(prices_mean)
print(prices_std)

302.5260927152318
110.80768181689294


In [9]:
# Distinct weekday labels (sorted by np.unique) and the number of cleaned
# tickets carrying each label, obtained in a single call.
unique_days, ticket_counts = np.unique(days, return_counts=True)
print(unique_days)
print(ticket_counts)

['Fri' 'Mon' 'Sat' 'Sun' 'Thu' 'Tue' 'Wed']
[45 33 45 44 45 45 45]


In [10]:
# Revenue per weekday: for each label, select its prices with a boolean mask
# and sum them. Entries are in the same order as `unique_days`.
revenue_by_day = []
for label in unique_days:
    day_mask = days == label
    revenue_by_day.append(prices[day_mask].sum())
total_revenue = np.array(revenue_by_day)
print(unique_days)
print(total_revenue)

['Fri' 'Mon' 'Sat' 'Sun' 'Thu' 'Tue' 'Wed']
[13626.6   9838.23 13645.67 13289.26 13982.34 13440.63 13540.15]


In [11]:
# Map each weekday label to its total revenue, converting NumPy scalars to
# plain Python str / float.
daily_totals = {
    str(label): float(revenue)
    for label, revenue in zip(unique_days, total_revenue)
}
print(daily_totals)

{'Fri': 13626.6, 'Mon': 9838.23, 'Sat': 13645.67, 'Sun': 13289.26, 'Thu': 13982.339999999998, 'Tue': 13440.63, 'Wed': 13540.149999999998}


In [12]:
# Cross-check: the per-day totals must add back up to the overall revenue.
# The two sums are accumulated in different orders, so they can differ in the
# last floating-point digits; compare with a tolerance instead of by eye.
overall_revenue = prices.sum()
per_day_revenue = total_revenue.sum()
print(overall_revenue)
print(per_day_revenue)#for validation
assert np.isclose(overall_revenue, per_day_revenue), \
    "per-day revenue does not add up to the overall revenue"

91362.88
91362.87999999999


Task4

In [13]:
# Price threshold at the 90th percentile of the cleaned prices.
p90 = np.percentile(prices, q=90)
print("90th percentile", p90)

90th percentile 457.00800000000004


In [14]:
# Show every cleaned price strictly greater than the 90th-percentile threshold.
high_prices = prices[prices > p90]
print("Prices above percentile:", high_prices)

Prices above percentile: [466.95 463.   504.46 518.25 513.98 523.78 522.68 465.19 462.17 528.06
 512.87 524.92 510.17 525.52 504.45 472.81 457.04 471.3  472.3  477.69
 490.37 533.01 497.44 492.09 476.56 462.   522.12 522.98 508.83 571.78
 457.33]


In [32]:
# Counting, option 1: sum the boolean mask (True counts as 1, False as 0).
above_p90 = prices > p90
count = above_p90.sum()
count

np.int64(31)

In [16]:
# Counting, option 2: number of elements that survive the mask filter.
count2 = prices[prices > p90].size
count2

31

Task5

In [18]:
# Assemble the summary report from the results of the earlier cells.
# NumPy scalars are converted to plain floats so every numeric value in the
# report is a built-in type, matching the floats already stored in daily_totals.
report = {
    "total_tickets": len(tickets),
    "cleaned_tickets": len(cleaned_tickets),
    "mean_price": float(prices_mean),
    "std_price": float(prices_std),
    "daily_totals": daily_totals,
    "high_price_count": count2,
}

# Sanity check before printing: cleaning can only remove records.
assert report["cleaned_tickets"] <= report["total_tickets"], "Error: cleaned_tickets > total_tickets"

print(f"Total Tickets: {report['total_tickets']}")
print(f"Cleaned Tickets: {report['cleaned_tickets']}")
print(f"Mean Ticket Price: ${report['mean_price']:.2f}")
print(f"Price Standard Deviation: ${report['std_price']:.2f}")
print(f"Number of High-Price Tickets: {report['high_price_count']}\n")

print("Daily Totals:")
for day_name, total in report["daily_totals"].items():
    # Two decimals, like the other monetary fields; hides float summation noise
    # such as 13982.339999999998.
    print(f"  {day_name}: {total:.2f}")

Total Tickets: 320
Cleaned Tickets: 302
Mean Ticket Price: $302.53
Price Standard Deviation: $110.81
Number of High-Price Tickets: 31

Daily Totals:
  Fri: 13626.6
  Mon: 9838.23
  Sat: 13645.67
  Sun: 13289.26
  Thu: 13982.339999999998
  Tue: 13440.63
  Wed: 13540.149999999998
