# Task 1

In [16]:
import numpy as np
# Single source of truth for the seed: it seeds the random generator and is
# also used for the ticket ids and index offsets, so the two cannot drift
# apart when the value is changed.
seed_value = 1402
rng = np.random.default_rng(seed=seed_value)
n = 320  # number of synthetic ticket records to generate

In [17]:
# Lookup tables. Route, weekday and class are picked deterministically from
# the ticket index, so the price noise is the only random component.
routes = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
classes = ["economy", "premium", "business"]
route_adjs = [140, 220, 60, 180, 80]  # added to the price, aligned with `routes`
class_adjs = [0, 80, 220]             # added to the price, aligned with `classes`

tickets = []
for i in range(1, n + 1):
    route_indx = (i + seed_value) % len(routes)
    day_indx = (i + seed_value) % len(days)
    days_to_departure = 1 + ((i * 3 + seed_value) % 60)
    class_indx = (i * 2 + seed_value) % len(classes)

    # Pricing: the base fare drops by 1.5 for every day until departure,
    # then route/class adjustments and Gaussian noise are added.
    base = 120 - 1.5 * days_to_departure
    noise = rng.normal(0, 25)
    price_usd = round(base + route_adjs[route_indx] + class_adjs[class_indx] + noise, 2)
    current_class = classes[class_indx]

    # Dirty-data injection: every 28th price is blanked, every 45th price is
    # sign-flipped, every 37th class label is upper-cased. A record hit by
    # both price rules stays blank (blank takes precedence).
    if i % 28 == 0:
        price_usd = ""
    elif i % 45 == 0:
        price_usd = -price_usd
    if i % 37 == 0:
        current_class = current_class.upper()

    tickets.append({
        "ticket_id": f"T{seed_value}-{i:04d}",
        "route": routes[route_indx],
        "day": days[day_indx],
        "days_to_departure": days_to_departure,
        "class": current_class,
        "price_usd": price_usd,
    })

print(f"Total records: {len(tickets)}")

Total records: 320


In [18]:
# First five generated records, in generation order (not ranked).
tickets[:5]

[{'ticket_id': 'T1402-0001',
  'route': 'DXB-SIN',
  'day': 'Thu',
  'days_to_departure': 26,
  'class': 'economy',
  'price_usd': 223.6},
 {'ticket_id': 'T1402-0002',
  'route': 'MAD-ROM',
  'day': 'Fri',
  'days_to_departure': 29,
  'class': 'business',
  'price_usd': 343.85},
 {'ticket_id': 'T1402-0003',
  'route': 'NYC-LAX',
  'day': 'Sat',
  'days_to_departure': 32,
  'class': 'premium',
  'price_usd': 279.62},
 {'ticket_id': 'T1402-0004',
  'route': 'LHR-JFK',
  'day': 'Sun',
  'days_to_departure': 35,
  'class': 'economy',
  'price_usd': 261.61},
 {'ticket_id': 'T1402-0005',
  'route': 'SFO-SEA',
  'day': 'Mon',
  'days_to_departure': 38,
  'class': 'business',
  'price_usd': 384.42}]

# Task 2

In [50]:
# Working lists for the cleaning step. `cleaned_tickets` starts as a shallow
# copy of the raw list: the list object is new, the dicts inside are the
# same objects as in `tickets`.
cleaned_tickets = list(tickets)
uncleaned_tickets = []
def Missing_values(tickets):
    """Move records with an unusable price out of ``cleaned_tickets``.

    A record is rejected when its ``price_usd`` is the empty string
    (missing), negative, or NaN. Each rejected record is appended to the
    module-level ``uncleaned_tickets`` list and removed from the
    module-level ``cleaned_tickets`` list. Both lists must exist before the
    call, and ``cleaned_tickets`` must still contain every record that is
    going to be rejected.

    Args:
        tickets: iterable of ticket dicts, each with a ``price_usd`` key.

    Returns:
        None. The result is the mutation of the two module-level lists.

    Raises:
        ValueError: if a rejected record is not present in
            ``cleaned_tickets`` (e.g. the function is called twice without
            resetting the lists) or a price is a non-numeric string.
    """
    for ticket in tickets:
        price = ticket['price_usd']
        # Compare the real value. int() truncates toward zero, so a price
        # such as -0.5 used to become 0 and pass as valid. Writing the test
        # as "not >= 0" also rejects NaN, which fails every comparison.
        if price == '' or not (float(price) >= 0):
            uncleaned_tickets.append(ticket)
            cleaned_tickets.remove(ticket)
            
            
Missing_values(tickets)

# Normalise class labels into fresh dicts. `cleaned_tickets` began as a
# shallow copy of `tickets`, so lower-casing the dicts in place would also
# rewrite the raw records. {**ticket, ...} keeps the original key order.
cleaned_tickets = [
    {**ticket, 'class': ticket['class'].lower()} for ticket in cleaned_tickets
]


In [51]:
# Records kept after removing blank and negative prices.
len(cleaned_tickets)

302

In [52]:
# Records rejected by Missing_values (blank or negative price).
len(uncleaned_tickets)

18

In [53]:
# Raw record count; cleaning removes items from a copy of the list, so this
# should equal the cleaned and rejected counts added together.
len(tickets)

320

In [49]:
# Print the first two cleaned records, one dict per line.
preview = cleaned_tickets[:2]
print("\nFirst two cleaned records:")
for record in preview:
    print(record)


First two cleaned records:
{'ticket_id': 'T1402-0001', 'route': 'DXB-SIN', 'day': 'Thu', 'days_to_departure': 26, 'class': 'economy', 'price_usd': 223.6}
{'ticket_id': 'T1402-0002', 'route': 'MAD-ROM', 'day': 'Fri', 'days_to_departure': 29, 'class': 'business', 'price_usd': 343.85}


# Task 3

In [91]:
# Prices of the cleaned tickets, in list order. dtype=float makes a stray
# non-numeric value (e.g. a blank price that escaped cleaning) raise here
# instead of silently producing a string array that breaks later maths.
prices_list = [ticket['price_usd'] for ticket in cleaned_tickets]
prices_array = np.array(prices_list, dtype=float)

In [92]:
# Preview the first three prices.
prices_array[:3]

array([223.6 , 343.85, 279.62])

In [93]:
# Day label of every cleaned ticket, built in the same order as
# prices_array so the two arrays line up element by element.
all_days = np.array([t['day'] for t in cleaned_tickets])

In [94]:
# Mean price across the cleaned tickets.
prices_array.mean()

np.float64(310.68718543046356)

In [95]:
# Standard deviation of the cleaned prices (NumPy default ddof=0, i.e. the
# population formula).
prices_array.std()

np.float64(115.78604706551792)

In [96]:
# unique_days: distinct day labels in sorted (alphabetical) order, not
# calendar order. inverse_indices: for each ticket, the position of its day
# within unique_days.
unique_days, inverse_indices = np.unique(all_days, return_inverse=True)

In [97]:
# Tickets per day: counts how often each day index occurs, so entry k
# belongs to unique_days[k].
daily_counts = np.bincount(inverse_indices)
daily_counts

array([45, 45, 45, 45, 45, 44, 33])

In [98]:
# Revenue per day: same grouping as the counts, but each ticket contributes
# its price as the weight instead of 1.
daily_revenue = np.bincount(inverse_indices, weights=prices_array)
daily_revenue

array([14270.23, 13945.78, 13789.88, 13907.37, 13975.36, 13577.09,
       10361.82])

In [99]:
# Per-day summary table; rows follow the order of unique_days.
print("Day | Count | Total Revenue")
print("-" * 30)
for day, count, revenue in zip(unique_days, daily_counts, daily_revenue):
    print(f"{day} | {count} | {revenue:.2f}")

Day | Count | Total Revenue
------------------------------
Fri | 45 | 14270.23
Mon | 45 | 13945.78
Sat | 45 | 13789.88
Sun | 45 | 13907.37
Thu | 45 | 13975.36
Tue | 44 | 13577.09
Wed | 33 | 10361.82


In [104]:
# Grand total computed two ways: directly from every price, and by adding
# up the per-day totals. The two are compared in the next cell.
overall_total = prices_array.sum()
daily_sum_total = daily_revenue.sum()

In [105]:
# Report both totals and whether they agree within floating-point tolerance.
totals_match = np.isclose(overall_total, daily_sum_total)
print(f"\nOverall Total: {overall_total:.2f}")
print(f"Daily Sum Total: {daily_sum_total:.2f}")
print(f"Validation Passed: {totals_match}")


Overall Total: 93827.53
Daily Sum Total: 93827.53
Validation Passed: True


# Task 4

In [106]:
# 90th percentile of the cleaned prices; used as the cut-off for the
# high-price selection.
threshold = np.percentile(prices_array, 90)

In [108]:
# Boolean mask keeps the prices at or above the threshold. Despite the
# name, this array holds price values, not ticket dicts.
high_price_tickets = prices_array[prices_array >= threshold]

In [109]:
# Show the selected high prices.
high_price_tickets

array([576.98, 471.48, 479.82, 526.06, 511.86, 499.59, 512.48, 596.48,
       506.26, 489.54, 543.1 , 513.33, 518.08, 535.93, 523.63, 523.71,
       527.16, 540.39, 491.96, 498.94, 494.83, 524.35, 542.95, 545.25,
       504.68, 535.15, 514.73, 497.2 , 513.73, 535.73, 531.21])

In [110]:
# Summarise the selection and confirm no selected price is below the cut-off.
selection_ok = (high_price_tickets >= threshold).all()
print(f"Threshold: {threshold:.2f}")
print(f"length: {len(high_price_tickets)}")
print(f"checking: {selection_ok}")

Threshold: 470.66
length: 31
checking: True


# Task 5

In [117]:
# Final report: gather every line first, then emit them in one print call.
report_lines = [
    "=== SIMULATION FINAL REPORT ===",
    f"total_tickets: {len(tickets)}",
    f"cleaned_tickets: {len(cleaned_tickets)}",
    f"mean_price: {prices_array.mean()}",
    f"daily_totals: {daily_revenue}",
    f"high_price_count: {len(high_price_tickets)}",
    f"std_price: {prices_array.std()}",
    "=============================== ",
]
print("\n".join(report_lines))

=== SIMULATION FINAL REPORT ===
total_tickets: 320
cleaned_tickets: 302
mean_price: 310.68718543046356
daily_totals: [14270.23 13945.78 13789.88 13907.37 13975.36 13577.09 10361.82]
high_price_count: 31
std_price: 115.78604706551792
