In [1]:
import numpy as np
import pandas as pd
import random

In [7]:
# --- TASK 1 ---

import numpy as np

seed_value = 1129
n = 320
rng = np.random.default_rng(seed_value)

route_list = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]
day_list = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
class_list = ["economy", "premium", "business"]

tickets = [] 

for i in range(1, n + 1):
    dtd = 1 + ((i * 3 + seed_value) % 60)
    r_idx = (i + seed_value) % 5
    d_idx = (i + seed_value) % 7
    c_idx = (i * 2 + seed_value) % 3
    
    base = 120 + (dtd * -1.5)
    route_adj = [140, 220, 60, 180, 80][r_idx]
    class_adj = [0, 80, 220][c_idx]
    noise = rng.normal(0, 25)
    price_usd = round(base + route_adj + class_adj + noise, 2)
    
    current_class = class_list[c_idx]

    if i % 28 == 0:
        price_usd = ""
    if i % 45 == 0:
        price_usd = price_usd * -1
    if i % 37 == 0:
        current_class = current_class.upper()

    ticket_dict = {
        "ticket_id": f"T{seed_value}-{i:04d}",
        "route": route_list[r_idx], # Burada siyahıdan adı çəkirik
        "day": day_list[d_idx],     # Burada günün adını çəkirik
        "days_to_departure": dtd,
        "class": current_class,
        "price_usd": price_usd
    }
    tickets.append(ticket_dict) 

print(f"Total records: {len(tickets)}")


Total records: 320


In [14]:
# --- TASK 2 ---

cleaned_tickets = []
for i in tickets:
    price = i["price_usd"]
    is_invalid = (price == "" or price is None or (isinstance(price,(int,float)) and (np.isnan(price) or price<0)))

    if not is_invalid:
        new_ticket = i.copy()
        new_ticket["class"] = new_ticket["class"].lower()
        cleaned_tickets.append(new_ticket)
print(len(cleaned_tickets))

302


In [31]:
# --- TASK 3 --- 

prices_list = [i["price_usd"] for i in cleaned_tickets]
days_list = [i["days_to_departure"] for i in cleaned_tickets]

prices_array = np.array(prices_list)
days_array = np.array(days_list)

prices_mean = np.mean(prices_array)
prices_std = np.std(prices_array)

print(prices_mean)
print(prices_std)

df = pd.DataFrame(cleaned_tickets)
print(f"total revenue per {df.groupby("day")["price_usd"].sum()}")
print("-------------")
print(f"total tickets per {df.groupby("day")["ticket_id"].count()}")

309.1545695364238
115.73330446575456


In [50]:
# --- TASK 4 ---

threshold = np.percentile(df["price_usd"] , 90)

high_price = df[df["price_usd"] >= threshold]

high_price_count = high_price.count()

print(high_price)
print(len(high_price_count))

      ticket_id    route  day  days_to_departure     class  price_usd
13   T1129-0014  DXB-SIN  Wed                 32  business     470.95
16   T1129-0017  LHR-JFK  Sat                 41  business     530.54
27   T1129-0029  DXB-SIN  Thu                 17  business     482.78
30   T1129-0032  LHR-JFK  Sun                 26  business     539.66
42   T1129-0044  DXB-SIN  Fri                  2  business     541.51
44   T1129-0047  LHR-JFK  Mon                 11  business     525.12
70   T1129-0074  DXB-SIN  Sun                 32  business     481.83
73   T1129-0077  LHR-JFK  Wed                 41  business     477.42
84   T1129-0089  DXB-SIN  Mon                 17  business     487.58
86   T1129-0092  LHR-JFK  Thu                 26  business     542.55
98   T1129-0104  DXB-SIN  Tue                  2  business     539.65
101  T1129-0107  LHR-JFK  Fri                 11  business     510.94
115  T1129-0122  LHR-JFK  Sat                 56  business     469.80
129  T1129-0137  LHR

In [68]:
# --- TASK 5 ---

total_tickets = df["ticket_id"].size
cleaned_tickets_len = len(cleaned_tickets)
mean_price = float(df["price_usd"].mean())
std_price = float(df["price_usd"].std())
daily_totals = df.groupby("day")["ticket_id"].count()
high_price_count = high_price.size
report = { "total_tickets" : total_tickets,
           "cleaned_tickets" : cleaned_tickets_len,
           "mean_price" : mean_price,
           "std_price" : std_price,
           "daily_totals" : daily_totals,
           "high_price_count" : high_price_count
         }
report

{'total_tickets': 302,
 'cleaned_tickets': 302,
 'mean_price': 309.1545695364238,
 'std_price': 115.92539307007746,
 'daily_totals': day
 Fri    45
 Mon    45
 Sat    45
 Sun    45
 Thu    45
 Tue    44
 Wed    33
 Name: ticket_id, dtype: int64,
 'high_price_count': 186}