<h1>Task 1</h1>


In [1]:
import numpy as np

# Seed for the random generator; list_gen also embeds it in ticket IDs and uses
# it as an offset when cycling through routes, days, lead times and classes.
seed_value = 2602
# Number of tickets passed to list_gen.
n = 320
# Generator that list_gen draws the price noise from.
rng = np.random.default_rng(seed_value)


In [2]:
def list_gen(s):
    """Build ``s`` synthetic ticket records, including deliberately dirty ones.

    Every field is derived from the 1-based ticket index and the module-level
    ``seed_value``; the price additionally gets Gaussian noise drawn from the
    module-level ``rng``.

    Injected irregularities:
      * every 28th ticket has an empty-string price (no noise is drawn for it),
      * every 45th ticket has its price multiplied by -1,
      * every 37th ticket has an upper-cased class label.

    Args:
        s: Number of tickets to generate.

    Returns:
        A list of dicts with the keys ``ticket_id``, ``route``, ``day``,
        ``days_to_departure``, ``class`` and ``price_usd``.
    """
    routes = ("NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM")
    weekdays = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    cabins = ("economy", "premium", "business")
    route_surcharge = (140, 220, 60, 180, 80)
    cabin_surcharge = (0, 80, 220)

    generated = []
    for idx in range(1, s + 1):
        route_slot = (idx + seed_value) % 5
        cabin_slot = (idx * 2 + seed_value) % 3
        lead_days = 1 + ((idx * 3 + seed_value) % 60)

        if idx % 28:
            # Regular ticket: price falls by 1.5 per day of lead time, plus
            # route and class surcharges and random noise.
            base_fare = 120 + (lead_days * -1.5)
            jitter = rng.normal(0, 25)
            fare = round(
                base_fare + route_surcharge[route_slot] + cabin_surcharge[cabin_slot] + jitter,
                2,
            )
        else:
            # Missing price marker.
            fare = ''

        if idx % 45 == 0:
            # Sign flip; an empty-string price stays '' under this multiplication.
            fare = fare * -1

        cabin = cabins[cabin_slot]
        if idx % 37 == 0:
            cabin = cabin.upper()

        generated.append({
            'ticket_id': f'T{seed_value}-{idx:04d}',
            'route': routes[route_slot],
            'day': weekdays[(idx + seed_value) % 7],
            'days_to_departure': lead_days,
            'class': cabin,
            'price_usd': fare,
        })

    return generated

# Generate the n raw tickets and preview the first five records.
tickets = list_gen(n)
tickets[:5]

    

[{'ticket_id': 'T2602-0001',
  'route': 'DXB-SIN',
  'day': 'Sun',
  'days_to_departure': 26,
  'class': 'economy',
  'price_usd': 307.59},
 {'ticket_id': 'T2602-0002',
  'route': 'MAD-ROM',
  'day': 'Mon',
  'days_to_departure': 29,
  'class': 'business',
  'price_usd': 395.66},
 {'ticket_id': 'T2602-0003',
  'route': 'NYC-LAX',
  'day': 'Tue',
  'days_to_departure': 32,
  'class': 'premium',
  'price_usd': 236.28},
 {'ticket_id': 'T2602-0004',
  'route': 'LHR-JFK',
  'day': 'Wed',
  'days_to_departure': 35,
  'class': 'economy',
  'price_usd': 302.12},
 {'ticket_id': 'T2602-0005',
  'route': 'SFO-SEA',
  'day': 'Thu',
  'days_to_departure': 38,
  'class': 'business',
  'price_usd': 320.86}]

<h1>Task 2</h1>

In [3]:
def cleaning(records):
    """Split ticket records into cleaned and invalid ones without touching the input.

    For every record the class label is lower-cased. A record is invalid when
    its ``price_usd`` is ``""``/``None`` or cannot be converted to ``float``;
    otherwise the price is stored as a non-negative float (negative prices are
    treated as sign errors and replaced by their absolute value).

    Args:
        records: Iterable of ticket dicts that contain at least the keys
            ``class`` (str) and ``price_usd``.

    Returns:
        A ``(cleaned, invalid)`` tuple of lists. Both lists hold *copies* of
        the input dicts, so the caller's records keep their raw values.
    """
    cleaned = []
    invalid = []

    for original in records:
        # Copy each dict: a shallow list copy would still share the dicts, and
        # the edits below would silently rewrite the caller's raw data.
        record = dict(original)
        record['class'] = record['class'].lower()
        price = record['price_usd']

        if price in ("", None):
            invalid.append(record)
            continue

        try:
            price_val = float(price)
        except (TypeError, ValueError):
            # Only conversion failures mark a record invalid; anything else
            # (e.g. KeyboardInterrupt) must propagate.
            invalid.append(record)
            continue

        record['price_usd'] = abs(price_val)
        cleaned.append(record)

    return cleaned, invalid

# Split the generated tickets into usable records and records without a usable price.
cleaned_tickets, invalid_tickets = cleaning(tickets)

# Report how many records ended up in each group.
print(f'After cleanig:\nClean tickets: {len(cleaned_tickets)}\nInvalid tickets: {len(invalid_tickets)}')

# Preview the first two cleaned records.
cleaned_tickets[:2]


After cleanig:
Clean tickets: 309
Invalid tickets: 11


[{'ticket_id': 'T2602-0001',
  'route': 'DXB-SIN',
  'day': 'Sun',
  'days_to_departure': 26,
  'class': 'economy',
  'price_usd': 307.59},
 {'ticket_id': 'T2602-0002',
  'route': 'MAD-ROM',
  'day': 'Mon',
  'days_to_departure': 29,
  'class': 'business',
  'price_usd': 395.66}]

<h1>Task 3</h1>

In [4]:
# Collect price and lead time of every cleaned ticket in one pass, then turn
# both columns into NumPy arrays for the vectorised statistics below.
price_usd = []
days_to_departure = []
for entry in cleaned_tickets:
    price_usd.append(entry['price_usd'])
    days_to_departure.append(entry['days_to_departure'])

prices = np.array(price_usd)
days = np.array(days_to_departure)

# Sanity check: both arrays must have one element per cleaned ticket.
print(len(days), len(prices), len(cleaned_tickets), sep='\n')

309
309
309


In [5]:
# Mean and (population, ddof=0) standard deviation of the cleaned prices.
mean_prices = prices.mean()
std_prices = prices.std()

print(mean_prices, std_prices, sep='\n')

312.4242071197411
114.9095605252213


In [6]:
# Largest lead time present in the cleaned data.
max_day = np.max(days)

# Index k of each array refers to tickets with days_to_departure == k:
# daily_counts holds how many there are, daily_revenue the sum of their prices.
daily_counts = np.bincount(days)
daily_revenue = np.bincount(days, weights=prices)

# Two blank lines separate the two arrays in the output.
print(daily_counts, end='\n\n\n')
print(daily_revenue)

[ 0  0 16  0  0 16  0  0 16  0  0 14  0  0 16  0  0 16  0  0 16  0  0 14
  0  0 16  0  0 16  0  0 16  0  0 14  0  0 16  0  0 16  0  0 16  0  0 13
  0  0 16  0  0 16  0  0 16  0  0 14]


[   0.      0.   5611.83    0.      0.   7036.14    0.      0.   4235.32
    0.      0.   5270.23    0.      0.   4620.85    0.      0.   5518.86
    0.      0.   6384.39    0.      0.   3443.62    0.      0.   5722.47
    0.      0.   4309.27    0.      0.   4951.9     0.      0.   5310.79
    0.      0.   3721.92    0.      0.   5454.54    0.      0.   3563.25
    0.      0.   3891.03    0.      0.   5788.13    0.      0.   3160.18
    0.      0.   5376.11    0.      0.   3168.25]


In [7]:
# Cross-check: revenue summed over all tickets must match revenue summed over
# the per-day buckets.
total_revenue = np.sum(prices)
sum_daily_revenue = np.sum(daily_revenue)
print(f'Total revenue: {total_revenue}')
print(f'Total of daily revenue: {sum_daily_revenue}')
# The two totals add the same prices in a different order, so they may differ
# in the last bits; compare with a tolerance instead of exact float equality.
print(f'Check: {np.isclose(total_revenue, sum_daily_revenue)}')

Total revenue: 96539.08
Total of daily revenue: 96539.08
Check: True


<h1>Task 4</h1>

In [8]:
# Tickets priced at or above the 90th percentile of cleaned prices count as high priced.
threshold = np.percentile(prices, 90)
high_price_tickets = list(
    filter(lambda entry: entry['price_usd'] >= threshold, cleaned_tickets)
)

print(f'Threshold: {threshold}')
print(f'Number of high priced tickets: {len(high_price_tickets)}')

Threshold: 475.846
Number of high priced tickets: 31


<h1>Task 5</h1>

In [9]:
# Lead times that actually occur in the cleaned data.
unique_days = np.unique(days)

# Revenue per occurring lead time. Keys and values are converted to plain
# int/float so the report holds no NumPy scalars (np.int64 keys and np.bool_
# values are rejected by json.dumps).
daily_totals = {int(day): float(daily_revenue[day]) for day in unique_days}

report = {
    "total_tickets": len(tickets),
    "cleaned_tickets": len(cleaned_tickets),
    "mean_price": float(mean_prices),
    "std_price": float(std_prices),
    "daily_totals": daily_totals,
    "high_price_count": len(high_price_tickets),
    # Tolerance-based comparison of the overall and the per-day revenue totals.
    "validation_total_revenue": bool(np.isclose(total_revenue, sum_daily_revenue)),
}

print(f"Total tickets generated: {report['total_tickets']}")
print(f"Cleaned tickets: {report['cleaned_tickets']}")
print(f"Mean price: ${report['mean_price']}")
print(f"Std price: ${report['std_price']}")
print(f"High priced tickets: {report['high_price_count']}")
print(f"Validation: {report['validation_total_revenue']}")



Total tickets generated: 320
Cleaned tickets: 309
Mean price: $312.4242071197411
Std price: $114.9095605252213
High priced tickets: 31
Validation: True
