<h1>Task 1</h1>


In [1]:
import numpy as np

seed_value = 2602
n = 320
rng = np.random.default_rng(seed_value)


In [None]:
def list_gen(s):
    tickets = []
    for i in range(1, s + 1):
        ticket = {
            'ticket_id' : f'T{seed_value}-{i:04d}',
            'route' : ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"][(i + seed_value) % 5],
            'day' : ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][(i + seed_value) % 7],
            'days_to_departure' : 1 + ((i * 3 + seed_value) % 60),
            'class' : ["economy", "premium", "business"][(i * 2 + seed_value) % 3]
        }

        if i % 28 == 0:
            price_usd = ''
        else:    
            base = 120 + (ticket['days_to_departure'] * -1.5)
            route_adj = [140, 220, 60, 180, 80][(i + seed_value) % 5]
            class_adj = [0, 80, 220][(i * 2 + seed_value) % 3]
            noise = rng.normal(0, 25)
            price_usd = round(base + route_adj + class_adj + noise, 2)
            
        
        if i % 45 == 0:
            price_usd = price_usd * -1
        
        if i % 37 == 0:
            ticket['class'] = ticket['class'].upper()
        
        ticket['price_usd'] = price_usd
        tickets.append(ticket)

    return tickets

tickets = list_gen(n)
tickets[:5]

    

[{'ticket_id': 'T2602-0001',
  'route': 'DXB-SIN',
  'day': 'Sun',
  'days_to_departure': 26,
  'class': 'economy',
  'price_usd': 307.59},
 {'ticket_id': 'T2602-0002',
  'route': 'MAD-ROM',
  'day': 'Mon',
  'days_to_departure': 29,
  'class': 'business',
  'price_usd': 395.66},
 {'ticket_id': 'T2602-0003',
  'route': 'NYC-LAX',
  'day': 'Tue',
  'days_to_departure': 32,
  'class': 'premium',
  'price_usd': 236.28},
 {'ticket_id': 'T2602-0004',
  'route': 'LHR-JFK',
  'day': 'Wed',
  'days_to_departure': 35,
  'class': 'economy',
  'price_usd': 302.12},
 {'ticket_id': 'T2602-0005',
  'route': 'SFO-SEA',
  'day': 'Thu',
  'days_to_departure': 38,
  'class': 'business',
  'price_usd': 320.86}]

<h1>Task 2</h1>

In [3]:
def cleaning(records):
    cleaned = []
    invalid = []
    new_records = records.copy()
    for record in new_records:

        price = record['price_usd']
        record['class'] = record['class'].lower()

        
        if price in ["", None]:
            invalid.append(record)
            continue

        try:
            price_val = float(price)
        except:
            invalid.append(record)
            continue


        if price_val < 0:
            invalid.append(record)
            continue


        record['price_usd'] = price_val

        cleaned.append(record)


    return cleaned, invalid

cleaned_tickets, invalid_tickets = cleaning(tickets)

print(f'After cleanig:\nClean tickets: {len(cleaned_tickets)}\nInvalid tickets: {len(invalid_tickets)}')

cleaned_tickets[:2]


After cleanig:
Clean tickets: 302
Invalid tickets: 18


[{'ticket_id': 'T2602-0001',
  'route': 'DXB-SIN',
  'day': 'Sun',
  'days_to_departure': 26,
  'class': 'economy',
  'price_usd': 307.59},
 {'ticket_id': 'T2602-0002',
  'route': 'MAD-ROM',
  'day': 'Mon',
  'days_to_departure': 29,
  'class': 'business',
  'price_usd': 395.66}]

<h1>Task 3</h1>

In [4]:
price_usd = [ticket['price_usd'] for ticket in cleaned_tickets]
prices = np.array(price_usd)

days = np.array([ticket['day'] for ticket in cleaned_tickets])
unique_days, day_codes = np.unique(days, return_inverse=True)

print(len(days))
print(len(prices))
print(len(cleaned_tickets))

302
302
302


In [5]:
mean_prices = np.mean(prices)
std_prices = np.std(prices)

print(mean_prices)
print(std_prices)

314.59245033112586
115.1906117832634


In [6]:
daily_revenue = np.bincount(day_codes, weights=prices)
daily_counts = np.bincount(day_codes)

print(daily_counts)
print('\n')
print(daily_revenue)

[44 45 33 45 45 45 45]


[13736.64 14335.14 10499.18 14216.02 14122.23 13890.88 14206.83]


In [7]:
total_revenue = np.sum(prices)
sum_daily_revenue = np.sum(daily_revenue)
print(f'Total revenue: {total_revenue}')
print(f'Total of daily revenue: {sum_daily_revenue}')
print(f'Check: {total_revenue == sum_daily_revenue}')

Total revenue: 95006.92000000001
Total of daily revenue: 95006.92
Check: False


<h1>Task 4</h1>

In [8]:
threshold = np.percentile(prices, 90)
high_price_tickets = [ticket for ticket in cleaned_tickets if ticket['price_usd'] >= threshold]
print(f'Threshold: {threshold}')
print(f'Number of high priced tickets: {len(high_price_tickets)}')
print(all(ticket['price_usd'] >= threshold for ticket in high_price_tickets))


Threshold: 477.7920000000001
Number of high priced tickets: 31
True


<h1>Task 5</h1>

In [9]:
unique_days, day_codes = np.unique(days, return_inverse=True)
daily_revenue = np.bincount(day_codes, weights=prices)
daily_totals = dict(zip(unique_days, daily_revenue))

report = {
    "total_tickets": len(tickets),
    "cleaned_tickets": len(cleaned_tickets),
    "mean_price": mean_prices,
    "std_price": std_prices,
    "daily_totals": daily_totals,
    "high_price_count": len(high_price_tickets),
    "validation_total_revenue": np.isclose(total_revenue, sum_daily_revenue)
}

print(f"Total tickets generated: {report['total_tickets']}")
print(f"Cleaned tickets: {report['cleaned_tickets']}")
print(f"Mean price: ${report['mean_price']}")
print(f"Std price: ${report['std_price']}")
print(f"High priced tickets: {report['high_price_count']}")
print('Daily Totals:')

for key, value in report['daily_totals'].items():
    print(f"{key}: {value:.2f}")

print(f"Validation: {report['validation_total_revenue']}")

Total tickets generated: 320
Cleaned tickets: 302
Mean price: $314.59245033112586
Std price: $115.1906117832634
High priced tickets: 31
Daily Totals:
Fri: 13736.64
Mon: 14335.14
Sat: 10499.18
Sun: 14216.02
Thu: 14122.23
Tue: 13890.88
Wed: 14206.83
Validation: True
