In [127]:
# Importing the necessary packages
import numpy as np                                  # "Scientific computing"
import scipy.stats as stats                         # Statistical tests

import pandas as pd                                 # Data Frame
from pandas.api.types import CategoricalDtype

import matplotlib.pyplot as plt                     # Basic visualisation
from statsmodels.graphics.mosaicplot import mosaic  # Mosaic diagram
import seaborn as sns                               # Advanced data visualisation
import altair as alt                                # Alternative visualisation system

* TUI	Brugge - Oostende	Brindisi	Aantal vluchten	Augustus
* TUI	Antwerpen	Alicante	Gemiddelde prijs	Juni
* TUI	Brussel	Corfu	Aantal vluchten	Mei
* TUI	Luik	Rhodos	Gemiddelde prijs	April
* TUI	Brussel	Brindisi	Vertrekuur	18-jul
* TUI	Luik	Alicante	Aankomstuur	19-jun
* TUI	Brussel	Corfu	Aantal tussenstops	23-mei
* TUI	Brugge - Oostende	Rhodos	Aantal plaatsen beschikbaar	30-mei

In [128]:
# Load the scraped TUI fly flight data (path is relative to this notebook).
tuifly = pd.read_csv('../scraping/tuifly.csv')

# Only the last bare expression of a cell is rendered, so the preview must be
# displayed explicitly; a bare `tuifly.head()` here would be silently discarded.
display(tuifly.head())
# Column overview: names, non-null counts and dtypes.
tuifly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2810 entries, 0 to 2809
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   date_data_recieved  2810 non-null   object 
 1   departDate          2810 non-null   object 
 2   arrivalDate         2810 non-null   object 
 3   flightNumber        2810 non-null   int64  
 4   productId           2810 non-null   object 
 5   depTime             2810 non-null   object 
 6   arrivalTime         2810 non-null   object 
 7   departAirportCode   2810 non-null   object 
 8   arrivalAirportCode  2810 non-null   object 
 9   journeyType         2810 non-null   object 
 10  totalNumberOfStops  2810 non-null   int64  
 11  journeyDuration     2810 non-null   object 
 12  arrivalAirportName  2810 non-null   object 
 13  departAirportName   2810 non-null   object 
 14  availableSeats      2810 non-null   int64  
 15  carrierCode         2810 non-null   object 
 16  carrie

In [129]:
# Convert both flight-date columns from their loaded form into pandas datetimes.
for date_column in ('departDate', 'arrivalDate'):
    tuifly[date_column] = pd.to_datetime(tuifly[date_column])

In [130]:
# Re-inspect the column overview after the date conversion; the Dtype column
# shows whether departDate and arrivalDate are now datetime64.
tuifly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2810 entries, 0 to 2809
Data columns (total 19 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date_data_recieved  2810 non-null   object        
 1   departDate          2810 non-null   datetime64[ns]
 2   arrivalDate         2810 non-null   datetime64[ns]
 3   flightNumber        2810 non-null   int64         
 4   productId           2810 non-null   object        
 5   depTime             2810 non-null   object        
 6   arrivalTime         2810 non-null   object        
 7   departAirportCode   2810 non-null   object        
 8   arrivalAirportCode  2810 non-null   object        
 9   journeyType         2810 non-null   object        
 10  totalNumberOfStops  2810 non-null   int64         
 11  journeyDuration     2810 non-null   object        
 12  arrivalAirportName  2810 non-null   object        
 13  departAirportName   2810 non-null   object      

# TUI	Brugge - Oostende	Brindisi	Aantal vluchten	Augustus


In [131]:
# Flights from Ostend-Bruges (OST) to Brindisi (BDS) departing in August 2023.
# The upper bound is exclusive (< 1 September) so that departures on 1 September
# are not counted as August flights; this matches the half-open month ranges
# used by the other monthly queries in this notebook.
tuifly.query('departDate >= "2023-08-01" and departDate < "2023-09-01" and departAirportCode == "OST" and arrivalAirportCode == "BDS"')

Unnamed: 0,date_data_recieved,departDate,arrivalDate,flightNumber,productId,depTime,arrivalTime,departAirportCode,arrivalAirportCode,journeyType,totalNumberOfStops,journeyDuration,arrivalAirportName,departAirportName,availableSeats,carrierCode,carrierName,totalPrice,adultPrice


# TUI	Antwerpen	Alicante	Gemiddelde prijs	Juni


In [132]:
# Sum of totalPrice over all Antwerp (ANR) -> Alicante (ALC) rows departing in June 2023.
prijs = (
    tuifly
    .query("(departDate >= '2023-06-01') and (departDate < '2023-07-01') and (departAirportCode == 'ANR') and (arrivalAirportCode == 'ALC')")
    .loc[:, 'totalPrice']
    .sum()
)
prijs

2754.8499999999995

In [133]:
# Number of non-null totalPrice values for the same ANR -> ALC June 2023 selection.
aantal = (
    tuifly
    .query("(departDate >= '2023-06-01') and (departDate < '2023-07-01') and (departAirportCode == 'ANR') and (arrivalAirportCode == 'ALC')")
    .loc[:, 'totalPrice']
    .count()
)
aantal

15

In [134]:
# Average price = summed price divided by the number of priced rows.
gemiddelde_prijs = prijs / aantal
gemiddelde_prijs

183.65666666666664

# TUI	Brussel	Corfu	Aantal vluchten	Mei


In [141]:
# Row count of Brussels (BRU) -> Corfu (CFU) entries departing in May 2023.
lenBRUCFU = (
    tuifly
    .query("(departDate >= '2023-05-01') and (departDate < '2023-06-01') and (departAirportCode == 'BRU') and (arrivalAirportCode == 'CFU')")
    .shape[0]
)
lenBRUCFU

15

# TUI	Luik	Rhodos	Gemiddelde prijs	April


In [144]:
# Average totalPrice for Liege (LGG) -> Rhodes (RHO) rows departing in April 2023.
# The selection is evaluated once and reused for both the sum and the count.
lgg_rho_april_prices = tuifly.query("(departDate >= '2023-04-01')  and (departDate < '2023-05-01') and (departAirportCode == 'LGG') and (arrivalAirportCode == 'RHO')")['totalPrice']
price = lgg_rho_april_prices.sum()
amount = lgg_rho_april_prices.count()

print(price / amount)

269.99


# TUI	Brussel	Brindisi	Vertrekuur	18-jul


In [146]:
# Departure time(s) of Brussels (BRU) -> Brindisi (BDS) rows dated 18 July 2023.
(
    tuifly
    .query("(departDate == '2023-07-18')  and (departAirportCode == 'BRU') and (arrivalAirportCode == 'BDS')")
    .loc[:, 'depTime']
)

1415    15:15
Name: depTime, dtype: object

# TUI	Luik	Alicante	Aankomstuur	19-jun


In [147]:
# Arrival time(s) of Liege (LGG) -> Alicante (ALC) rows departing on 19 June 2023.
(
    tuifly
    .query("(departDate == '2023-06-19')  and (departAirportCode == 'LGG') and (arrivalAirportCode == 'ALC')")
    .loc[:, 'arrivalTime']
)

977    08:45
Name: arrivalTime, dtype: object

# TUI	Brussel	Corfu	Aantal tussenstops	23-mei


In [149]:
# Number of stops for Brussels (BRU) -> Corfu (CFU) rows departing on 23 May 2023.
(
    tuifly
    .query("(departDate == '2023-05-23')  and (departAirportCode == 'BRU') and (arrivalAirportCode == 'CFU')")
    .loc[:, 'totalNumberOfStops']
)

524    1
Name: totalNumberOfStops, dtype: int64

# TUI	Brugge - Oostende	Rhodos	Aantal plaatsen beschikbaar	30-mei

In [151]:
# Available seats for Ostend-Bruges (OST) -> Rhodes (RHO) rows departing on 30 May 2023.
(
    tuifly
    .query("(departDate == '2023-05-30')  and (departAirportCode == 'OST') and (arrivalAirportCode == 'RHO')")
    .loc[:, 'availableSeats']
)

645    10
Name: availableSeats, dtype: int64