# Step 1 - Automated Data Ingestion

In [4]:
import requests
from datetime import datetime, timedelta
from os import environ
import pandas as pd
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import plotly.express as px
from sqlalchemy.engine.url import URL
from sqlalchemy_utils import database_exists, create_database, drop_database


# The AviationStack access key is read from the `aviation_key` environment
# variable so the secret never lives in the notebook. Set it before starting
# the kernel (or with `%env aviation_key=<key>`) and rerun this cell.
api_key = environ.get('aviation_key')
if not api_key:
    # Warn instead of raising so the remaining definitions in this cell still load.
    print("Warning: environment variable 'aviation_key' is not set; API requests will fail.")
api_url = 'https://api.aviationstack.com/v1/flights'

def daterange(start_date, end_date):
    """Yield every day from ``start_date`` through ``end_date``, inclusive.

    Yields nothing when ``end_date`` falls on an earlier day than ``start_date``.
    """
    span_days = int((end_date - start_date).days)
    offset = 0
    while offset <= span_days:
        yield start_date + timedelta(days=offset)
        offset += 1

def fetch_flights_for_date(flight_date, airline):
    """Fetch the flight records of one airline for a single day.

    Sends one GET request to ``api_url`` using the module-level ``api_key``.

    Args:
        flight_date: Date-like object with ``strftime``; sent as ``YYYY-MM-DD``.
        airline: Airline name passed as the ``airline_name`` query parameter.

    Returns:
        The list found under the response's ``data`` key, or an empty list when
        the request fails, the status is not 200, or ``data`` is missing/null.
    """
    params = {
        'access_key': api_key,
        'flight_date': flight_date.strftime('%Y-%m-%d'),
        'airline_name': airline
    }

    try:
        # A timeout keeps a stalled connection from hanging the whole date loop.
        response = requests.get(api_url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Stay best-effort, like the non-200 branch: report and skip this day.
        print(f"Failed to fetch data for {flight_date}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data for {flight_date}: {response.status_code}")
        return []

    try:
        payload = response.json()
    except ValueError as exc:
        # The body was not valid JSON.
        print(f"Failed to fetch data for {flight_date}: {exc}")
        return []

    # `data` can be present but null; always hand the caller a list.
    # NOTE(review): only a single response page is read here — confirm whether
    # the API paginates results for busy days.
    return payload.get('data') or []

def fetch_flights(airline, start_date_str, end_date_str):
    """Collect an airline's flights for every day in an inclusive date window.

    Args:
        airline: Airline name forwarded to ``fetch_flights_for_date``.
        start_date_str: First day of the window, formatted ``YYYY-MM-DD``.
        end_date_str: Last day of the window, formatted ``YYYY-MM-DD``.

    Returns:
        A ``pandas.DataFrame`` with one row per flight record, or the string
        ``"No data available for the given parameters"`` when nothing was
        returned for any day. Callers must check for the string case.
    """
    date_format = '%Y-%m-%d'
    first_day = datetime.strptime(start_date_str, date_format)
    last_day = datetime.strptime(end_date_str, date_format)

    # Flatten the per-day result lists into a single list of records.
    collected = [
        record
        for day in daterange(first_day, last_day)
        for record in fetch_flights_for_date(day, airline)
    ]

    if not collected:
        return "No data available for the given parameters"
    return pd.DataFrame(collected)



In [5]:
# Query parameters for this run: one airline over a date window.
airline = "Scoot"
start_date = "2024-01-15"
end_date = "2024-01-20"

# Download every day in the window and print the combined result as plain text.
df = fetch_flights(
    airline=airline,
    start_date_str=start_date,
    end_date_str=end_date,
)
print(df)

    flight_date flight_status  \
0    2024-01-19        landed   
1    2024-01-19        landed   
2    2024-01-19        landed   
3    2024-01-19     scheduled   
4    2024-01-19        landed   
..          ...           ...   
195  2024-01-20        landed   
196  2024-01-20        landed   
197  2024-01-20        landed   
198  2024-01-20        landed   
199  2024-01-20        landed   

                                             departure  \
0    {'airport': 'Singapore Changi', 'timezone': 'A...   
1    {'airport': 'Singapore Changi', 'timezone': 'A...   
2    {'airport': 'Singapore Changi', 'timezone': 'A...   
3    {'airport': 'Singapore Changi', 'timezone': 'A...   
4    {'airport': 'Singapore Changi', 'timezone': 'A...   
..                                                 ...   
195  {'airport': 'Seoul (Incheon)', 'timezone': 'As...   
196  {'airport': 'Ninoy Aquino International', 'tim...   
197  {'airport': 'Nanjing Lukou International Airpo...   
198  {'airport': 'Hangz

In [17]:
# Expand the dict held in each 'departure' cell into one column per field.
# `columns` fixes which keys are kept and the order they appear in.
split = pd.DataFrame(
    df['departure'].to_list(),
    columns=[
        'airport', 'timezone', 'iata', 'icao',
        'terminal', 'gate', 'delay', 'scheduled',
        'estimated', 'actual', 'estimated_runway', 'actual_runway',
    ],
)

# Other nested columns that could be expanded the same way: 'arrival', 'airline', 'flight'

Unnamed: 0,airport,timezone,iata,icao,terminal,gate,delay,scheduled,estimated,actual,estimated_runway,actual_runway
0,Singapore Changi,Asia/Singapore,SIN,WSSS,1,C26,39.0,2024-01-19T14:55:00+00:00,2024-01-19T14:55:00+00:00,2024-01-19T15:34:00+00:00,2024-01-19T15:34:00+00:00,2024-01-19T15:34:00+00:00
1,Singapore Changi,Asia/Singapore,SIN,WSSS,1,C18,54.0,2024-01-19T14:50:00+00:00,2024-01-19T14:50:00+00:00,2024-01-19T15:43:00+00:00,2024-01-19T15:43:00+00:00,2024-01-19T15:43:00+00:00
2,Singapore Changi,Asia/Singapore,SIN,WSSS,1,D40,24.0,2024-01-19T14:30:00+00:00,2024-01-19T14:30:00+00:00,2024-01-19T14:53:00+00:00,2024-01-19T14:53:00+00:00,2024-01-19T14:53:00+00:00
3,Singapore Changi,Asia/Singapore,SIN,WSSS,1,B10,78.0,2024-01-19T14:15:00+00:00,2024-01-19T14:15:00+00:00,2024-01-19T15:32:00+00:00,2024-01-19T15:32:00+00:00,2024-01-19T15:32:00+00:00
4,Singapore Changi,Asia/Singapore,SIN,WSSS,1,C13,22.0,2024-01-19T14:25:00+00:00,2024-01-19T14:25:00+00:00,2024-01-19T14:46:00+00:00,2024-01-19T14:46:00+00:00,2024-01-19T14:46:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...
195,Seoul (Incheon),Asia/Seoul,ICN,RKSI,1,110,59.0,2024-01-20T22:35:00+00:00,2024-01-20T22:35:00+00:00,2024-01-20T23:34:00+00:00,2024-01-20T23:34:00+00:00,2024-01-20T23:34:00+00:00
196,Ninoy Aquino International,Asia/Manila,MNL,RPLL,3,,70.0,2024-01-20T19:30:00+00:00,2024-01-20T19:30:00+00:00,2024-01-20T20:39:00+00:00,2024-01-20T20:39:00+00:00,2024-01-20T20:39:00+00:00
197,Nanjing Lukou International Airport,Asia/Shanghai,NKG,ZSNJ,2,9,21.0,2024-01-20T17:00:00+00:00,2024-01-20T17:00:00+00:00,2024-01-20T17:21:00+00:00,2024-01-20T17:21:00+00:00,2024-01-20T17:21:00+00:00
198,Hangzhou,Asia/Shanghai,HGH,ZSHC,4,,12.0,2024-01-20T23:15:00+00:00,2024-01-20T23:15:00+00:00,2024-01-20T23:26:00+00:00,2024-01-20T23:26:00+00:00,2024-01-20T23:26:00+00:00


In [16]:
# Show the flights frame as this cell's output.
# NOTE(review): the recorded output already contains the expanded departure
# columns ('airport' ... 'actual_runway'), but no visible cell joins `split`
# back onto `df` — confirm the missing step so a fresh-kernel run reproduces it.
df

Unnamed: 0,flight_date,flight_status,departure,arrival,airline,flight,aircraft,live,airport,timezone,iata,icao,terminal,gate,delay,scheduled,estimated,actual,estimated_runway,actual_runway
0,2024-01-19,landed,"{'airport': 'Singapore Changi', 'timezone': 'A...","{'airport': 'Penang International', 'timezone'...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '424', 'iata': 'TR424', 'icao': 'TG...",,,Singapore Changi,Asia/Singapore,SIN,WSSS,1,C26,39.0,2024-01-19T14:55:00+00:00,2024-01-19T14:55:00+00:00,2024-01-19T15:34:00+00:00,2024-01-19T15:34:00+00:00,2024-01-19T15:34:00+00:00
1,2024-01-19,landed,"{'airport': 'Singapore Changi', 'timezone': 'A...","{'airport': 'Ninoy Aquino International', 'tim...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '390', 'iata': 'TR390', 'icao': 'TG...","{'registration': '9V-NCF', 'iata': 'A21N', 'ic...",,Singapore Changi,Asia/Singapore,SIN,WSSS,1,C18,54.0,2024-01-19T14:50:00+00:00,2024-01-19T14:50:00+00:00,2024-01-19T15:43:00+00:00,2024-01-19T15:43:00+00:00,2024-01-19T15:43:00+00:00
2,2024-01-19,landed,"{'airport': 'Singapore Changi', 'timezone': 'A...","{'airport': 'Nanchang', 'timezone': 'Asia/Shan...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '112', 'iata': 'TR112', 'icao': 'TG...",,,Singapore Changi,Asia/Singapore,SIN,WSSS,1,D40,24.0,2024-01-19T14:30:00+00:00,2024-01-19T14:30:00+00:00,2024-01-19T14:53:00+00:00,2024-01-19T14:53:00+00:00,2024-01-19T14:53:00+00:00
3,2024-01-19,scheduled,"{'airport': 'Singapore Changi', 'timezone': 'A...",{'airport': 'Sultan Aji Muhamad Sulaiman Airpo...,"{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '222', 'iata': 'TR222', 'icao': 'TG...",,,Singapore Changi,Asia/Singapore,SIN,WSSS,1,B10,78.0,2024-01-19T14:15:00+00:00,2024-01-19T14:15:00+00:00,2024-01-19T15:32:00+00:00,2024-01-19T15:32:00+00:00,2024-01-19T15:32:00+00:00
4,2024-01-19,landed,"{'airport': 'Singapore Changi', 'timezone': 'A...","{'airport': 'Krabi', 'timezone': 'Asia/Bangkok...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '686', 'iata': 'TR686', 'icao': 'TG...",,,Singapore Changi,Asia/Singapore,SIN,WSSS,1,C13,22.0,2024-01-19T14:25:00+00:00,2024-01-19T14:25:00+00:00,2024-01-19T14:46:00+00:00,2024-01-19T14:46:00+00:00,2024-01-19T14:46:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,2024-01-20,landed,"{'airport': 'Seoul (Incheon)', 'timezone': 'As...","{'airport': 'Singapore Changi', 'timezone': 'A...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '841', 'iata': 'TR841', 'icao': 'TG...","{'registration': '9V-OFC', 'iata': 'B788', 'ic...",,Seoul (Incheon),Asia/Seoul,ICN,RKSI,1,110,59.0,2024-01-20T22:35:00+00:00,2024-01-20T22:35:00+00:00,2024-01-20T23:34:00+00:00,2024-01-20T23:34:00+00:00,2024-01-20T23:34:00+00:00
196,2024-01-20,landed,"{'airport': 'Ninoy Aquino International', 'tim...","{'airport': 'Singapore Changi', 'timezone': 'A...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '391', 'iata': 'TR391', 'icao': 'TG...","{'registration': '9V-NCF', 'iata': 'A21N', 'ic...",,Ninoy Aquino International,Asia/Manila,MNL,RPLL,3,,70.0,2024-01-20T19:30:00+00:00,2024-01-20T19:30:00+00:00,2024-01-20T20:39:00+00:00,2024-01-20T20:39:00+00:00,2024-01-20T20:39:00+00:00
197,2024-01-20,landed,{'airport': 'Nanjing Lukou International Airpo...,"{'airport': 'Singapore Changi', 'timezone': 'A...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '181', 'iata': 'TR181', 'icao': 'TG...","{'registration': '9V-OFE', 'iata': 'B788', 'ic...",,Nanjing Lukou International Airport,Asia/Shanghai,NKG,ZSNJ,2,9,21.0,2024-01-20T17:00:00+00:00,2024-01-20T17:00:00+00:00,2024-01-20T17:21:00+00:00,2024-01-20T17:21:00+00:00,2024-01-20T17:21:00+00:00
198,2024-01-20,landed,"{'airport': 'Hangzhou', 'timezone': 'Asia/Shan...","{'airport': 'Singapore Changi', 'timezone': 'A...","{'name': 'Scoot', 'iata': 'TR', 'icao': 'TGW'}","{'number': '189', 'iata': 'TR189', 'icao': 'TG...","{'registration': '9V-OFI', 'iata': 'B788', 'ic...",,Hangzhou,Asia/Shanghai,HGH,ZSHC,4,,12.0,2024-01-20T23:15:00+00:00,2024-01-20T23:15:00+00:00,2024-01-20T23:26:00+00:00,2024-01-20T23:26:00+00:00,2024-01-20T23:26:00+00:00


# Step 2 - Data Storage


In [9]:
def store_data(df, filename, database_url):
    """Write a DataFrame to a CSV file and to the ``flights`` SQL table.

    Args:
        df: DataFrame to persist.
        filename: Destination path of the CSV file (written without the index).
        database_url: SQLAlchemy connection URL of the target database.

    The ``flights`` table is replaced if it already exists.
    """
    # Store in CSV
    df.to_csv(filename, index=False)

    # Store in SQL Database
    engine = create_engine(database_url)
    try:
        df.to_sql('flights', con=engine, if_exists='replace', index=False)
    finally:
        # Release pooled connections even when the write fails.
        engine.dispose()


In [None]:


# PostgreSQL connection settings. Each value can be overridden through the
# standard libpq environment variables (PGUSER, PGPASSWORD, PGHOST, PGPORT) so
# real credentials do not have to be written into the notebook; the literals
# below are only local-development defaults.
user = environ.get('PGUSER', 'postgres')
password = environ.get('PGPASSWORD', 'temppass')
host = environ.get('PGHOST', 'localhost')
port = environ.get('PGPORT', '5432')  # default PostgreSQL port
db_name = 'flights'

# URL for the PostgreSQL server, without a specific database name. URL.create
# escapes special characters (such as '@' or '/') in the credentials, which
# plain string interpolation would not.
server_url = URL.create(
    'postgresql',
    username=user,
    password=password,
    host=host,
    port=int(port),
)

# Engine for the server connection
server_engine = create_engine(server_url)
try:
    # Same server, with the target database selected.
    target_url = server_engine.url.set(database=db_name)

    # WARNING: destructive — an existing database is dropped and recreated empty.
    if database_exists(target_url):
        drop_database(target_url)
        print(f"Database '{db_name}' has been dropped.")

    # Create the database
    create_database(target_url)
    print(f"Database '{db_name}' has been created.")
finally:
    # Dispose the engine even if the drop/create step fails.
    server_engine.dispose()


# Step 3 - Data Querying and Filtering

In [7]:
def query_data(engine, airline=None, origin=None, destination=None):
    """Read rows from the ``flights`` table, optionally filtered by equality.

    Args:
        engine: SQLAlchemy engine (or connection) to run the query on.
        airline: If truthy, keep only rows whose ``airline`` equals this value.
        origin: If truthy, keep only rows whose ``origin`` equals this value.
        destination: If truthy, keep only rows whose ``destination`` equals
            this value.

    Returns:
        A ``pandas.DataFrame`` holding the matching rows.

    NOTE(review): this assumes ``flights`` has ``airline``, ``origin`` and
    ``destination`` columns — confirm against the table that is actually stored.
    """
    # Imported locally because the notebook's import cell does not expose `text`.
    from sqlalchemy import text

    requested = {'airline': airline, 'origin': origin, 'destination': destination}

    # Column names are fixed literals; only values come from the caller, and
    # they are sent as bound parameters rather than spliced into the SQL text.
    clauses = ["1=1"]
    params = {}
    for column, value in requested.items():
        if value:
            clauses.append(f"{column} = :{column}")
            params[column] = value

    query = text("SELECT * FROM flights WHERE " + " AND ".join(clauses))
    return pd.read_sql(query, con=engine, params=params)


# Step 4 - Analyzing the Data

In [8]:
def analyze_data(df):
    """Compute headline statistics for a flights DataFrame.

    Args:
        df: DataFrame with a ``duration`` column and an ``airline`` column.

    Returns:
        A 3-tuple ``(row_count, mean_duration, counts_per_airline)`` where the
        last item is the ``value_counts()`` Series of the ``airline`` column.
    """
    return (
        len(df),
        df['duration'].mean(),
        df['airline'].value_counts(),
    )


# Step 5 - Visualizations

In [9]:
def plot_data(df):
    """Plot the duration distribution and the mean duration per airline.

    Args:
        df: DataFrame with a ``duration`` column and an ``airline`` column.

    Shows a Matplotlib histogram of ``duration`` followed by a Plotly bar
    chart with one bar per airline.
    """
    # Using Matplotlib for a simple histogram.
    # Missing durations are dropped: the histogram cannot bin NaN values.
    durations = df['duration'].dropna()
    plt.figure(figsize=(10, 5))
    plt.hist(durations, bins=20, color='blue')
    plt.title('Flight Duration Distribution')
    plt.xlabel('Duration (Minutes)')
    plt.ylabel('Number of Flights')
    plt.show()

    # Using Plotly for interactive charts.
    # Aggregate first so each bar is the airline's mean; plotting the raw rows
    # would stack every flight and show a sum instead of the titled average.
    mean_by_airline = df.groupby('airline', as_index=False)['duration'].mean()
    fig = px.bar(
        mean_by_airline,
        x='airline',
        y='duration',
        title='Average Flight Duration by Airline',
    )
    fig.show()


In [10]:
# Define the MySQL database credentials.
# Each value can be overridden through an environment variable (MYSQL_USER,
# MYSQL_PASSWORD, MYSQL_HOST, MYSQL_PORT) so real credentials stay out of the
# notebook; the literals are only local-development defaults.
# NOTE(review): `user`, `password`, `host` and `port` reuse the names of the
# PostgreSQL settings defined earlier, so whichever cell ran last wins.
user = environ.get('MYSQL_USER', 'root')
password = environ.get('MYSQL_PASSWORD', 'password')
host = environ.get('MYSQL_HOST', '127.0.0.1')
port = int(environ.get('MYSQL_PORT', 3306))
database = 'DE_challenge'
 
# Python function to connect to the MySQL database and
# return the SQLAlchemy engine object.
def get_connection():
    """Create a SQLAlchemy engine for the configured MySQL database.

    Reads the module-level ``user``, ``password``, ``host``, ``port`` and
    ``database`` settings at call time and uses the ``mysql+pymysql`` driver.

    Returns:
        The engine returned by ``create_engine``.
    """
    # URL.create escapes special characters (such as '@' or ':') in the
    # credentials, which string formatting would leave to corrupt the URL.
    return create_engine(
        url=URL.create(
            "mysql+pymysql",
            username=user,
            password=password,
            host=host,
            port=int(port),
            database=database,
        )
    )

Database 'flights' has been dropped.
Database 'flights' has been created.


In [11]:
# Inspect which user the engine's connection URL is configured with.
# NOTE(review): `engine` is not defined in any visible cell — presumably left
# over from an earlier kernel session; define it explicitly before this line.
engine.url.username

'postgres'