# Step 1 - automated data ingestion

In [5]:
import requests
from datetime import datetime, timedelta
from os import environ
import pandas as pd
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import plotly.express as px


api_key = environ.get('aviation_key')
api_url = 'https://api.aviationstack.com/v1/flights'

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days) + 1):
        yield start_date + timedelta(n)

def fetch_flights_for_date(flight_date, airline):
    params = {
        'access_key': api_key,
        'flight_date': flight_date.strftime('%Y-%m-%d'),
        'airline_name': airline
    }
    
    response = requests.get(api_url, params=params)
    if response.status_code == 200:
        data = response.json().get('data', [])
        return data
    else:
        print(f"Failed to fetch data for {flight_date}: {response.status_code}")
        return []

def fetch_flights(airline, start_date_str, end_date_str):
    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    all_flights = []

    for single_date in daterange(start_date, end_date):
        flights_on_date = fetch_flights_for_date(single_date, airline)
        all_flights.extend(flights_on_date)

    if all_flights:
        return pd.DataFrame(all_flights)
    else:
        return "No data available for the given parameters" 



In [6]:
airline = "Scoot"  
start_date = "2024-01-15"  
end_date = "2024-01-20"  

df = fetch_flights(airline, start_date, end_date)
print(df)

    flight_date flight_status  \
0    2024-01-17        landed   
1    2024-01-17        landed   
2    2024-01-17     scheduled   
3    2024-01-17        landed   
4    2024-01-17        landed   
..          ...           ...   
395  2024-01-20        landed   
396  2024-01-20        landed   
397  2024-01-20        landed   
398  2024-01-20        landed   
399  2024-01-20        landed   

                                             departure  \
0    {'airport': 'Suvarnabhumi International', 'tim...   
1    {'airport': 'Penang International', 'timezone'...   
2    {'airport': 'Soekarno-Hatta International', 't...   
3    {'airport': 'Nanning', 'timezone': 'Asia/Shang...   
4    {'airport': 'Singapore Changi', 'timezone': 'A...   
..                                                 ...   
395  {'airport': 'Seoul (Incheon)', 'timezone': 'As...   
396  {'airport': 'Ninoy Aquino International', 'tim...   
397  {'airport': 'Nanjing Lukou International Airpo...   
398  {'airport': 'Hangz

# Step 2 - Data storage


In [9]:
def store_data(df, filename, database_url):
    # Store in CSV
    df.to_csv(filename, index=False)

    # Store in SQL Database
    engine = create_engine(database_url)
    df.to_sql('flights', con=engine, if_exists='replace', index=False)


# Step 3 - Data Querying and filtering

In [None]:
def query_data(engine, airline=None, origin=None, destination=None):
    query = "SELECT * FROM flights WHERE 1=1"
    if airline:
        query += f" AND airline = '{airline}'"
    if origin:
        query += f" AND origin = '{origin}'"
    if destination:
        query += f" AND destination = '{destination}'"
    
    return pd.read_sql(query, con=engine)


# Step 4 - Analyzing the Data

In [None]:
def analyze_data(df):
    total_flights = len(df)
    average_duration = df['duration'].mean()
    flights_per_airline = df['airline'].value_counts()
    return total_flights, average_duration, flights_per_airline


# Step 5 - Visualizations

In [None]:
def plot_data(df):
    # Using Matplotlib for a simple histogram
    plt.figure(figsize=(10, 5))
    plt.hist(df['duration'], bins=20, color='blue')
    plt.title('Flight Duration Distribution')
    plt.xlabel('Duration (Minutes)')
    plt.ylabel('Number of Flights')
    plt.show()

    # Using Plotly for interactive charts
    fig = px.bar(df, x='airline', y='duration', title='Average Flight Duration by Airline')
    fig.show()
