In [2]:
import os
import pandas as pd
from urllib.request import urlretrieve
from zipfile import ZipFile
import math

class DataDownloader:
    """Download a zipped flight data set, extract it and load its CSV tables.

    Constructing an instance performs the whole pipeline:

    1. make sure ``downloads/`` and ``downloads/zip_files/`` exist below the
       current working directory,
    2. download the archive unless a file called ``file_name`` is already
       present in ``downloads/``,
    3. extract the archive into ``downloads/zip_files/``,
    4. read ``airlines.csv``, ``airplanes.csv``, ``airports.csv`` and
       ``routes.csv`` into pandas DataFrames.

    Attributes:
        data_url: URL the archive is downloaded from.
        file_name: Name under which the archive is stored in ``downloads_dir``.
        downloads_dir: Directory that holds the downloaded archive.
        zip_dir: Directory the archive is extracted into.
        airlines_df: Contents of ``airlines.csv`` without the ``index`` column.
        airplanes_df: Contents of ``airplanes.csv`` without the ``index`` column.
        airports_df: Contents of ``airports.csv`` without the ``index``,
            ``Type`` and ``Source`` columns.
        routes_df: Contents of ``routes.csv`` without the ``index`` column.
    """

    def __init__(self, data_url, file_name):
        """Run the download / extract / load pipeline.

        Args:
            data_url: URL of the zip archive.
            file_name: File name to store the archive under.

        Raises:
            KeyError: If one of the columns to drop is missing from a CSV file.
            Exception: Whatever the download, extraction or CSV parsing raises
                is propagated unchanged.
        """
        self.data_url = data_url
        self.file_name = file_name
        self.downloads_dir = os.path.join(os.getcwd(), 'downloads')
        self.zip_dir = os.path.join(self.downloads_dir, 'zip_files')

        # exist_ok=True replaces the check-then-create pattern, which can fail
        # if the directory appears between the check and the creation.
        for directory in (self.downloads_dir, self.zip_dir):
            os.makedirs(directory, exist_ok=True)

        # Only hit the network when the archive is not cached on disk yet.
        if not os.path.isfile(os.path.join(self.downloads_dir, self.file_name)):
            self.download_data()

        # Extraction is repeated on every construction, overwriting the CSVs.
        self.unzip_data()

        # Read datasets into corresponding pandas dataframes
        self.airlines_df = self._read_table('airlines.csv', ["index"])
        self.airplanes_df = self._read_table('airplanes.csv', ["index"])
        self.airports_df = self._read_table('airports.csv', ["index", "Type", "Source"])
        self.routes_df = self._read_table('routes.csv', ["index"])

    def _read_table(self, csv_name, dropped_columns):
        """Read ``csv_name`` from ``zip_dir`` and drop ``dropped_columns``."""
        table = pd.read_csv(os.path.join(self.zip_dir, csv_name))
        return table.drop(columns=dropped_columns)

    def download_data(self):
        """Download ``data_url`` to ``downloads_dir/file_name``.

        The data is first written to a ``.part`` file and only moved to its
        final name once the download finished. A failed or interrupted
        download therefore never leaves a truncated archive behind that the
        "file already exists" check in ``__init__`` would mistake for a
        complete one.
        """
        file_path = os.path.join(self.downloads_dir, self.file_name)
        partial_path = file_path + '.part'
        try:
            urlretrieve(self.data_url, partial_path)
            os.replace(partial_path, file_path)
        finally:
            # After a successful replace the partial file is gone; it only
            # still exists here when the download or the move failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"Downloaded {self.file_name} to {self.downloads_dir}")

    def unzip_data(self):
        """Extract every member of the downloaded archive into ``zip_dir``."""
        zip_file_path = os.path.join(self.downloads_dir, self.file_name)
        with ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(self.zip_dir)
        print(f"Unzipped {self.file_name} to {self.zip_dir}")


# Example usage: constructing the downloader creates the download folders,
# fetches the archive if it is not on disk yet, extracts it and loads the CSVs.
data_url = "https://gitlab.com/adpro1/adpro2024/-/raw/main/Files/flight_data.zip?inline=false"
file_name = "data.zip"
data_downloader = DataDownloader(data_url=data_url, file_name=file_name)

# Manual unit test; the reference distances were taken from this source ("https://www.luftlinie.org/9.652170181274414,%206.462259769439697/5-1-%E0%B8%AB%E0%B8%A1%E0%B8%B9%E0%B9%88-5-%E0%B8%95%E0%B8%B3%E0%B8%9A%E0%B8%A5%E0%B8%81%E0%B8%81%E0%B8%9B%E0%B8%A5%E0%B8%B2%E0%B8%8B%E0%B8%B4%E0%B8%A7-%E0%B8%AD%E0%B8%B3%E0%B9%80%E0%B8%A0%E0%B8%AD%E0%B8%A0%E0%B8%B9%E0%B8%9E%E0%B8%B2%E0%B8%99-%%E0%B8%88%E0%B8%B1%E0%B8%87%E0%B8%AB%E0%B8%A7%E0%B8%B1%E0%B8%94%E0%B8%AA%E0%B8%81%E0%B8%A5%E0%B8%99%E0%B8%84%E0%B8%A3-47180"):
"""
Unit test: compare three long-distance predictions of the model with the real distances taken from the source above:
first, call the function airport_distance
second, pass in two airport names
third, compare the result with the real distance calculated with the source above
fourth, repeat two more times with other airport pairs
finally, evaluate how close the model's predictions are to the real distances
"""
# Manual check: print each distance reported by the model next to the
# reference value so the two can be compared by eye.
# NOTE(review): airport_distance is not defined in the DataDownloader class
# shown in this cell -- confirm it exists before running on a fresh kernel.
print("Unit test: comparing model performance with real-world distances \n")

# One entry per comparison: (heading, origin, destination, reference line).
manual_checks = [
    ("The model predicts:", "Kugaaruk Airport", "Húsavík Airport",
     "the real distance is 2,945.76 km"),
    ("\n The model predicts:", "Húsavík Airport", "Minna Airport",
     "the real distance is 6,519.29 km"),
    ("\n The model predicts:", "Minna Airport", "Kleine Brogel Air Base",
     "the real distance is 4,617.28 km"),
]

for heading, origin, destination, reference_line in manual_checks:
    print(heading)
    data_downloader.airport_distance(origin, destination)
    print(reference_line)

print("\n Even over long distances, the model gives us accurate distances")


Unzipped data.zip to /home/tom/documents/group_19/downloads/zip_files
Unit test: comparing model performance with real-world distances 

The model predicts:
2945.754511637728 km
the real distance is 2,945.76 km

 The model predicts:
6520.3538796109415 km
the real distance is 6,519.29 km

 The model predicts:
4617.274033404375 km
the real distance is 4,617.28 km

 Even over long distances, the model gives us accurate distances


In [33]:
# Inspect the loaded airlines table; as the cell's last expression it is
# rendered with the DataFrame's rich display.
data_downloader.airlines_df

Unnamed: 0,Airline ID,Name,Alias,IATA,ICAO,Callsign,Country,Active
0,-1,Unknown,\N,-,,\N,\N,Y
1,1,Private flight,\N,-,,,,Y
2,2,135 Airways,\N,,GNL,GENERAL,United States,N
3,3,1Time Airline,\N,1T,RNX,NEXTIME,South Africa,Y
4,4,2 Sqn No 1 Elementary Flying Training School,\N,,WYT,,United Kingdom,N
...,...,...,...,...,...,...,...,...
6157,21248,GX Airlines,,,CBG,SPRAY,China,Y
6158,21251,Lynx Aviation (L3/SSX),,,SSX,Shasta,United States,N
6159,21268,Jetgo Australia,,JG,\N,,Australia,Y
6160,21270,Air Carnival,,2S,\N,,India,Y
