# Advanced Programming Summative Assignment

Please see the README file (README.md) for a full overview of the project, installation instructions and running instructions. 

## Contents

#### 1.0 [Package installations and import statements](#1.0-Package-installations-and-import-statements)

#### 2.0 [Data extraction and cleaning](#2.0-Data-extraction-and-cleaning)

2.1 [Some other test](#2.1-Some-other-test)

### 1.0 Package installations and import statements

In [1]:
# Installing missing packages
# import sys
# !conda install --yes --prefix {sys.prefix} mysql-connector-python 
# !conda install --yes --prefix {sys.prefix} numpy
# !conda install --yes --prefix {sys.prefix} pandas


In [1]:
# Import packages
import os
import pandas as pd
import numpy as np
import json
# import mysql.connector
import matplotlib.pyplot as plt
import seaborn as sns
import tkinter as tk

### 2.0 Data extraction and cleaning

A lot of the data processing is generic and not specific to the original data. This allows the application to handle other datasets from the same source. 

In [2]:
# Extract data from CSV files
df_airports = pd.read_csv ('data/airports.csv')
df_runways = pd.read_csv ('data/runways.csv', index_col=['id'])
df_frequencies = pd.read_csv ('data/airport-frequencies.csv', index_col=['id'])

# Remove any rows with all data missing
df_airports.dropna(how='all')
df_runways.dropna(how='all')
df_frequencies.dropna(how='all')

# Remove any duplicated rows
df_airports.drop_duplicates()
df_runways.drop_duplicates()
df_frequencies.drop_duplicates()

# Remove unneeded columns
df_airports.drop(['keywords','home_link','local_code'], axis='columns', inplace=True)
df_runways.drop(['airport_ident'], axis='columns', inplace=True)
df_frequencies.drop(['airport_ident','description','type'], axis='columns', inplace=True)

# change the airport column id name to airport_ref to align with other data
df_airports.rename(columns={"id": "airport_ref"}, inplace=True)

# Remove rows that do not have a valid airport_ref
df_airports = df_airports[df_airports['airport_ref'].apply(lambda x: str(x).isdigit())]
df_runways = df_runways[df_runways['airport_ref'].apply(lambda x: str(x).isdigit())]
df_frequencies = df_frequencies[df_frequencies['airport_ref',].apply(lambda x: str(x).isdigit())]

# Add columns to the Airports df for small, medium and large airports with binary values
df_airports['small_airport'] = df_airports.type == 'small_airport'
df_airports['medium_airport'] = df_airports.type == 'medium_airport'
df_airports['large_airport'] = df_airports.type == 'large_airport'
df_airports['small_airport'] = df_airports['small_airport'].astype(int) 
df_airports['medium_airport'] = df_airports['medium_airport'].astype(int) 
df_airports['large_airport'] = df_airports['large_airport'].astype(int) 

# Pandas replaces the 'continent' value 'NA' (for North America) with NaN. Change back.
# df_airports=df_airports.continent.replace("NaN","NA")


# Change any important data types that are not in their correct format
# df_airports








# filter out closed airports - may need to force a copy, not sure yet - may need to do this after frequencies are added
# df_airports = df_airports[(df_airports.type != 'closed')]

# merge two dataframes
# df_airports_frequencies = pd.merge(df_airports, df_frequencies, on="airport_ref", how = 'outer')

# df_airports_frequencies.shape

print(df_airports)
# print('')
# print(df_frequencies)
# print('')
# print(df_airports_frequencies)

       airport_ref    ident           type  \
0             6523      00A       heliport   
1           323361     00AA  small_airport   
2             6524     00AK  small_airport   
3             6525     00AL  small_airport   
4             6526     00AR         closed   
...            ...      ...            ...   
68942        46378  ZZ-0001       heliport   
68943       307326  ZZ-0002  small_airport   
68944       346788  ZZ-0003  small_airport   
68945       342102     ZZZW         closed   
68946       313629     ZZZZ  small_airport   

                                     name  latitude_deg  longitude_deg  \
0                       Total Rf Heliport     40.070801     -74.933601   
1                    Aero B Ranch Airport     38.704022    -101.473911   
2                            Lowell Field     59.947733    -151.692524   
3                            Epps Airpark     34.864799     -86.770302   
4      Newport Hospital & Clinic Heliport     35.608700     -91.254898   
...

In [10]:
# write to JSON
airports_json = df_airports.to_json(orient = 'records')
airports_json_list = json.loads(airports_json)

airport_frequencies_json = df_frequencies.to_json(orient = 'records')
frequencies_json_list = json.loads(airport_frequencies_json)

for frequency in frequencies_json_list:
    for airport in airports_json_list:
        if frequency['airport_ref'] == airport['airport_ref']:
            airport[frequency['type']] = frequency['frequency_mhz']
            

            

print(airports_json_list[1])

# json.dumps(parsed, indent=4) print
# print(type(json_dict))
# print(type(airports_json))
# print(airports_json[0:380])

# print(json_list[1])
# for airport in json_list:
#     print(i)
# print(frequencies_json_list)

<class 'dict'>
{'airport_ref': 323361, 'ident': '00AA', 'type': 'small_airport', 'name': 'Aero B Ranch Airport', 'latitude_deg': 38.704022, 'longitude_deg': -101.473911, 'elevation_ft': 3435.0, 'continent': None, 'iso_country': 'US', 'iso_region': 'US-KS', 'municipality': 'Leoti', 'scheduled_service': 'no', 'gps_code': '00AA', 'iata_code': None, 'wikipedia_link': None, 'small_airport': 1, 'medium_airport': 0, 'large_airport': 0}


In [None]:
print('airports')
df_airports.shape

In [138]:
print('frequencies')
df_frequencies.shape

frequencies


(28962, 5)

In [190]:
# df_airports_frequencies = pd.merge(df_frequencies, df_airports,  on="airport_ref", how = 'outer')
# # # df_airports_frequencies.dropna(how='all')
# # # df_airports_frequencies = df_airports_frequencies.dropna(subset=['airport_ref'])
# df_airports_frequencies.shape
# # print(df_airports_frequencies)
# # df_airports_frequencies.to_csv('file_name.csv')

(87317, 20)

[{"airport_ref":6528,"type_x":"CTAF","frequency_mhz":122.9,"ident":"00CA","type_y":"small_airport","name":"Goldstone (GTS) Airport","latitude_deg":35.35474,"longitude_deg":-116.885329,"elevation_ft":3038.0,"continent":null,"iso_country":"US","iso_region":"US-CA","municipality":"Barstow","scheduled_service":"no","gps_code":"00CA","iata_code":null,"wikipedia_link":null,"small_airport":1,"medium_airport":0,"large_airport":0},{"airport_ref":6589,"type_x":"ARCAL","frequency_mhz":122.9,"ident":"01FL","type_y":"small_airport","name":"Cedar Knoll Flying Ranch Airport","latitude_deg":28.7819004059,"longitude_deg":-81.1592025757,"elevation_ft":19.0,"continent":null,"iso_country":"US","iso_region":"US-FL","municipality":"Geneva","scheduled_service":"no","gps_code":"01FL","iata_code":null,"wikipedia_link":null,"small_airport":1,"medium_airport":0,"large_airport":0},{"airport_ref":6589,"type_x":"CTAF","frequency_mhz":122.8,"ident":"01FL","type_y":"small_airport","name":"Cedar Knoll Flying Ranch Air

#### 2.1 Some other test