# Aviation Database

Cleans the aviation accident dataset according to the needs of future implementation. The current version outputs the cleaned DataFrame as a CSV to "../output/CleanAviationData.csv" and as JSON to "static/data/CleanAviationData.json".

Provided by Kheirallah Samaha
<br>
https://www.kaggle.com/datasets/khsamaha/aviation-accident-database-synopses

In [82]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd
import geopandas as gpd
from datetime import datetime

In [83]:
# Create the MongoDB client on port 27017; no host is passed, so the driver default applies.
mongo = MongoClient(port=27017)

In [84]:
# Load both CSV files into the "aviation" database, one collection per file
# ("accidents" and "state_codes").
# --headerline takes the field names from the first CSV row; --drop removes any
# existing collection of the same name before importing.
# NOTE(review): the "..\\data\\..." paths use Windows-style separators — confirm
# before running this cell on another platform.
!mongoimport --type csv -d aviation -c accidents --headerline --drop "..\\data\\AviationData.csv"

!mongoimport --type csv -d aviation -c state_codes --headerline --drop "..\\data\\USState_Codes.csv"

2023-04-19T21:37:30.800-0500	connected to: mongodb://localhost/
2023-04-19T21:37:30.801-0500	dropping: aviation.accidents
2023-04-19T21:37:33.643-0500	88889 document(s) imported successfully. 0 document(s) failed to import.
2023-04-19T21:37:34.296-0500	connected to: mongodb://localhost/
2023-04-19T21:37:34.297-0500	dropping: aviation.state_codes
2023-04-19T21:37:34.322-0500	62 document(s) imported successfully. 0 document(s) failed to import.


In [85]:
# Handles to the "aviation" database and the two collections imported above.
db = mongo["aviation"]
accident_c = db["accidents"]
state_c = db["state_codes"]

In [86]:
# Project Event.Date through $toDate so it is returned as a date value.
# NOTE(review): the cursor returned here is neither consumed nor written back to
# the collection; the find() preview further down still shows Event.Date as a
# plain string — confirm whether persisting the conversion was intended.
date_projection = {"Event.Date": {"$toDate": "$Event.Date"}}
accident_c.aggregate([{"$project": date_projection}])


<pymongo.command_cursor.CommandCursor at 0x293e0db3dc8>

In [87]:
# Match only accident records whose Latitude and Longitude are not empty strings.
query = {
    coord: {"$ne": ""}
    for coord in ("Latitude", "Longitude")
}

# Return just the event date and the coordinates; suppress Mongo's _id.
fields = {"Event.Date": 1, "Latitude": 1, "Longitude": 1, "_id": 0}

# Preview the first few matching documents.
limit = 3
preview_docs = list(accident_c.find(query, fields).limit(limit))
pprint(preview_docs)



[{'Event': {'Date': '1974-08-30'},
  'Latitude': 36.922223,
  'Longitude': -81.878056},
 {'Event': {'Date': '1979-09-17'},
  'Latitude': 42.445277,
  'Longitude': -70.758333},
 {'Event': {'Date': '1982-03-16'},
  'Latitude': 30.757778,
  'Longitude': -88.355555}]


In [88]:
# Pull every record that has coordinate data and load the documents into a DataFrame.
event_coord = accident_c.find(query, fields)
df = pd.DataFrame(list(event_coord))

# Show the first five rows.
df.head(5)

Unnamed: 0,Event,Latitude,Longitude
0,{'Date': '1974-08-30'},36.9222,-81.8781
1,{'Date': '1979-09-17'},42.4453,-70.7583
2,{'Date': '1982-03-16'},30.7578,-88.3556
3,{'Date': '1983-01-08'},46.0411,-120.85
4,{'Date': '1983-09-09'},48.12,-113.888


In [89]:
# Add a "Date" column holding datetime.date objects parsed from the nested
# Event -> Date string ("YYYY-MM-DD").
#
# The column is built in one assignment instead of writing into the row Series
# yielded by df.iterrows(): pandas documents that iterrows() may return copies,
# in which case such writes silently have no effect on the DataFrame.
df["Date"] = [
    datetime.strptime(event["Date"], "%Y-%m-%d").date()
    for event in df["Event"]
]
df


Unnamed: 0,Event,Latitude,Longitude,Date
0,{'Date': '1974-08-30'},36.9222,-81.8781,1974-08-30
1,{'Date': '1979-09-17'},42.4453,-70.7583,1979-09-17
2,{'Date': '1982-03-16'},30.7578,-88.3556,1982-03-16
3,{'Date': '1983-01-08'},46.0411,-120.85,1983-01-08
4,{'Date': '1983-09-09'},48.12,-113.888,1983-09-09
...,...,...,...,...
34362,{'Date': '2022-12-13'},047257N,0109280W,2022-12-13
34363,{'Date': '2022-12-14'},182724N,0066554W,2022-12-14
34364,{'Date': '2022-12-15'},373829N,0972635W,2022-12-15
34365,{'Date': '2022-12-16'},282825N,0822719W,2022-12-16


In [90]:
# Order the records from the most recent event date to the oldest.
df = df.sort_values("Date", ascending=False)
df

Unnamed: 0,Event,Latitude,Longitude,Date
34366,{'Date': '2022-12-26'},341525N,1112021W,2022-12-26
34365,{'Date': '2022-12-16'},282825N,0822719W,2022-12-16
34364,{'Date': '2022-12-15'},373829N,0972635W,2022-12-15
34363,{'Date': '2022-12-14'},182724N,0066554W,2022-12-14
34362,{'Date': '2022-12-13'},047257N,0109280W,2022-12-13
...,...,...,...,...
4,{'Date': '1983-09-09'},48.12,-113.888,1983-09-09
3,{'Date': '1983-01-08'},46.0411,-120.85,1983-01-08
2,{'Date': '1982-03-16'},30.7578,-88.3556,1982-03-16
1,{'Date': '1979-09-17'},42.4453,-70.7583,1979-09-17


In [91]:


# Keep only rows whose coordinates are valid decimal degrees.
#
# Both columns are coerced to numbers first; anything that is not numeric
# (for example the "341525N" / "1112021W" style strings) becomes NaN and is
# dropped, because NaN never satisfies between().
lat_deg = pd.to_numeric(df["Latitude"], errors="coerce")
lon_deg = pd.to_numeric(df["Longitude"], errors="coerce")

# Latitude is bounded by +/-90 degrees, longitude by +/-180 degrees (inclusive).
valid_coords = lat_deg.between(-90, 90) & lon_deg.between(-180, 180)

# Store the coerced numeric values so both columns have a consistent numeric
# dtype; assign() aligns the Series on the filtered index.
df = df.loc[valid_coords].assign(Latitude=lat_deg, Longitude=lon_deg)

df

Unnamed: 0,Event,Latitude,Longitude,Date
11958,{'Date': '2007-12-31'},49.435,-2.60028,2007-12-31
11957,{'Date': '2007-12-31'},33.6756,-117.868,2007-12-31
11956,{'Date': '2007-12-30'},45.8661,-95.3944,2007-12-30
11955,{'Date': '2007-12-30'},35.5422,-120.523,2007-12-30
11954,{'Date': '2007-12-30'},34.6886,-87.92,2007-12-30
...,...,...,...,...
4,{'Date': '1983-09-09'},48.12,-113.888,1983-09-09
3,{'Date': '1983-01-08'},46.0411,-120.85,1983-01-08
2,{'Date': '1982-03-16'},30.7578,-88.3556,1982-03-16
1,{'Date': '1979-09-17'},42.4453,-70.7583,1979-09-17


In [92]:
# Write the cleaned DataFrame to disk as CSV and as a JSON array of records.
csv_path = "../output/CleanAviationData.csv"
json_path = "static/data/CleanAviationData.json"

df.to_csv(csv_path)
df.to_json(json_path, orient="records")
