# OCEANIC AIRSPACE PROJECT

In [49]:
pip install geopandas

Note: you may need to restart the kernel to use updated packages.


In [50]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [51]:
pip install shapely 

Note: you may need to restart the kernel to use updated packages.


In [3]:
#Import Libraries
import pandas as pd
import geopandas as ps
import geopy as gy
import shapely as sy
import dask.dataframe as dd

In [4]:
# Location of the raw CSV on S3 -- adjust these two values if your copy is stored elsewhere.
# Note: `bucket` holds the bucket name plus a key prefix ("raw data"), not just the bucket.
bucket = '690folder/raw data'
data_key = '2021-12-24.csv'  # file name of the CSV under that prefix

# Full S3 URL handed to the reader further down
data_location = f's3://{bucket}/{data_key}'

In [6]:
# Load the complete raw CSV from S3 as a dask dataframe.
# assume_missing=True tells dask to read integer-looking columns as floats so that
# missing values in them do not break dtype inference.
rawData_df = dd.read_csv(data_location, assume_missing=True)

# Report how many records were loaded (taking the length makes dask read the data)
record_count = len(rawData_df.index)
print('Total record count : ', record_count)

Total record count :  3303988


In [7]:
# Raw field name -> readable column name.
# The order of the entries is the order the columns appear in the new dataframe.
column_names = {
    "FRN73TMRPDateTimeOfMessageRec": "DateTime",
    "FRN131HRPWCFloatingPointLat": "Latitude",
    "FRN131HRPWCFloatingPointLong": "Longitude",
    "FRN140GHGeometricHeight": "Height",
    "FRN170TITargetId": "TargetID",
    "RESHSelectedHeading": "SelectedHeading",
    "FRN80TATargetAddress": "TargetAddress",
    "FRN161TNTrackNumber": "TrackNumber",
}

# Keep only the selected raw attributes, then give them their readable names
airspaceData_df = rawData_df[list(column_names)].rename(columns=column_names)

# Preview the first rows to confirm the selection and renaming worked
airspaceData_df.head()

Unnamed: 0,DateTime,Latitude,Longitude,Height,TargetID,SelectedHeading,TargetAddress,TrackNumber
0,2021-12-24T23:59:59.992Z,21.325208,-158.058116,2200.0,UAL253,78.75,AA92F9,1277.0
1,2021-12-24T20:54:11.141Z,21.321762,-157.908142,75.0,N357MH,,A3FFE9,640.0
2,2021-12-24T12:36:02.773Z,37.307144,-139.57417,35675.0,ASA899,,A7B779,1024.0
3,2021-12-24T04:13:17.836Z,21.49666,-146.918742,37300.0,HAL50,,A48E9E,1024.0
4,2021-12-24T23:59:59.984Z,25.936523,-149.628348,38775.0,SWA2385,45.703125,ABF949,1024.0


In [8]:
# Parse the raw ISO-8601 timestamp strings (e.g. "2021-12-24T23:59:59.992Z") directly.
# The format spells out the literal "T" and "Z" and the fractional seconds (%f), so it
# matches the data exactly and no character clean-up is needed beforehand. The previous
# format ("%Y-%m-%d %H:%M:%S") omitted the fractional seconds and only worked with
# pandas versions that parse ISO-like formats leniently.
# NOTE(review): assumes every row carries fractional seconds, as in the preview -- confirm.
airspaceData_df["DateTime"] = dd.to_datetime(
    airspaceData_df["DateTime"], format="%Y-%m-%dT%H:%M:%S.%fZ"
)

In [9]:
# Break the parsed timestamp into its components, added as four new columns
# (Hour, Minute, Second, microSecond) in a single assign call.
airspaceData_df = airspaceData_df.assign(
    Hour=airspaceData_df["DateTime"].dt.hour,
    Minute=airspaceData_df["DateTime"].dt.minute,
    Second=airspaceData_df["DateTime"].dt.second,
    microSecond=airspaceData_df["DateTime"].dt.microsecond,
)

In [10]:
# Desired column layout: timestamp and its components first, then position,
# then the target/track identifiers.
column_order = [
    "DateTime",
    "Hour",
    "Minute",
    "Second",
    "microSecond",
    "Latitude",
    "Longitude",
    "Height",
    "TargetID",
    "SelectedHeading",
    "TargetAddress",
    "TrackNumber",
]
airspaceData_df = airspaceData_df[column_order]

# Preview the reordered dataframe
airspaceData_df.head()

Unnamed: 0,DateTime,Hour,Minute,Second,microSecond,Latitude,Longitude,Height,TargetID,SelectedHeading,TargetAddress,TrackNumber
0,2021-12-24 23:59:59.992,23,59,59,992000,21.325208,-158.058116,2200.0,UAL253,78.75,AA92F9,1277.0
1,2021-12-24 20:54:11.141,20,54,11,141000,21.321762,-157.908142,75.0,N357MH,,A3FFE9,640.0
2,2021-12-24 12:36:02.773,12,36,2,773000,37.307144,-139.57417,35675.0,ASA899,,A7B779,1024.0
3,2021-12-24 04:13:17.836,4,13,17,836000,21.49666,-146.918742,37300.0,HAL50,,A48E9E,1024.0
4,2021-12-24 23:59:59.984,23,59,59,984000,25.936523,-149.628348,38775.0,SWA2385,45.703125,ABF949,1024.0


In [11]:
# Show the data type of every column to confirm that DateTime was parsed as a
# datetime and that the derived time components are integers
airspaceData_df.dtypes

DateTime           datetime64[ns]
Hour                        int64
Minute                      int64
Second                      int64
microSecond                 int64
Latitude                  float64
Longitude                 float64
Height                    float64
TargetID                   object
SelectedHeading           float64
TargetAddress              object
TrackNumber               float64
dtype: object