In [16]:

# Let's load data from two CSV files into two data frames, and then concatenate those two data frames together.
# Then, we will find the Haversine Distance between the general lat/long coordinates in the 'worldcities.csv'
# file and the lat/long coordinates in the 'airports.csv' file.  The Haversine formula is
# very useful to calculate the great-circle distance between two points, that is, the shortest distance
# over the earth’s surface – giving an ‘as-the-crow-flies’ distance between the points
# (ignoring any hills they fly over, of course!).
# NOTE: the cells below do not compute the Haversine distance itself; they flag rows whose
# latitude and longitude each differ by less than 0.5 degrees.

import pandas as pd 
import numpy as np

# Folder holding both input CSVs. It is defined once so that pointing the
# notebook at another machine/location needs a single edit instead of two.
DATA_DIR = 'C:\\Users\\ryans\\OneDrive\\Desktop\\Briefcase\\PDFs\\1-ALL PYTHON & R CODE SAMPLES\\A - GITHUB\\Haversine Distance - Airport or Not'

# Airport records; the coordinate columns used later are 'latitude_deg' and 'longitude_deg'.
df_airports = pd.read_csv(DATA_DIR + '\\airports.csv')
print(df_airports.head(3))

# City records; the coordinate columns used later are 'lat' and 'lng'.
df_cities = pd.read_csv(DATA_DIR + '\\worldcities.csv')
print(df_cities.head(3))



     id ident           type               name  latitude_deg  longitude_deg  \
0  6523   00A       heliport  Total Rf Heliport     40.070801     -74.933601   
1  6524  00AK  small_airport       Lowell Field     59.949200    -151.695999   
2  6525  00AL  small_airport       Epps Airpark     34.864799     -86.770302   

   elevation_ft continent iso_country iso_region  municipality  \
0          11.0       NaN          US      US-PA      Bensalem   
1         450.0       NaN          US      US-AK  Anchor Point   
2         820.0       NaN          US      US-AL       Harvest   

  scheduled_service gps_code iata_code local_code home_link wikipedia_link  \
0                no      00A       NaN        00A       NaN            NaN   
1                no     00AK       NaN       00AK       NaN            NaN   
2                no     00AL       NaN       00AL       NaN            NaN   

  keywords  
0      NaN  
1      NaN  
2      NaN  
      city city_ascii      lat       lng    count

In [39]:

# Place the two frames side by side. NOTE: with axis=1 pandas aligns rows on the
# index, so city row i ends up next to airport row i purely by position -- the
# pairing is not a geographic match. Frames of different length are padded with NaN.
df = pd.concat([df_cities, df_airports], axis=1)

# Cast latitudes and longitudes to numeric; values that cannot be parsed become NaN.
# The conversion runs column by column (apply's default axis): axis=1 would call
# pd.to_numeric once per row, which is far slower for the same numeric result.
cols = ["lat", "lng", "latitude_deg", "longitude_deg"]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')

# Largest allowed absolute difference, in degrees, for both latitude and longitude.
MAX_DEGREE_DIFF = 0.5

# Mask of rows where the city and airport coordinates differ by less than
# MAX_DEGREE_DIFF in latitude AND in longitude (NaN differences compare as False).
lat_close = (df["lat"] - df["latitude_deg"]).abs() < MAX_DEGREE_DIFF
lng_close = (df["lng"] - df["longitude_deg"]).abs() < MAX_DEGREE_DIFF
mask = lat_close & lng_close

# Flag matching rows; every other row keeps NaN in the new 'Type' column.
df.loc[mask, 'Type'] = "Airport"
df.shape


(43930, 30)

In [40]:

# Lift pandas' display limits so that every column and every row is rendered.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Order the frame in place by the 'Type' flag created in the previous cell, which
# marks rows where the airport's lat/long are each within .5 of the city's
# lat/long, then preview the first rows.
df.sort_values(by='Type', inplace=True)
df.head()


Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id,id.1,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords,Type
1928,Lexington,Lexington,38.0423,-84.4587,United States,US,USA,Kentucky,,317110.0,1840015000.0,8398,1KY4,heliport,Central Baptist Hospital Heliport,38.018101,-84.511902,1076.0,,US,US-KY,Lexington,no,1KY4,,1KY4,,,,Airport
24087,Buena Vista,Buena Vista,37.7319,-79.3569,United States,US,USA,Virginia,,6367.0,1840004000.0,21362,KW90,small_airport,New London Airport,37.271801,-79.335899,849.0,,US,US-VA,Forest,no,KW90,,W90,,,,Airport
23752,North Caldwell,North Caldwell,40.8629,-74.2576,United States,US,USA,New Jersey,,6621.0,1840003000.0,21088,KSMQ,small_airport,Somerset Airport,40.625999,-74.670197,105.0,,US,US-NJ,Somerville,no,KSMQ,,SMQ,,http://en.wikipedia.org/wiki/Somerset_Airport_...,,Airport
23246,Orange City,Orange City,43.0023,-96.0566,United States,US,USA,Iowa,,7044.0,1840009000.0,20762,KORC,small_airport,Orange City Municipal Airport,42.990299,-96.062798,1414.0,,US,US-IA,Orange City,no,KORC,,ORC,,,,Airport
21654,Closter,Closter,40.9733,-73.9604,United States,US,USA,New Jersey,,8511.0,1840004000.0,19676,KDXR,small_airport,Danbury Municipal Airport,41.371498,-73.482201,458.0,,US,US-CT,Danbury,no,KDXR,,DXR,,,,Airport


In [41]:

# Keep only the rows flagged "Airport"; rows whose 'Type' is missing count as
# non-matches (na=False). About 22 of the 43,930 coordinate pairs are flagged.
is_airport = df['Type'].str.contains('Airport', na=False)
df_final = df.loc[is_airport]
df_final.shape


(22, 30)

In [42]:

# Display the rows that were flagged as "Airport".
df_final


Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id,id.1,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords,Type
1928,Lexington,Lexington,38.0423,-84.4587,United States,US,USA,Kentucky,,317110.0,1840015000.0,8398,1KY4,heliport,Central Baptist Hospital Heliport,38.018101,-84.511902,1076.0,,US,US-KY,Lexington,no,1KY4,,1KY4,,,,Airport
24087,Buena Vista,Buena Vista,37.7319,-79.3569,United States,US,USA,Virginia,,6367.0,1840004000.0,21362,KW90,small_airport,New London Airport,37.271801,-79.335899,849.0,,US,US-VA,Forest,no,KW90,,W90,,,,Airport
23752,North Caldwell,North Caldwell,40.8629,-74.2576,United States,US,USA,New Jersey,,6621.0,1840003000.0,21088,KSMQ,small_airport,Somerset Airport,40.625999,-74.670197,105.0,,US,US-NJ,Somerville,no,KSMQ,,SMQ,,http://en.wikipedia.org/wiki/Somerset_Airport_...,,Airport
23246,Orange City,Orange City,43.0023,-96.0566,United States,US,USA,Iowa,,7044.0,1840009000.0,20762,KORC,small_airport,Orange City Municipal Airport,42.990299,-96.062798,1414.0,,US,US-IA,Orange City,no,KORC,,ORC,,,,Airport
21654,Closter,Closter,40.9733,-73.9604,United States,US,USA,New Jersey,,8511.0,1840004000.0,19676,KDXR,small_airport,Danbury Municipal Airport,41.371498,-73.482201,458.0,,US,US-CT,Danbury,no,KDXR,,DXR,,,,Airport
21135,Dumbarton,Dumbarton,37.613,-77.5065,United States,US,USA,Virginia,,9036.0,1840006000.0,19294,KAPH,small_airport,A P Hill Aaf (Fort A P Hill) Airport,38.068901,-77.318298,220.0,,US,US-VA,Fort A. P. Hill,no,KAPH,,APH,,http://en.wikipedia.org/wiki/A.P._Hill_Army_Ai...,,Airport
20643,Burlington,Burlington,41.7598,-72.9589,United States,US,USA,Connecticut,,9607.0,1840034000.0,18809,K4B8,small_airport,Robertson Field,41.690399,-72.864799,200.0,,US,US-CT,Plainville,no,K4B8,,4B8,,http://en.wikipedia.org/wiki/Robertson_Field,,Airport
18463,Lilburn,Lilburn,33.8887,-84.1379,United States,US,USA,Georgia,,12810.0,1840016000.0,17512,GA38,heliport,Walton County Hospital Heliport,33.783199,-83.717903,869.0,,US,US-GA,Monroe,no,GA38,,GA38,,,,Airport
15388,Lichtenfels,Lichtenfels,50.1333,11.0333,Germany,DE,DEU,Bavaria,minor,20133.0,1276573000.0,28596,EDQX,small_airport,Hetzleser Berg Airport,49.642223,11.162222,1765.0,EU,DE,DE-BY,Neunkirchen am Brand,no,EDQX,,,,,,Airport
15380,Greiz,Greiz,50.6547,12.1997,Germany,DE,DEU,Thuringia,minor,20103.0,1276236000.0,2263,EDQM,medium_airport,Hof-Plauen Airport,50.288612,11.856389,1959.0,EU,DE,DE-BY,Hof,yes,EDQM,HOQ,,,http://en.wikipedia.org/wiki/Hof-Plauen_Airport,,Airport


In [31]:

# data sources
# https://www.partow.net/miscellaneous/airportdatabase/#top
# https://simplemaps.com/data/world-cities
