In [21]:
import pandas as pd 
import geocoder
import requests
import sys
import pickle
import geopy.distance as dist
import numpy as np

In [22]:
# Load the crash records and discard the columns this analysis never uses.
data = pd.read_csv('Airplane_Crashes_and_Fatalities_Since_1908.csv')
data = data.drop(['Flight #','Registration','cn/In','Ground'], axis=1)

# Take only the last rows for testing.
# .copy() turns the slice into an independent frame, so adding columns to it
# later does not raise SettingWithCopyWarning.
data = data.iloc[4000:].copy()

In [23]:
def geocode(address, api_key=None):
    """Look up an address with the Google Geocoding API.

    Parameters
    ----------
    address
        Value to geocode (normally a place-name string).
    api_key : str, optional
        Google API key. When omitted, the GOOGLE_MAPS_API_KEY environment
        variable is used, and finally the key that was embedded in this
        notebook.

    Returns
    -------
    tuple
        ``(lat, lng)`` of the first result, each truncated to an int.
        ``(0, 0)`` is returned when the request fails, the response cannot be
        parsed, or the service returns no result.
    """
    import os

    # NOTE(review): the fallback key below is committed in the notebook; it
    # should be rotated and supplied through the environment instead.
    key = (api_key
           or os.environ.get('GOOGLE_MAPS_API_KEY')
           or 'AIzaSyC-964iuqZr0UHdYB3tGmMNhbyKVMXDneo')

    try:
        # Passing the query through `params` lets requests URL-encode it, so
        # characters such as '&' or '#' in a place name cannot break the query.
        # The trailing comma after the address is kept from the original URL.
        r = requests.get(
            'https://maps.googleapis.com/maps/api/geocode/json',
            params={'address': '{0},'.format(address), 'key': key},
            timeout=10,  # never hang the whole run on one stalled request
        )

        # get the address information in a json format
        results = r.json()['results']
        if not results:
            # the service answered but could not locate the address
            return 0, 0

        # get the location information of the best match
        location = results[0]['geometry']['location']

        # NOTE(review): int() truncates to whole degrees; kept because the
        # stored output relies on integer tuples.
        return int(location['lat']), int(location['lng'])

    except Exception:
        # Best-effort lookup: any failure maps to the (0, 0) sentinel.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        return 0, 0

### Uncomment and Run if you want to recompute the geocoordinates of the crashes

In [24]:
# Geocode every crash address; geocode() gives an integer (lat, lng) tuple
crash_coordinates = data['Location'].apply(geocode)
data = data.assign(LatLongCrash=crash_coordinates)

# Keep only the crashes that have both a coordinate and a route
data = data.dropna(subset=['LatLongCrash'])
data = data.dropna(subset=['Route'], how='any')

# Discard the crashes that could not be geolocated, whether the failure
# marker is stored as a tuple or as its string form
for failed_marker in ((0,0), '(0, 0)'):
    data = data[data['LatLongCrash'] != failed_marker]

# Renumber the rows from zero, throwing the old index away
data = data.reset_index(drop=True)

# Write the new DataFrame to a '#'-separated file
data.to_csv('Crash_Df', sep='#')

In [25]:
# Preview the first rows only instead of dumping the whole frame
data.head(10)

Unnamed: 0,Date,Time,Location,Operator,Route,Type,Aboard,Fatalities,Summary,LatLongCrash
0,03/18/1991,,"Brasillia, Brazil",Conesul Taxi Aéreo,Ubrraba - Brasillia,Gates Learjet 25,7.0,7.0,Crashed 8km short of the runway in darkness.,"(-15, -47)"
1,03/23/1991,11:37,"Navoi, Uzbekistan",Uzbek Civil Aviation Administration,Trashkent - Navoi,Antonov AN-24B,63.0,34.0,The aircraft overran the runway and crashed in...,"(40, 65)"
2,04/04/1991,13:40,"Lake Thutade, Canada",Central Mountain Air Services,Sturdee - Smithers,Douglas C-47B-15-DK,7.0,6.0,The plane crash landed on a frozen lake after ...,"(56, -126)"
3,04/04/1991,12:10,"Marion, Pennsylvania",Private,"Williamsport, PA - Philadelphia, PA",Piper Aerostar 601 / Bell 412SP,5.0,5.0,When the plane's nose gear indicator light did...,"(39, -77)"
4,04/05/1991,14:51,"Brunswick, Georgia",AtlantiSoutheast Airlines,Atlanta - Brunswick,Embraer 120RT- Brasilia,23.0,23.0,"Just after turning onto final approach, the ai...","(31, -81)"
5,04/19/1991,11:56,"Off Nuka Hiva, French Polynesia",Air Tahiti,Hiva Ou - Nuku Hiva,Dornier Do-228-212,22.0,11.0,Crashed into the sea after taking off from Hiv...,"(-8, -140)"
6,05/09/1991,,"Sulawesi, Indonesia",Merpati Nusantara Airlines,Ambon - Ternate - Manado,Fokker F-27 Friendship 600,13.0,13.0,Crashed into Mt. Klabat in heavy fog while des...,"(-1, 120)"
7,05/13/1991,13:19,"Grand Canyon, Airzona",Air Taxi - Air Grand Canyon Inc.,Local sightseeing,Cessna 207A,7.0,7.0,The airplane crashed into a wooded area about ...,"(36, -112)"
8,05/23/1991,13:06,"Leningrad, USSR",Aeroflot,Sukhumi - Leningrad,Tupolev TU-154B-1,181.0,13.0,Undershot the runway and broke in half after a...,"(59, 30)"
9,05/26/1991,23:17,"Near Ban Nong Rong, Thailand",Lauda Air,"Bangkok - Wien, Austria",Boeing B-767-3Z9ER,223.0,223.0,Twelve minutes after takeoff the crew received...,"(14, 100)"


## Compute the coordinates of the route leg on which each crash occurred

In [26]:
def _split_route(route):
    """Split a route string into its stop names, stripped of surrounding spaces."""
    # Routes are written "A - B - C". Splitting on the spaced dash keeps
    # hyphenated names such as "Port-au-Prince" intact; fall back to a bare
    # dash only when the spaced form is absent.
    separator = ' - ' if ' - ' in route else '-'
    return [stop.strip() for stop in route.split(separator)]


def _distance_km(point_a, point_b):
    """Distance in kilometres between two (lat, lng) points, using geopy."""
    # geopy 2.0 removed `vincenty`; `geodesic` is its replacement. Fall back to
    # `vincenty` only on geopy releases that predate `geodesic`.
    measure = getattr(dist, 'geodesic', None) or dist.vincenty
    return measure(point_a, point_b).km


# For each route, find the leg (pair of consecutive stops) on which the crash
# happened and return the names and coordinates of that leg's two stops.
def get_routes(crash_locations, routes) :
    """Locate, for every crash, the leg of its route on which it happened.

    A leg ``start -> end`` is accepted when the crash is no farther from
    ``start`` than ``end`` is. The first accepted leg wins.

    Parameters
    ----------
    crash_locations : sequence of (lat, lng)
        Crash coordinates, paired with ``routes`` by position.
    routes : sequence of str
        Route descriptions such as ``"A - B - C"``.

    Returns
    -------
    tuple of five lists, each as long as ``routes``
        ``start_names, end_names, start_coordinates, end_coordinates, to_show``.
        Rows with a single stop hold ``'Just one route'``, rows where no leg
        matched hold ``"couldn't find"``, rows that raised hold NaN; ``to_show``
        is True only for rows where a leg was found.
    """
    # names and coordinates of the two stops bounding each crash
    start_names, end_names = [], []
    start_coordinates, end_coordinates = [], []

    # whether the row should be shown on the visualisation
    to_show = []

    def record(start_name, end_name, start_point, end_point, show):
        # Append one full row at once so the five lists always stay aligned.
        start_names.append(start_name)
        end_names.append(end_name)
        start_coordinates.append(start_point)
        end_coordinates.append(end_point)
        to_show.append(show)

    # Pair rows by position: indexing a pandas Series with `[i]` is a label
    # lookup and fails when the index is not 0..n-1.
    crash_points = list(crash_locations)

    # geocode() is a network call: remember successful lookups so a stop shared
    # by two legs (or two routes) is requested only once. Failures are not
    # cached so they can be retried.
    geocoded = {}

    def lookup(place):
        if place in geocoded:
            return geocoded[place]
        point = geocode(place)
        if point != (0, 0):
            geocoded[place] = point
        return point

    for i in range(len(routes)) :
        try :
            transits = _split_route(routes[i])

            # If the route names a single place there is no leg to test
            if len(transits) == 1 :
                record('Just one route', 'Just one route',
                       'Just one route', 'Just one route', False)
                print('JUST ONE TRANSIT !!!')
                continue

            crash_loc = crash_points[i]
            found_leg = None

            for start, end in zip(transits, transits[1:]) :
                start_geo = lookup(start)
                end_geo = lookup(end)

                # (0, 0) is geocode()'s failure marker, not a real place:
                # a leg with an unknown end point cannot be tested.
                if start_geo == (0, 0) or end_geo == (0, 0) :
                    continue

                # the crash belongs to this leg when it is no farther from the
                # start than the end of the leg is
                if _distance_km(start_geo, crash_loc) <= _distance_km(start_geo, end_geo) :
                    found_leg = (start, end, start_geo, end_geo)
                    break

            found_crash = found_leg is not None
            print('found crash : {} , index : {} and Crash Location {}'.format(found_crash,i,crash_loc))

            if found_crash :
                record(found_leg[0], found_leg[1], found_leg[2], found_leg[3], True)
            else :
                record('couldn\'t find', 'couldn\'t find',
                       'couldn\'t find', 'couldn\'t find', False)

        except Exception as e :
            # e.g. a missing (NaN) route or a malformed crash location
            print('error at index : {} , error : {}'.format(i,e))
            error = float('nan')
            record(error, error, error, error, False)

    return start_names, end_names, start_coordinates, end_coordinates, to_show

### Uncomment and run the cell below to recompute the route leg corresponding to each crash

In [27]:
# Find, for every crash, the names and coordinates of the route leg it happened on
start, end, start_geo, end_geo, to_show = get_routes(data['LatLongCrash'], data['Route'].values)


# put the values found in the DataFrame
data['Start City Crash'] = start
data['End City Crash'] = end
data['Start City Geo Crash'] = start_geo
data['End City Geo Crash'] = end_geo
data['To_show'] = to_show


# Drop the crashes whose route lookup failed (get_routes stores NaN for them).
# Rows are dropped, restricted to the route columns: dropping along axis=1
# would instead remove every column that holds a NaN anywhere (e.g. 'Time').
route_columns = ['Start City Crash', 'End City Crash',
                 'Start City Geo Crash', 'End City Geo Crash']
data = data.dropna(axis=0, subset=route_columns, how='any')

# reset_index returns a new frame, so the result must be assigned
data = data.reset_index(drop=True)

found crash : True , index : 0 and Crash Location (-15, -47)
found crash : True , index : 1 and Crash Location (40, 65)
found crash : True , index : 2 and Crash Location (56, -126)
found crash : True , index : 3 and Crash Location (39, -77)
found crash : True , index : 4 and Crash Location (31, -81)
found crash : True , index : 5 and Crash Location (-8, -140)
found crash : False , index : 6 and Crash Location (-1, 120)
JUST ONE TRANSIT !!!
found crash : True , index : 8 and Crash Location (59, 30)
found crash : True , index : 9 and Crash Location (14, 100)
found crash : True , index : 10 and Crash Location (-8, 13)
found crash : True , index : 11 and Crash Location (10, -74)
found crash : True , index : 12 and Crash Location (59, 26)
found crash : True , index : 13 and Crash Location (13, 5)
found crash : True , index : 14 and Crash Location (33, -86)
found crash : True , index : 15 and Crash Location (21, 39)
found crash : False , index : 16 and Crash Location (-9, 160)
found crash : 

found crash : True , index : 144 and Crash Location (43, 87)
found crash : True , index : 145 and Crash Location (30, 57)
found crash : True , index : 146 and Crash Location (41, 20)
found crash : True , index : 147 and Crash Location (14, -90)
found crash : True , index : 148 and Crash Location (47, -92)
found crash : True , index : 149 and Crash Location (69, -133)
found crash : False , index : 150 and Crash Location (19, 103)
found crash : True , index : 151 and Crash Location (13, 123)
found crash : True , index : 152 and Crash Location (33, -117)
found crash : True , index : 153 and Crash Location (40, 43)
found crash : True , index : 154 and Crash Location (-31, 159)
found crash : True , index : 155 and Crash Location (27, -80)
found crash : True , index : 156 and Crash Location (39, -82)
found crash : True , index : 157 and Crash Location (43, 43)
found crash : True , index : 158 and Crash Location (-9, -76)
found crash : True , index : 159 and Crash Location (52, -1)
found cras

found crash : True , index : 280 and Crash Location (-32, -65)
found crash : False , index : 281 and Crash Location (10, 7)
found crash : True , index : 282 and Crash Location (9, 80)
found crash : True , index : 283 and Crash Location (4, 9)
found crash : True , index : 284 and Crash Location (39, 45)
found crash : True , index : 285 and Crash Location (47, 139)
found crash : True , index : 286 and Crash Location (18, -72)
found crash : True , index : 287 and Crash Location (45, 10)
found crash : True , index : 288 and Crash Location (3, -76)
found crash : True , index : 289 and Crash Location (-4, 15)
found crash : False , index : 290 and Crash Location (-41, 173)
found crash : True , index : 291 and Crash Location (-25, -57)
found crash : True , index : 292 and Crash Location (34, -109)
found crash : True , index : 293 and Crash Location (19, -70)
found crash : True , index : 294 and Crash Location (31, -116)
found crash : True , index : 295 and Crash Location (18, -72)
found crash 

found crash : True , index : 417 and Crash Location (4, -74)
found crash : True , index : 418 and Crash Location (-2, -76)
found crash : False , index : 419 and Crash Location (16, -7)
found crash : True , index : 420 and Crash Location (19, 102)
found crash : True , index : 421 and Crash Location (35, -80)
found crash : True , index : 422 and Crash Location (49, 104)
found crash : True , index : 423 and Crash Location (45, -73)
JUST ONE TRANSIT !!!
found crash : True , index : 425 and Crash Location (25, 55)
found crash : True , index : 426 and Crash Location (-31, -64)
found crash : True , index : 427 and Crash Location (15, 38)
found crash : False , index : 428 and Crash Location (50, -1)
found crash : True , index : 429 and Crash Location (41, 2)
found crash : True , index : 430 and Crash Location (-3, -61)
found crash : True , index : 431 and Crash Location (9, 76)
found crash : True , index : 432 and Crash Location (44, 8)
found crash : True , index : 433 and Crash Location (47, 

found crash : False , index : 558 and Crash Location (30, 117)
found crash : True , index : 559 and Crash Location (17, -92)
found crash : True , index : 560 and Crash Location (4, -73)
found crash : False , index : 561 and Crash Location (35, -78)
found crash : True , index : 562 and Crash Location (25, 85)
found crash : True , index : 563 and Crash Location (46, -67)
JUST ONE TRANSIT !!!
found crash : True , index : 565 and Crash Location (48, 2)
found crash : True , index : 566 and Crash Location (-23, -46)
found crash : True , index : 567 and Crash Location (-41, -72)
found crash : True , index : 568 and Crash Location (40, -74)
found crash : True , index : 569 and Crash Location (-6, 20)
found crash : True , index : 570 and Crash Location (59, -132)
found crash : True , index : 571 and Crash Location (31, -82)
found crash : True , index : 572 and Crash Location (26, 50)
JUST ONE TRANSIT !!!
found crash : True , index : 574 and Crash Location (10, -84)
found crash : True , index : 

found crash : True , index : 697 and Crash Location (30, -87)
found crash : True , index : 698 and Crash Location (47, -92)
found crash : True , index : 699 and Crash Location (49, 6)
found crash : True , index : 700 and Crash Location (3, 117)
found crash : True , index : 701 and Crash Location (36, -105)
found crash : True , index : 702 and Crash Location (14, 120)
found crash : True , index : 703 and Crash Location (-1, 34)
found crash : True , index : 704 and Crash Location (34, -94)
found crash : False , index : 705 and Crash Location (-9, 147)
found crash : True , index : 706 and Crash Location (42, -89)
found crash : True , index : 707 and Crash Location (32, 51)
found crash : True , index : 708 and Crash Location (35, -75)
found crash : True , index : 709 and Crash Location (-25, -49)
found crash : True , index : 710 and Crash Location (-12, 44)
found crash : True , index : 711 and Crash Location (37, 40)
found crash : True , index : 712 and Crash Location (35, -80)
found crash

found crash : True , index : 835 and Crash Location (-28, -65)
found crash : True , index : 836 and Crash Location (-15, 23)
found crash : True , index : 837 and Crash Location (-2, 28)
found crash : True , index : 838 and Crash Location (15, 32)
found crash : True , index : 839 and Crash Location (3, 8)
found crash : True , index : 840 and Crash Location (43, -79)
found crash : True , index : 841 and Crash Location (38, 13)
found crash : True , index : 842 and Crash Location (59, 24)
found crash : True , index : 843 and Crash Location (38, 23)
found crash : True , index : 844 and Crash Location (10, -70)
found crash : True , index : 845 and Crash Location (-8, -74)
found crash : True , index : 846 and Crash Location (3, 98)
found crash : True , index : 847 and Crash Location (-4, 15)
found crash : True , index : 848 and Crash Location (-22, -43)
found crash : True , index : 849 and Crash Location (5, -66)
JUST ONE TRANSIT !!!
found crash : True , index : 851 and Crash Location (49, -9

found crash : True , index : 976 and Crash Location (44, -93)
found crash : False , index : 977 and Crash Location (49, -125)
JUST ONE TRANSIT !!!
found crash : True , index : 979 and Crash Location (2, 44)
found crash : True , index : 980 and Crash Location (40, -3)
found crash : True , index : 981 and Crash Location (14, -89)
found crash : True , index : 982 and Crash Location (42, 74)
found crash : True , index : 983 and Crash Location (0, -78)
found crash : True , index : 984 and Crash Location (-2, 28)
found crash : True , index : 985 and Crash Location (39, -82)
found crash : True , index : 986 and Crash Location (58, 56)
found crash : True , index : 987 and Crash Location (34, -81)
JUST ONE TRANSIT !!!
found crash : True , index : 989 and Crash Location (27, 86)
found crash : False , index : 990 and Crash Location (41, -88)
found crash : True , index : 991 and Crash Location (19, -99)
found crash : True , index : 992 and Crash Location (33, 43)
found crash : True , index : 993 a

In [28]:
# Save the crashes together with their route legs as a '#'-separated file
data.to_csv(path_or_buf='map_test', sep='#')