In [27]:
import os
import pickle
import sys

import geocoder
import geopy.distance as dist
import numpy as np
import pandas as pd
import requests

In [109]:
# Load the crash records, discard the columns this notebook never reads, and
# keep only the rows from position 5240 onwards (a small tail used for testing).
data = (
    pd.read_csv('Airplane_Crashes_and_Fatalities_Since_1908.csv')
    .drop(columns=['Flight #', 'Registration', 'cn/In', 'Ground'])
    .iloc[5240:]
)

In [111]:
def geocode(address):
    """Look up the latitude/longitude of an address with the Google Geocoding API.

    The API key is read from the ``GOOGLE_MAPS_API_KEY`` environment variable
    so that no credential is stored in the notebook.

    Parameters
    ----------
    address : str
        Free-form place name or address to geocode.

    Returns
    -------
    tuple of (int, int)
        ``(lat, lng)`` of the first result, each truncated to a whole degree
        by ``int``. ``(0, 0)`` is returned as the "could not geocode" sentinel
        when ``address`` is not a non-empty string, when the request fails, or
        when the response does not contain a usable result.

    Raises
    ------
    RuntimeError
        If ``GOOGLE_MAPS_API_KEY`` is not set.
    """
    api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        # A missing key is a configuration error, not a lookup failure: fail
        # loudly instead of silently turning every address into (0, 0).
        raise RuntimeError(
            'Set the GOOGLE_MAPS_API_KEY environment variable before calling geocode()'
        )

    # A non-string value (e.g. a NaN from a missing cell) must not be
    # formatted into the query, otherwise the text "nan" would be geocoded.
    if not isinstance(address, str) or not address.strip():
        return 0, 0

    try:
        # Let requests build the query string so that spaces, commas, '&' and
        # non-ASCII characters in the address are URL-encoded correctly.
        r = requests.get(
            'https://maps.googleapis.com/maps/api/geocode/json',
            params={'address': address, 'key': api_key},
            timeout=10,  # never let one stuck request block the whole run
        )

        # get the address information in a json format
        results = r.json()['results']

        # get the location information of the best (first) match
        location = results[0]['geometry']['location']

        # NOTE(review): int() truncates to whole degrees; kept because the
        # rest of the notebook compares against integer tuples - confirm
        # whether full precision is wanted.
        return int(location['lat']), int(location['lng'])

    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network error, non-JSON body, or no/malformed result: report the
        # sentinel that the rest of the notebook filters out.
        return 0, 0

### Uncomment and run the next cell if you want to recompute the geo-coordinates of the crashes

In [112]:
# Geocode every crash location into a (latitude, longitude) tuple
data['LatLongCrash'] = data['Location'].apply(geocode)

# Discard the rows that have no geocoding result or no route
data = data.dropna(subset=['LatLongCrash'])
data = data.dropna(subset=['Route'], how='any')

# Discard the crashes that could not be geolocated: geocode() reports a failure
# as the tuple (0, 0). The string form '(0, 0)' is filtered as well (presumably
# for frames whose tuples were turned into text - confirm).
crash_coordinates = data['LatLongCrash']
is_located = (crash_coordinates != (0,0)) & (crash_coordinates != '(0, 0)')
data = data[is_located]

# Renumber the rows from 0 without keeping the old index as a column
data = data.reset_index(drop=True)

# Write the new DataFrame to a CSV file
data.to_csv('Crash_Df')

## Compute the coordinates of the route leg that bounds each crash

In [144]:
# For every crash, find the leg of its route (two consecutive stops) on which the
# crash happened and return the names and coordinates of that leg's two endpoints.
def get_routes(crash_locations, routes):
    """Find, for each crash, the route leg that contains the crash location.

    A route is a string of stops separated by ``-``. Each pair of consecutive
    stops is a leg; the first leg whose start is closer to the crash than it is
    to the leg's end is selected.

    Parameters
    ----------
    crash_locations : sequence of (lat, lng)
        Crash coordinates, aligned by position with ``routes``. Any iterable
        (list, array, pandas Series) is accepted; it is read positionally.
    routes : sequence of str
        Route description of each crash.

    Returns
    -------
    tuple of five lists, each with one entry per route
        ``start_names, end_names, start_coordinates, end_coordinates, to_show``.
        For a crash that was located the first four hold the leg's stop names
        and geocoded coordinates and ``to_show`` is True. Otherwise ``to_show``
        is False and the first four hold ``'Just one route'`` (route without
        ``-``), ``"couldn't find"`` (no leg matched) or ``nan`` (an error
        occurred while processing the route).

    Notes
    -----
    NOTE(review): stops are split on every ``-``, so a hyphenated place name is
    cut in two, and a crash located exactly at a leg's end does not satisfy the
    strict ``<`` test - both behaviours are kept from the original; confirm.
    """
    # Materialise both inputs so that ``[i]`` is always positional: on a pandas
    # Series ``[i]`` is a label lookup and only works with a 0..n-1 index.
    crash_locations = list(crash_locations)
    routes = list(routes)

    # geopy 2.0 removed ``vincenty``; prefer ``geodesic`` and only fall back to
    # ``vincenty`` on geopy versions that do not provide it yet.
    measure = getattr(dist, 'geodesic', None) or dist.vincenty

    # Each stop is geocoded once per call: the end of one leg is the start of
    # the next, and the same cities appear in many routes.
    geocoded = {}

    def lookup(place):
        if place not in geocoded:
            geocoded[place] = geocode(place)
        return geocoded[place]

    # to store the leg endpoint names
    start_names = []
    end_names = []

    # to store the leg endpoint coordinates
    start_coordinates = []
    end_coordinates = []

    # store whether the route can be shown on the visualisation
    to_show = []

    for i in range(len(routes)):
        try:
            transits = [stop.strip() for stop in routes[i].split('-')]

            if len(transits) == 1:
                # The route names a single place (or an activity): no leg to test
                print('JUST ONE TRANSIT !!!')
                row = ('Just one route',) * 4 + (False,)
            else:
                crash_loc = crash_locations[i]
                row = None

                for start, end in zip(transits, transits[1:]):
                    start_geo = lookup(start)
                    end_geo = lookup(end)

                    # geocode() returns (0, 0) when it fails; such a leg cannot
                    # be measured, so it must not be treated as a real position.
                    if start_geo == (0, 0) or end_geo == (0, 0):
                        continue

                    start_end_dist = measure(start_geo, end_geo).km
                    start_crash_dist = measure(start_geo, crash_loc).km

                    # the crash is closer to the start than the leg is long:
                    # take this leg as the one on which the crash happened
                    if start_crash_dist < start_end_dist:
                        row = (start, end, start_geo, end_geo, True)
                        break

                found_crash = row is not None
                print('found crash : {} , index : {} and Crash Location {}'.format(found_crash, i, crash_loc))
                if not found_crash:
                    row = ("couldn't find",) * 4 + (False,)

        except Exception as e:
            # e.g. a missing route (NaN has no ``split``) or unusable coordinates
            print('error index : {} , error : {}'.format(i, e))
            error = float('nan')
            row = (error,) * 4 + (False,)

        # Exactly one entry per route in every list keeps them aligned with the input
        start_names.append(row[0])
        end_names.append(row[1])
        start_coordinates.append(row[2])
        end_coordinates.append(row[3])
        to_show.append(row[4])

    return start_names, end_names, start_coordinates, end_coordinates, to_show

### Uncomment and run the cell below to recompute the route leg corresponding to each crash

In [146]:
# Get, for every crash, the names and geo coordinates of the route leg it happened on.
# Both columns are passed as plain arrays so that get_routes indexes them by
# position, independently of the DataFrame index labels.
start, end, start_geo, end_geo, to_show = get_routes(data['LatLongCrash'].values, data['Route'].values)


# put the values found in the DataFrame
data['Start City Crash'] = start
data['End City Crash'] = end
data['Start City Geo Crash'] = start_geo
data['End City Geo Crash'] = end_geo
data['To_show'] = to_show


# Drop the crashes (rows) whose route lookup failed: get_routes marks them with NaN.
# The previous dropna(axis=1) removed whole columns containing any NaN instead,
# and the result of reset_index() was discarded.
route_columns = ['Start City Crash', 'End City Crash', 'Start City Geo Crash', 'End City Geo Crash']
data = data.dropna(subset=route_columns, how='any').reset_index(drop=True)

JUST ONE TRANSIT !!!
found crash : False , index : 1 and Crash Location (18, -66)
found crash : True , index : 2 and Crash Location (32, -96)
found crash : False , index : 3 and Crash Location (29, -90)
found crash : True , index : 4 and Crash Location (40, -74)
found crash : True , index : 5 and Crash Location (-3, -60)
found crash : True , index : 6 and Crash Location (41, 12)
found crash : False , index : 7 and Crash Location (43, -78)
found crash : True , index : 8 and Crash Location (25, 32)
found crash : False , index : 9 and Crash Location (52, 4)
found crash : True , index : 10 and Crash Location (0, 33)
found crash : False , index : 11 and Crash Location (47, -52)
found crash : False , index : 12 and Crash Location (46, -112)
found crash : False , index : 13 and Crash Location (35, 139)
found crash : False , index : 14 and Crash Location (57, -1)
JUST ONE TRANSIT !!!
found crash : False , index : 16 and Crash Location (-4, 138)
found crash : False , index : 17 and Crash Locati

In [149]:
# Show the resulting DataFrame as the cell output for a visual check
data

Unnamed: 0,Date,Location,Operator,Route,Type,Aboard,Fatalities,Summary,LatLongCrash,Start City Crash,End City Crash,Start City Geo Crash,End City Geo Crash,To_show
0,11/27/2008,"Off Perpignan, France",XL Airways leased from Air New Zealand,Training,Airbus A320-232,7.0,7.0,The Airbus A320 was leased by XL Airways of Ge...,"(42, 2)",Just one route,Just one route,Just one route,Just one route,False
1,12/03/2008,"San Juan, Puerto Rico",Webstas Aviation Services Inc.,"Tortola Virgin Islands - San Juan, PR",Rockwell International 690B,3.0,3.0,The plane struck the side of El Yunque mountai...,"(18, -66)",couldn't find,couldn't find,couldn't find,couldn't find,False
2,12/11/2008,"Off Sabine Pass, Texas",Rotorcraft Leasing Co,Sabine Pass - Oil Platform,Bell 206-L4 Jet Ranger III,3.0,3.0,The helicopter ferrying workers to an off shor...,"(32, -96)",Sabine Pass,Oil Platform,"(29, -93)","(34, -119)",True
3,01/04/2009,Near Houma Louisiana,Petroleum Helicopters Inc,Bayou Penchant - Off shore oil fields,Sikorsky S-76C,9.0,8.0,A helicopter bound for offshore oil fields wen...,"(29, -90)",couldn't find,couldn't find,couldn't find,couldn't find,False
4,01/15/2009,"New York, New York",US Airways,"New York, NY- Charlotte, NC",Airbus A320-214,155.0,0.0,The plane was taking off from La Guardia Airpo...,"(40, -74)","New York, NY","Charlotte, NC","(40, -74)","(35, -80)",True
5,02/07/2009,"Manacapuru, Brazil",Aerotaxi Manaus,Coari - Manus,Bandeirante EMB-110P1,28.0,24.0,The plane was being used as an air taxi to fer...,"(-3, -60)",Coari,Manus,"(-4, -63)","(-2, 146)",True
6,02/07/2009,"Trigoria, Italy",Air One Executive,Rome - Bologna,Cessna 650 Citation III,2.0,2.0,"The plane, heading to Bologna to pick up a med...","(41, 12)",Rome,Bologna,"(41, 12)","(44, 11)",True
7,02/12/2009,"Clarence Center, New York",Continental Connection/Colgan Air,"Newark, N.J. - Buffalo, NY",Bombardier DHC-8-402 Q400,49.0,49.0,The commuter plane crashed while attemptiong t...,"(43, -78)",couldn't find,couldn't find,couldn't find,couldn't find,False
8,02/20/2009,"Luxor, Egypt",Aerolift,"Entebbe, Uganda - Luxor, Egypt - Niklaev, Ukraine",Antonov 12V,5.0,5.0,"While attemping to take off from Luxor, the ca...","(25, 32)","Luxor, Egypt","Niklaev, Ukraine","(25, 32)","(46, 31)",True
9,02/25/2009,"Amsterdam, Netherlands",Turkish Airlines,"Istanbul, Turkey - Amsterdam, Netherlands",Boeing 737-8F2,134.0,9.0,The plane was on final approach to Runway 18R ...,"(52, 4)",couldn't find,couldn't find,couldn't find,couldn't find,False


In [150]:
# Write the DataFrame to the file 'map_test' in CSV format
# (to_csv also writes the index as the first column by default)
data.to_csv('map_test')