#### Investigating Airplane Accidents
Using search algorithms and data structures to explore airplane accident data.

In [1]:
import pandas as pd
import numpy as np
import datetime
aviation_list =[]

In [2]:
with open("AviationData.txt",'r') as f:
    aviation_data = f.readlines()
    for line in aviation_data:
        lines_split = line.split('|')
        aviation_list.append([r.strip() for r in lines_split]) 
    

In [3]:
#Algorith should be modular and abstract
def linearsearch_code(code):
    lax_code=[] 
    for row in aviation_list:
        if code in row:
            lax_code.append(row)
    return lax_code

codes = linearsearch_code("LAX94LA336")
        
codes   

[['20001218X45447',
  'Accident',
  'LAX94LA336',
  '07/19/1962',
  'BRIDGEPORT, CA',
  'United States',
  '',
  '',
  '',
  '',
  'Fatal(4)',
  'Destroyed',
  '',
  'N5069P',
  'PIPER',
  'PA24-180',
  'No',
  '1',
  'Reciprocating',
  '',
  '',
  'Personal',
  '',
  '4',
  '0',
  '0',
  '0',
  'UNK',
  'UNKNOWN',
  'Probable Cause',
  '09/19/1996',
  '']]

#### Hash Tables

Working with Hash Tables. Hash Tables is a data structure that stores data based on keys-value pairs.A dictionary is most common form of a hash table

In [4]:
aviation_dict_list= []
headers= aviation_data[0].split("|")
for i, line in enumerate(aviation_data):
    l = line.strip().split("|")
    d={k.strip():d.strip() for k,d in zip(headers, l)}
    aviation_dict_list.append(d)

In [5]:
lax_dict=[]
for d in aviation_dict_list:
    if "LAX94LA336" in d.values():
        lax_dict.append(d)    
        
lax_dict        

[{'Event Id': '20001218X45447',
  'Investigation Type': 'Accident',
  'Accident Number': 'LAX94LA336',
  'Event Date': '07/19/1962',
  'Location': 'BRIDGEPORT, CA',
  'Country': 'United States',
  'Latitude': '',
  'Longitude': '',
  'Airport Code': '',
  'Airport Name': '',
  'Injury Severity': 'Fatal(4)',
  'Aircraft Damage': 'Destroyed',
  'Aircraft Category': '',
  'Registration Number': 'N5069P',
  'Make': 'PIPER',
  'Model': 'PA24-180',
  'Amateur Built': 'No',
  'Number of Engines': '1',
  'Engine Type': 'Reciprocating',
  'FAR Description': '',
  'Schedule': '',
  'Purpose of Flight': 'Personal',
  'Air Carrier': '',
  'Total Fatal Injuries': '4',
  'Total Serious Injuries': '0',
  'Total Minor Injuries': '0',
  'Total Uninjured': '0',
  'Weather Condition': 'UNK',
  'Broad Phase of Flight': 'UNKNOWN',
  'Report Status': 'Probable Cause',
  'Publication Date': '09/19/1996',
  '': ''}]

Accessing and storing data in hash tables is very quick.however, using a hash table uses lot of memory

#### Accidents by U.S. State

##### Count how many accidents happend in the U.S state

In [6]:
aviation_dict_list

[{'Event Id': 'Event Id',
  'Investigation Type': 'Investigation Type',
  'Accident Number': 'Accident Number',
  'Event Date': 'Event Date',
  'Location': 'Location',
  'Country': 'Country',
  'Latitude': 'Latitude',
  'Longitude': 'Longitude',
  'Airport Code': 'Airport Code',
  'Airport Name': 'Airport Name',
  'Injury Severity': 'Injury Severity',
  'Aircraft Damage': 'Aircraft Damage',
  'Aircraft Category': 'Aircraft Category',
  'Registration Number': 'Registration Number',
  'Make': 'Make',
  'Model': 'Model',
  'Amateur Built': 'Amateur Built',
  'Number of Engines': 'Number of Engines',
  'Engine Type': 'Engine Type',
  'FAR Description': 'FAR Description',
  'Schedule': 'Schedule',
  'Purpose of Flight': 'Purpose of Flight',
  'Air Carrier': 'Air Carrier',
  'Total Fatal Injuries': 'Total Fatal Injuries',
  'Total Serious Injuries': 'Total Serious Injuries',
  'Total Minor Injuries': 'Total Minor Injuries',
  'Total Uninjured': 'Total Uninjured',
  'Weather Condition': 'Weat

In [7]:
from collections import Counter
states=[]
for r in aviation_dict_list:
    if r["Country"] == "United States" and ',' in r["Location"]:
        state = r["Location"].split(',')[1].strip()
        if len(state)==2:
            states.append(state)
            
state_accidents = Counter(states)
state_accidents.most_common(5)

[('CA', 8654), ('TX', 5666), ('FL', 5634), ('AK', 5470), ('AZ', 2736)]

In [8]:
#sorted(state_accidents.items(), key=lambda pair: pair[1], reverse=True)

California and texas are these states are having most aviation accidents 

##### Fatalities and Injuries by Month

In [None]:
month_names = []
for dictionary in aviation_dict_list:
    month_injuries = []
    if 'Event Date' in dictionary:
        split_date = dictionary['Event Date'].split('/')
        try:
            s_injuries = int(dictionary['Total Serious Injuries'])
        except:
            s_injuries = 0
        try:
            f_injuries = int(dictionary['Total Fatal Injuries'])
        except:
            f_injuries = 0
        try:
            MM_YYYY = split_date[0]+'/'+split_date[2]
        except:
            MM_YYYY = ""
        if len(MM_YYYY) == 7:
            month_injuries.append(MM_YYYY)
            month_injuries.append(s_injuries)
            month_injuries.append(f_injuries)
    month_names.append(month_injuries)

In [None]:
from collections import OrderedDict

monthly_injuries = {}
for accident in month_names:
    try:
        month = accident[0]
        s_injury = accident[1]
        f_injury = accident[2]
    except:
        continue
    if month not in monthly_injuries:
        monthly_injuries[month] = {'Serious Injury': s_injury, 'Fatal Injury': f_injury}
    if month in monthly_injuries:
        monthly_injuries[month]['Serious Injury'] += s_injury
        monthly_injuries[month]['Fatal Injury'] += f_injury

injuries_count = Counter(monthly_injuries) 
OrderedDict(sorted(injuries_count.items(), key=lambda t: t[0]))bb

counting number of accidents by air carriers

In [None]:
air_carriers = []
for c in aviation_dict_list:
    air_carrier = c["Air Carrier"].split(",")
    if air_carrier != '':
        air_carr = c['Air Carrier']
        air_carriers.append(air_carr)
        
count = Counter(air_carriers)
print(count.most_common(4))