# Import Zone

In [118]:
import pandas as pd
import matplotlib.pyplot as plt

## Load dataset

In [4]:
df = pd.read_csv('data/Aircraft_Incident_Dataset.csv')
df_types = df.dtypes
df_shape = df.shape
print(f'The size of the dataframe is {df_shape[0]} rows and {df_shape[1]} columns.')
print('\nThe columns have the following names & types: ')
print(df_types)

The size of the dataframe is 23519 rows and 23 columns.

The columns have the following names & types: 
Incident_Date           object
Aircaft_Model           object
Aircaft_Registration    object
Aircaft_Operator        object
Aircaft_Nature          object
Incident_Category       object
Incident_Cause(es)      object
Incident_Location       object
Aircaft_Damage_Type     object
Date                    object
Time                    object
Arit                    object
Aircaft_Engines         object
Onboard_Crew            object
Onboard_Passengers      object
Onboard_Total           object
Fatalities               int64
Aircaft_First_Flight    object
Aircraft_Phase          object
Departure_Airport       object
Destination_Airport     object
Ground_Casualties       object
Collision_Casualties    object
dtype: object


## Data Segmentation

Extract only the information related with Domestic Scheduled Passenger (DSP), separating the dataframe in Incidents DSP with fatalities and DSP without fatalities:

In [74]:
df_dsp_wofatalities = df[(df["Aircaft_Nature"] == "Domestic Scheduled Passenger") & (df["Fatalities"] == 0)]
df_dsp_wofatalities_shape = df_dsp_wofatalities.shape
df_dsp_wfatalities  = df[(df["Aircaft_Nature"] == "Domestic Scheduled Passenger") & (df["Fatalities"] > 0)]
df_dsp_wfatalities_shape = df_dsp_wfatalities.shape

print(f'The number of records of Domestic Scheduled Passenger with fatalities are: {df_dsp_wfatalities_shape[0]}')
print(f'The number of records of Domestic Scheduled Passenger without fatalities are: {df_dsp_wofatalities_shape[0]}')

The number of records of Domestic Scheduled Passenger with fatalities are: 1755
The number of records of Domestic Scheduled Passenger without fatalities are: 2249


# Causes

## Determine the most common reason/cause of airplane crashes and their fatality rate?

### What are the categories of cause most common in accidents with fatalities?

In [117]:
#Extract just the data related with incident cause (Column 7) when the accidents have fatalities.
all_causes_wfatalities = df_dsp_wfatalities['Incident_Cause(es)'].values

#Separate each value by "," Character, due to each value has multiple causes, For example: Airplane - Engines, Airplane - Engines - Fire, Cargo - Overloaded
all_causes_wfatalities_separated = []
for cause in all_causes_wfatalities:
    cause_splits = cause.split(",")
    all_causes_wfatalities_separated.append(cause_splits)

#Iterate each flight to get just the first word of each cause, for example : Airplane - Engines -> Airplane and eliminating the duplicates in each element.
all_causes_wfatalities_separated_unique = []
for i in all_causes_wfatalities_separated:
    all_causes_wfatalities_separated_unique.append([set(cause.split("-")[0].strip() for cause in i)])

#Transform the set in a flat list with all the causes
flat_list_all_causes_wfatalities = [element for list in all_causes_wfatalities_separated_unique for set_causes in list for element in set_causes]

#Transform the previous list in a series to following manipulation
series_all_causes_wfatalities = pd.Series(flat_list_all_causes_wfatalities)

#Count the frecuency of each cause and delete word mountain due to it is not a category and take just the top 5 values
series_all_causes_wfatalities_frecuency = series_all_causes_wfatalities.value_counts().drop('mountain')[:5]
series_all_causes_wfatalities_frecuency



Result      1281
Info         369
Airplane     224
Weather      164
Security     137
dtype: int64

In [112]:
all_causes_wfatalities_separated

[['Result - Loss of control'],
 ['Result - CFIT - Hill', ' mountain'],
 ['Result - CFIT - Hill', ' mountain'],
 ['Result - Runway excursion'],
 ['Airplane - Engines',
  ' Airplane - Engines - All engine powerloss',
  ' Result - Loss of control'],
 ['Result - Hijacking - Plane stormed', ' Security - Hijack'],
 ['Airplane - Systems',
  ' Airplane - Systems - Electrical system',
  ' Landing/takeoff - Landing',
  ' Landing/takeoff - Landing - Bounced',
  ' Result - Emergency',
  ' forced landing - On runway'],
 ['Result - Runway excursion'],
 ['Airplane - Undercarriage',
  ' Airplane - Undercarriage - Brakes',
  ' Maintenance - Wrong installation of parts',
  ' Result - Runway excursion'],
 ['Result - Loss of control'],
 ['Info-Unavailable'],
 ['Result - Loss of control', ' Result - Loss of control (presumed)'],
 ['Airplane - Instruments',
  ' Airplane - Instruments - Pitot heads',
  ' Result - Loss of control'],
 ['Result - Loss of control'],
 ['Airplane - Engines',
  ' Airplane - Engines