## Dataset Source

In [5]:
#https://data.world/data-society/airplane-crashes

## Importing Libraries & Initial Notebook Setup

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:60% !important; }</style>"))
pd.set_option("max_rows", 300)
pd.options.display.max_columns=55

## Loading the Dataset

In [7]:
df = pd.read_csv("data/Airplane_Crashes_and_Fatalities_Since_1908.csv")

## Checking the dataset

In [8]:
# Missing values within the dataset are immeditly visible.
df.head(20)

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary
0,09/17/1908,17:18,"Fort Myer, Virginia",Military - U.S. Army,,Demonstration,Wright Flyer III,,1.0,2.0,1.0,0.0,"During a demonstration flight, a U.S. Army fly..."
1,07/12/1912,06:30,"AtlantiCity, New Jersey",Military - U.S. Navy,,Test flight,Dirigible,,,5.0,5.0,0.0,First U.S. dirigible Akron exploded just offsh...
2,08/06/1913,,"Victoria, British Columbia, Canada",Private,-,,Curtiss seaplane,,,1.0,1.0,0.0,The first fatal airplane accident in Canada oc...
3,09/09/1913,18:30,Over the North Sea,Military - German Navy,,,Zeppelin L-1 (airship),,,20.0,14.0,0.0,The airship flew into a thunderstorm and encou...
4,10/17/1913,10:30,"Near Johannisthal, Germany",Military - German Navy,,,Zeppelin L-2 (airship),,,30.0,30.0,0.0,Hydrogen gas which was being vented was sucked...
5,03/05/1915,01:00,"Tienen, Belgium",Military - German Navy,,,Zeppelin L-8 (airship),,,41.0,21.0,0.0,Crashed into trees while attempting to land af...
6,09/03/1915,15:20,"Off Cuxhaven, Germany",Military - German Navy,,,Zeppelin L-10 (airship),,,19.0,19.0,0.0,"Exploded and burned near Neuwerk Island, when..."
7,07/28/1916,,"Near Jambol, Bulgeria",Military - German Army,,,Schutte-Lanz S-L-10 (airship),,,20.0,20.0,0.0,"Crashed near the Black Sea, cause unknown."
8,09/24/1916,01:00,"Billericay, England",Military - German Navy,,,Zeppelin L-32 (airship),,,22.0,22.0,0.0,Shot down by British aircraft crashing in flames.
9,10/01/1916,23:45,"Potters Bar, England",Military - German Navy,,,Zeppelin L-31 (airship),,,19.0,19.0,0.0,Shot down in flames by the British 39th Home D...


## Checking and fomatting data types

In [9]:
#Checking data types of each column in the dataset.
df.dtypes

Date             object
Time             object
Location         object
Operator         object
Flight #         object
Route            object
Type             object
Registration     object
cn/In            object
Aboard          float64
Fatalities      float64
Ground          float64
Summary          object
dtype: object

In [10]:
#loading and formating dates
df = pd.read_csv("data/Airplane_Crashes_and_Fatalities_Since_1908.csv", parse_dates=["Date"])

In [11]:
#Rechecking data types of each column to make sure the dates are now in the correct format.
df.dtypes

Date            datetime64[ns]
Time                    object
Location                object
Operator                object
Flight #                object
Route                   object
Type                    object
Registration            object
cn/In                   object
Aboard                 float64
Fatalities             float64
Ground                 float64
Summary                 object
dtype: object

## Summarizing Data

In [12]:
# Based on this dataset we can observe that a total of 5268 crashes have been recorded from 17-09-1908 to 08-06-2009.
# We can observe here that Moscow, Russia has had the highest number of crashes.
# The number of crahses for Moscow, Russia is equal to 15.
df.describe(include="all")

  after removing the cwd from sys.path.


Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary
count,5268,3049,5248,5250,1069,3562,5241,4933.0,4040.0,5246.0,5256.0,5246.0,4878
unique,4753,1005,4303,2476,724,3244,2446,4905.0,3707.0,,,,4673
top,1973-02-28 00:00:00,15:00,"Moscow, Russia",Aeroflot,-,Training,Douglas DC-3,49.0,178.0,,,,Crashed during takeoff.
freq,4,32,15,179,67,81,334,3.0,6.0,,,,15
first,1908-09-17 00:00:00,,,,,,,,,,,,
last,2009-06-08 00:00:00,,,,,,,,,,,,
mean,,,,,,,,,,27.554518,20.068303,1.608845,
std,,,,,,,,,,43.076711,33.199952,53.987827,
min,,,,,,,,,,0.0,0.0,0.0,
25%,,,,,,,,,,5.0,3.0,0.0,


## Making New Features

In [13]:
#Obtaining the number of survivors by subtracting the total number of fatalities from the total number of people aboard.
df['Survived'] = df['Aboard'] - df['Fatalities']

In [14]:
#Obtaining thr survival rate by getting a % of survivors vs total people aboard.
df['SurvivalRate'] = (df['Survived']/df['Aboard'])*100

In [15]:
# We can also observe that the avegrage sruvival rate from 17-09-1908 to 08-06-2009 is 16.5%.
df.describe(include = "all")

  


Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
count,5268,3049,5248,5250,1069,3562,5241,4933.0,4040.0,5246.0,5256.0,5246.0,4878,5246.0,5244.0
unique,4753,1005,4303,2476,724,3244,2446,4905.0,3707.0,,,,4673,,
top,1973-02-28 00:00:00,15:00,"Moscow, Russia",Aeroflot,-,Training,Douglas DC-3,49.0,178.0,,,,Crashed during takeoff.,,
freq,4,32,15,179,67,81,334,3.0,6.0,,,,15,,
first,1908-09-17 00:00:00,,,,,,,,,,,,,,
last,2009-06-08 00:00:00,,,,,,,,,,,,,,
mean,,,,,,,,,,27.554518,20.068303,1.608845,,7.471026,16.50684
std,,,,,,,,,,43.076711,33.199952,53.987827,,28.144659,29.877115
min,,,,,,,,,,0.0,0.0,0.0,,0.0,0.0
25%,,,,,,,,,,5.0,3.0,0.0,,0.0,0.0


## Checking Missing Values

In [16]:
#Getting count of missing values in the data set
df.isna().sum()

Date               0
Time            2219
Location          20
Operator          18
Flight #        4199
Route           1706
Type              27
Registration     335
cn/In           1228
Aboard            22
Fatalities        12
Ground            22
Summary          390
Survived          22
SurvivalRate      24
dtype: int64

In [17]:
# Gauging the amount of missing values in comparison to the total amount
# The good thing about the dataset is that the columns we are particulary intrested in exploring don't have as many missing values.
# Date = 0, Location = 20, Operator = 18, Type = 27, Aboard = 22 Fatalities = 12 
df.count() / len(df)

Date            1.000000
Time            0.578778
Location        0.996203
Operator        0.996583
Flight #        0.202923
Route           0.676158
Type            0.994875
Registration    0.936409
cn/In           0.766894
Aboard          0.995824
Fatalities      0.997722
Ground          0.995824
Summary         0.925968
Survived        0.995824
SurvivalRate    0.995444
dtype: float64

In [18]:
#Getting a % of missing values within the "Time" column in comparison to the total values present.
#Further analysis shows that around 42.1% of "Time" are missing. (Value rounded off to 1 decimal places)

round(((len(df[df["Time"].isna()]))/(len(df["Time"])))*100, 1)

42.1

In [19]:
#Getting a % of missing values within the "Flight #" column in comparison to the total values present.
#Further analysis shows that around 79.7% of "Flight#" are missing. (Value rounded off to 1 decimal places)

round(((len(df[df["Flight #"].isna()]))/(len(df["Flight #"])))*100, 1)

79.7

In [20]:
#Getting a % of missing values within the "Route" column in comparison to the total values present.
#Further analysis shows that around 32.4% of "Route" values are missing. (Value rounded off to 1 decimal places)
round(((len(df[df["Route"].isna()]))/(len(df["Route"])))*100, 1)

32.4

In [21]:
#Getting a % of missing values within the "Route" column in comparison to the total values present.
#Further analysis shows that no "Route" values are missing. 
round(((len(df[df["Date"].isna()]))/(len(df["Date"])))*100, 1)

0.0

In [22]:
#Getting a % of missing values within the "Location" column in comparison to the total values present.
#Further analysis shows that around 0.4% of "Location" values are missing. (Value rounded off to 1 decimal places)
round(((len(df[df["Location"].isna()]))/(len(df["Location"])))*100, 1)

0.4

In [23]:
#Getting a % of missing values within the "Operator" column in comparison to the total values present.
#Further analysis shows that around 0.3% of "Operator" values are missing. (Value rounded off to 1 decimal places)
round(((len(df[df["Operator"].isna()]))/(len(df["Operator"])))*100, 1)

0.3

In [24]:
#Getting a % of missing values within the "Type" column in comparison to the total values present.
#Further analysis shows that around 0.5% of "Type" values are missing. (Value rounded off to 1 decimal places)
round(((len(df[df["Type"].isna()]))/(len(df["Type"])))*100, 1)

0.5

In [25]:
#Getting a % of missing values within the "Aboard" column in comparison to the total values present.
#Further analysis shows that around 0.4% of "Aboard" values are missing. (Value rounded off to 1 decimal places)
round(((len(df[df["Aboard"].isna()]))/(len(df["Aboard"])))*100, 1)

0.4

In [26]:
#Getting a % of missing values within the "Fatalities" column in comparison to the total values present.
#Further analysis shows that around 0.2% of "Fatalities" values are missing. (Value rounded off to 1 decimal places)
round(((len(df[df["Fatalities"].isna()]))/(len(df["Fatalities"])))*100, 1)

0.2

In [27]:
len(df[df["Location"].isna()])

20

In [28]:
len(df["Location"])

5268

In [29]:
20/5268

0.0037965072133637054

In [30]:
df[df["Operator"].isna()]

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
61,1922-04-08,,"Pao Ting Fou, China",,,,,,,17.0,17.0,0.0,All seventeen aboard were Chinese nationals.,0.0,0.0
67,1922-10-02,,"Venice, Italy",,,,de Havilland DH-9,G-EAYT,,4.0,4.0,0.0,,0.0,0.0
106,1926-09-26,,"New York, New York",,,New York - Paris,Sikorsky S-25,,,5.0,2.0,0.0,Too much weight caused the landing gear to col...,3.0,60.0
138,1928-03-03,,"Rio de Janeiro, Brazil",,,,,,,10.0,10.0,0.0,,0.0,0.0
164,1928-12-04,,"Rio de Janeiro, Brazil",,,,Junkers G24,,,6.0,6.0,0.0,The aircraft's wingtip struck the water and cr...,0.0,0.0
359,1935-05-29,,"San Barbra, Honduras",,,,,,,9.0,6.0,0.0,Crashed into the Ulua River.,3.0,33.333333
482,1938-08-10,,"Debrecen, Hungary",,,,Ford Tri-motor,,,12.0,12.0,0.0,The plane was carrying Hungarian journalists.,0.0,0.0
545,1940-11-09,,"Rio de Janeiro, Brazil",,,Rio de Janeiro - Sao Paulo,,,,18.0,18.0,0.0,Midair collisioin with a private plane.,0.0,0.0
618,1943-07-04,23:00,Gibraltar,,,,Consolidated Liberator B24 C,A-L-523,,13.0,12.0,0.0,"The flight, bound for London, crashed into the...",1.0,7.692308
704,1945-04-20,,,,,,Junkers JU-53/3m,D-ANAJ,,18.0,18.0,0.0,Missing on an evacuation flight from Berlin to...,0.0,0.0


In [31]:
df[df["Operator"].isna()].describe(include="all")

  """Entry point for launching an IPython kernel.


Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
count,18,3,17,0.0,0.0,7,12,9,3.0,18.0,18.0,18.0,15,18.0,18.0
unique,18,3,15,0.0,0.0,7,12,9,3.0,,,,15,,
top,1922-04-08 00:00:00,23:00,"Rio de Janeiro, Brazil",,,"Monterey, Mexico - San Ysidro, CA",Cessna 206 Seneca,OY-DVJ,4396.0,,,,Undershot the runway on final approach in snow...,,
freq,1,1,3,,,1,1,1,1.0,,,,1,,
first,1922-04-08 00:00:00,,,,,,,,,,,,,,
last,2007-03-14 00:00:00,,,,,,,,,,,,,,
mean,,,,,,,,,,9.388889,8.666667,0.0,,0.722222,11.035816
std,,,,,,,,,,4.924595,5.401525,0.0,,1.178511,19.392644
min,,,,,,,,,,3.0,2.0,0.0,,0.0,0.0
25%,,,,,,,,,,6.0,4.0,0.0,,0.0,0.0


## Fixing Missing Values

In [32]:
# Creating a copy of the dataframe to drop rows with missing values inacse we need to use a clean data frame going forward.
df_clean = df.copy()

In [33]:
# Dropping all rows with missing values
df_clean = df_clean.dropna()

In [34]:
# Dataframe without any missing values
df_clean

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
208,1930-01-19,18:23,"Oceanside, California",Maddux Airlines,7,"Aqua Caliente, Mexico - Los Angeles",Ford 5-AT-C Tri Motor,NC9689,5-AT-046,16.0,16.0,0.0,"While en route to Los Angeles, the pilot, flyi...",0.0,0.000000
236,1931-03-31,10:45,"Bazaar, Kansas",Trans Continental and Western Air,599,Kansas City - Wichita - Los Angeles,Fokker F10A Trimotor,NC-999,1063,8.0,8.0,0.0,"Shortly after taking off from Kansas City, one...",0.0,0.000000
334,1934-08-31,23:42,"Amazonia, Missouri",Rapid Air Transport,6,Omaha - St. Joseph,Stinson SM-6000B,NC10809,5004,5.0,5.0,0.0,The plane crashed about 11 miles from St. Jose...,0.0,0.000000
354,1935-05-06,03:30,"Atlanta, Missouri",Trans Continental and Western Air,6,Los Angeles - Albuquerque - Kanasas City - Wa...,Douglas DC-2-112,NC13785,1295,14.0,5.0,0.0,The plane crashed while en route from Albuquer...,9.0,64.285714
365,1935-08-14,23:45,"Near Gilmer, Texas",Delta Air Lines,4,Dallas - Atlanta,Stinson Model A,NC14599,9103,4.0,4.0,0.0,Crashed 3 miles south of Gilmer. The outboard ...,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5248,2009-02-12,22:17,"Clarence Center, New York",Continental Connection/Colgan Air,3407,"Newark, N.J. - Buffalo, NY",Bombardier DHC-8-402 Q400,N200WQ,4200,49.0,49.0,1.0,The commuter plane crashed while attemptiong t...,0.0,0.000000
5251,2009-02-25,10:31,"Amsterdam, Netherlands",Turkish Airlines,1951,"Istanbul, Turkey - Amsterdam, Netherlands",Boeing 737-8F2,TC-JGE,29789/1065,134.0,9.0,0.0,The plane was on final approach to Runway 18R ...,125.0,93.283582
5255,2009-03-23,06:48,"Tokyo, Japan",FedEx,80,"Guangzhou, China - Tokyo, Japan",McDonnell Douglas MD-11,N526FE,48600/560,2.0,2.0,0.0,The cargo plane crashed and burst into flame a...,0.0,0.000000
5259,2009-04-17,10:30,"Mount Gergaji, Indonesia",Mimika Air,514,Ilaga - Mulia,Pilatus PC-6,PK-LTJ,959,11.0,11.0,0.0,The passenger plane crashed in poor weather in...,0.0,0.000000


## Data Exploration 

In [35]:
# We can observe that out of 179 crashes suffered by Aeroflot 9 of those have been in Moscow, Russia followed by 4 near Moscow, Russia.
df[["Location","Operator"]].value_counts()

Location                             Operator                           
Moscow, Russia                       Aeroflot                               9
Manila, Philippines                  Philippine Air Lines                   7
Kunming, China                       China National Aviation Corporation    6
Near Moscow, Russia                  Aeroflot                               4
Sofia, Bulgaria                      Balkan Bulgarian Airlines              4
                                                                           ..
Near Tbilisi, Georgia                Tajikistan Airlines                    1
Near Tchamulate, Angola              Military - Cuban Air Force             1
Near Tchepone, Laos                  Military - U.S. Air Force              1
Near Tegal, Indonesia                PENAS                                  1
1,200 miles off Dakar, AtlantiOcean  Air France                             1
Length: 5080, dtype: int64

In [36]:
# The Military - U.S Airforce has suffered a total of 13 out of 176 crahses while flying on the training route. 
df[["Operator", "Route"]].value_counts()

Operator                        Route               
Military - U.S. Air Force       Training                13
Aeroflot                        Training                 4
Military - U.S. Air Force       Otis AFB                 3
Air France                      Training                 3
                                Saigon - Paris           3
                                                        ..
Mohawk Airlines                 Albany - Glenn Falls     1
Missionary Aviation Fellowship  Tabubil - Selbang        1
                                Tabubil - Bimin          1
Misrair                         Test                     1
A B Aerotransport               Malmo - Amsterdam        1
Length: 3484, dtype: int64

In [37]:
# It can be observed that the highest number of crahses have been flown by "Aeroflot" accounting for 179 crashes followed by "Military - 
# U.S Air Force" accounting for 176 crashes.
df[["Operator"]].value_counts()

Operator                                              
Aeroflot                                                  179
Military - U.S. Air Force                                 176
Air France                                                 70
Deutsche Lufthansa                                         65
United Air Lines                                           44
                                                         ... 
Military - U.S. Air Force / Military -  U.S. Air Force      1
Military - U.S. Air Force / Military -  U.S. Army           1
Military - U.S. Air Force / Military U.S. Air Force         1
Military - U.S. Air Force/Military - U.S. Air Force         1
L & J Company                                               1
Length: 2476, dtype: int64

In [38]:
# The highest number of crashes have occurred on the training route accounting for a total of 81 crashes followed by the sightseeing route which 
# accounts for a total of 29 crashes.
df[["Route"]].value_counts()

Route                     
Training                      81
Sightseeing                   29
Test flight                   17
Test                           6
Sao Paulo - Rio de Janeiro     5
                              ..
Newark, N.J. - Seattle, WA     1
Newark, N.J. - Buffalo, NY     1
Newark - Washington D.C.       1
Newark - Tacoma                1
 - Tegucigalpa - Toncontin     1
Length: 3244, dtype: int64

In [39]:
#The most common type of aircraft to crash has been the Douglas DC-3 accounting for a total of 334 crashes.
df[["Type"]].value_counts()

Type                                    
Douglas DC-3                                334
de Havilland Canada DHC-6 Twin Otter 300     81
Douglas C-47A                                74
Douglas C-47                                 62
Douglas DC-4                                 40
                                           ... 
Hawker Siddeley HS 125-400B (3)               1
Hawker Siddeley HS 748-260                    1
Hawker Siddeley HS-121 Trident 2E             1
Hawker Siddeley HS-125                        1
AAC-1 Toucan                                  1
Length: 2446, dtype: int64

In [40]:
# It can be observed that the US Aerial Mail Service had 26 De Havilland DH-4 type air crafts followed by Aeroflot having 19
# Yakovlev YAK-40 type air crafts.
df[["Type", "Operator"]].value_counts()

Type               Operator                           
De Havilland DH-4  US Aerial Mail Service                 26
Yakovlev YAK-40    Aeroflot                               19
Junkers JU-52/3m   Deutsche Lufthansa                     16
Douglas C-47       China National Aviation Corporation    15
Boeing KC-135A     Military - U.S. Air Force              15
                                                          ..
Ilyushin IL-18     Bulair TABSO                            1
                   Balkan Bulgarian Airlines               1
                   Air Guinee                              1
Ilyushin IL-14P    Shanxi Airlines                         1
AAC-1 Toucan       CTA LanguedoRoussillon                  1
Length: 4462, dtype: int64

In [41]:
df[df["Operator"] == "Aeroflot"]

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
825,1946-12-04,,"Meshed, Iran",Aeroflot,,Meshed - Teheran,Lisunov Li-2,,,24.0,24.0,0.0,Crashed shortly after takeoff from Meshed airp...,0.0,0.0
1190,1952-03-26,,"Moscow, Russia",Aeroflot,,,,,,70.0,70.0,0.0,The plane overshot the runway and collided wit...,0.0,0.0
1273,1953-07-27,12:30,"Near Kanggye, North Korea",Aeroflot,,,Ilyushin IL-12,,,21.0,21.0,0.0,Shot down by a U.S. Air Force F-86 jet fighter...,0.0,0.0
1355,1954-12-29,,"Near Moscow, Russia",Aeroflot,,,,,,45.0,45.0,0.0,,0.0,0.0
1356,1954-12-31,,"Irkutsk, Russia",Aeroflot,,Peking - Irkutsk - Cyprus,Ilyushin 14,,,17.0,17.0,0.0,Crashed during takeoff.,0.0,0.0
1386,1955-08-06,,"Near Voronezh, Russia",Aeroflot,,Starllingrad - Moscow.,,,,25.0,25.0,0.0,Crashed en route.,0.0,0.0
1494,1957-08-15,,"Copenhagen, Denmark",Aeroflot,,Riga - Copenhagen,Ilyushin IL-14P,CCCP-L1874,146000607,23.0,23.0,0.0,Crashed into the harbor after hitting the chim...,0.0,0.0
1553,1958-08-15,,"Near Chita, Russia",Aeroflot,,,Tupolev TU-104-A,CCCP-L5442,,64.0,64.0,0.0,The aircraft stalled after flying over a thund...,0.0,0.0
1565,1958-10-17,,"Near Kanash, Russia",Aeroflot,,Peking - Moscow,Tupolev TU-104A,CCCP-42362,,80.0,80.0,0.0,The plane was on a flight from Peking to Mosco...,0.0,0.0
1628,1959-11-16,,"Lvov, Ukraine, USSR",Aeroflot,,,Antonov AN-10,CCCP-11167,9401402,40.0,40.0,0.0,Crashed during approach.,0.0,0.0


In [42]:
# It can be observed that the highest number of crashes (total of 15) have taken place in Sao Paulo, Brazil and
# Moscow, Russia
df[["Location"]].value_counts()

Location                           
Sao Paulo, Brazil                      15
Moscow, Russia                         15
Rio de Janeiro, Brazil                 14
Bogota, Colombia                       13
Manila, Philippines                    13
                                       ..
Near Silchar, India                     1
Near Sihanoukville, Cambodia            1
Near Sibyak, Indonesia                  1
Near Siagon, Vietnam                    1
1,200 miles off Dakar, AtlantiOcean     1
Length: 4303, dtype: int64

In [43]:
# We can also observe that Aeroflot was the operator for atleast 9 of total 15 crashes in Moscow, Russia.
Moscow = df[df["Location"] == "Moscow, Russia"]
len(Moscow[Moscow["Operator"] == "Aeroflot"])

9

In [44]:
df[["Route","Fatalities"]].value_counts()

Route                           Fatalities
Training                        5.0           15
                                3.0           14
                                6.0           14
                                4.0           10
                                2.0            9
                                              ..
North Caicos - Grand Turk       1.0            1
North Adams, MA - New York, NY  2.0            1
Norman, OK - Manhattan, KS      2.0            1
Norisk - Krasnoyarsk            11.0           1
 - Tegucigalpa - Toncontin      10.0           1
Length: 3454, dtype: int64

In [45]:
df[df["Route"] == "Training"]

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
268,1932-03-08,02:00,"Saint Louis, Illinois",Century Air Lines,,Training,Stinson SM-6000B,,,5.0,2.0,0.0,"Crashed into a skeleton of an old windmill, 50...",3.0,60.0
866,1947-05-11,09:39,"Delaware Bay, New Jersey",Trans Continental and Western Air,,Training,Lockheed L-049 Constellation,NC86508,2029,4.0,4.0,0.0,"Went into a turn and lost control, spiraled in...",0.0,0.0
900,1947-11-18,13:00,"Newcastle, Delaware",Trans Continental and Western Air,,Training,Lockheed L-049-46-26 Constellation,NC86507,2028,5.0,5.0,0.0,Crashed short of the runway and burst into fla...,0.0,0.0
1084,1950-10-13,,"Almelund, Minnisota",Northwest Orient Airlines,,Training,Martin 202,NC93037,9158,6.0,6.0,0.0,Crashed after making a steep right turn. The u...,0.0,0.0
1145,1951-08-11,09:25,"Moisville, France",Air France,,Training,Douglas DC-3D,F-BAXB,42971,5.0,5.0,0.0,Abnormal flight maneuver made when the crew ex...,0.0,0.0
1164,1951-12-04,07:25,"Denver, Colorado",United Air Lines,,Training,Douglas DC-3A,N17109,4999,3.0,3.0,0.0,"Stalled, entered a spin and crashed. An inadve...",0.0,0.0
1186,1952-03-12,,"Near Sequin, Texas",Military - U.S. Air Force / U.S. Air Force,,Training,Boeing B-29 / Boeing B-29,,,15.0,15.0,,While on a training mission and flying blind o...,0.0,0.0
1346,1954-10-31,15:10,"Mangalore, Australia",TAA,,Training,Vickers 720 Viscount,VH-TVA,44,8.0,3.0,0.0,Crashed during takeoff. An error of judgement ...,5.0,62.5
1373,1955-04-04,15:55,"Islip, New York",United Air Lines,,Training,Douglas DC-6,N37512,43001/32,3.0,3.0,0.0,Suddenly dove into the ground. Unintentional m...,0.0,0.0
1456,1956-12-12,09:00,"Dannemois, France",Air France,,Training,Vickers 708 Viscount,F-BGNK,8,5.0,5.0,0.0,Crashed in a steep angle of attack.,0.0,0.0


In [46]:
df[df["Operator"] == "Aeroflot"]

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
825,1946-12-04,,"Meshed, Iran",Aeroflot,,Meshed - Teheran,Lisunov Li-2,,,24.0,24.0,0.0,Crashed shortly after takeoff from Meshed airp...,0.0,0.0
1190,1952-03-26,,"Moscow, Russia",Aeroflot,,,,,,70.0,70.0,0.0,The plane overshot the runway and collided wit...,0.0,0.0
1273,1953-07-27,12:30,"Near Kanggye, North Korea",Aeroflot,,,Ilyushin IL-12,,,21.0,21.0,0.0,Shot down by a U.S. Air Force F-86 jet fighter...,0.0,0.0
1355,1954-12-29,,"Near Moscow, Russia",Aeroflot,,,,,,45.0,45.0,0.0,,0.0,0.0
1356,1954-12-31,,"Irkutsk, Russia",Aeroflot,,Peking - Irkutsk - Cyprus,Ilyushin 14,,,17.0,17.0,0.0,Crashed during takeoff.,0.0,0.0
1386,1955-08-06,,"Near Voronezh, Russia",Aeroflot,,Starllingrad - Moscow.,,,,25.0,25.0,0.0,Crashed en route.,0.0,0.0
1494,1957-08-15,,"Copenhagen, Denmark",Aeroflot,,Riga - Copenhagen,Ilyushin IL-14P,CCCP-L1874,146000607,23.0,23.0,0.0,Crashed into the harbor after hitting the chim...,0.0,0.0
1553,1958-08-15,,"Near Chita, Russia",Aeroflot,,,Tupolev TU-104-A,CCCP-L5442,,64.0,64.0,0.0,The aircraft stalled after flying over a thund...,0.0,0.0
1565,1958-10-17,,"Near Kanash, Russia",Aeroflot,,Peking - Moscow,Tupolev TU-104A,CCCP-42362,,80.0,80.0,0.0,The plane was on a flight from Peking to Mosco...,0.0,0.0
1628,1959-11-16,,"Lvov, Ukraine, USSR",Aeroflot,,,Antonov AN-10,CCCP-11167,9401402,40.0,40.0,0.0,Crashed during approach.,0.0,0.0


In [47]:
df[["Location","Time", "Operator"]].value_counts()

Location                                  Time   Operator                                  
Near Leh, India                           11:00  Military - Indian Air Force                   2
La Verne, California                      06:05  Jack N. Boswick - Air Taxi                    1
Lander, Wyoming                           23:13  Denver Air Center                             1
Lamoille, Nevada                          16:36  Air Taxi - El Aero Services Inc.              1
Lakhta, Russia                            20:44  Military - Russian Navy                       1
                                                                                              ..
Near Souda, Crete                         17:30  Military - West German Air Force              1
Near Songnam-si, South Korea              14:40  Military - Republiof South Korea Air Force    1
Near Solo, Indonesia                      16:29  Garuda Indonesia Airlines                     1
Near Sokotu, Nigeria               

In [48]:
df_clean[df_clean['Operator'].str.contains('Military')]

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary,Survived,SurvivalRate
597,1942-10-23,17:15,"Palm Springs, California",American Airlines / Military - USAF,28,Palm Springs - New York City,Douglas DC-3 / Boeing B-34,NC16017,1555,13.0,12.0,0.0,Midair collision between an Army bomber and ai...,1.0,7.692308
720,1945-07-12,14:36,"Near Florence, South Carolina",Eastern Air Lines / Military - U.S. Army Air C...,45,Boston - NY - Washington DC - Jacksonville - M...,Douglas DC-3-201C / Army A-26,NC25647,2235,10.0,3.0,0.0,After deviating off course 8 miles to avoid mi...,7.0,70.0
1014,1949-07-30,10:45,"Chesterfield, New Jersey",Eastern Air Lines / Military - USN,557,New York City - Wilmington,Douglas DC-3 -201D/ F-6-F- 5 Hellcat,N19963/BU72887,2260 /,16.0,16.0,0.0,Midair collision with a US Navy F-6F. The F-6...,0.0,0.0
1027,1949-11-01,11:45,"Arlington, Virginia",Eastern Air Lines / Military - Bolivian Air Force,537,Boston - Washington D.C. - New Orleans,Douglas C-54B / P-38,N88727/NX26927,18365 /,55.0,55.0,0.0,Midair collision. The P-38 hit the airliner fr...,0.0,0.0
1124,1951-04-25,11:49,"Key West, Florida",Cubana de Aviacion / Military - US Navy,493,Miami - Havana,Douglas DC-4 /Beechcraft SMB-1,CU-T188/39939,10368,43.0,43.0,0.0,"The plane, on a flight from Miami to Cuba, col...",0.0,0.0
1535,1958-04-21,08:30,"Near Sloan, 10 miles SW of Las Vegas, Nevada",United Air Lines / Military - U.S. Air Force,736,Los Angeles - New York City,Douglas DC-7 / F-100F,N6328C/56-3755,45142 /243-31,49.0,49.0,0.0,The DC-7 was en route from Los Angeles to Denv...,0.0,0.0
1537,1958-05-20,11:29,"Brunswick, Maryland",Capital Airlines / Military - Air National Guard,300,Pittsburgh - Baltimore,Vickers Viscount 745D / T-33A,N7410/53-5966,108,11.0,11.0,0.0,"Midair collision at 8,000 ft., four miles ENE ...",0.0,0.0
1833,1963-02-01,17:15,"Ankara, Turkey",Middle East Airlines / Military - Turkish Air ...,265,Nicosia - Ankara,Vickers Viscount 754D,OD-ADE,244,17.0,17.0,87.0,Midair collision between a civilian and milita...,0.0,0.0
1988,1965-07-11,22:22,"Off Nantucket, Massachusetts",Military - U.S. Air Force,-,Otis AFB,EC-121H (Super Constellation),55-1036,4409,19.0,16.0,0.0,The aircraft experienced a lost of the No. 2 e...,3.0,15.789474
2081,1966-11-11,01:43,"Off Chatham, Massachusetts",Military - U.S. Air Force,-,Otis AFB,Lockheed EC-121H,55-5262,4413,19.0,19.0,0.0,The aircraft was observed flying low and emitt...,0.0,0.0
