In [7]:
import json
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import seaborn as sns
import csv

In [8]:
df = pd.read_csv('data/Aviation_data.csv', encoding = 'ISO-8859-1', low_memory = False)


In [9]:
df.head()


Unnamed: 0,Event.Id,Investigation.Type,Accident.Number,Event.Date,Location,Country,Latitude,Longitude,Airport.Code,Airport.Name,...,Purpose.of.flight,Air.carrier,Total.Fatal.Injuries,Total.Serious.Injuries,Total.Minor.Injuries,Total.Uninjured,Weather.Condition,Broad.phase.of.flight,Report.Status,Publication.Date
0,20001218X45444,Accident,SEA87LA080,1948-10-24,"MOOSE CREEK, ID",United States,,,,,...,Personal,,2.0,0.0,0.0,0.0,UNK,Cruise,Probable Cause,
1,20001218X45447,Accident,LAX94LA336,1962-07-19,"BRIDGEPORT, CA",United States,,,,,...,Personal,,4.0,0.0,0.0,0.0,UNK,Unknown,Probable Cause,19-09-1996
2,20061025X01555,Accident,NYC07LA005,1974-08-30,"Saltville, VA",United States,36.922223,-81.878056,,,...,Personal,,3.0,,,,IMC,Cruise,Probable Cause,26-02-2007
3,20001218X45448,Accident,LAX96LA321,1977-06-19,"EUREKA, CA",United States,,,,,...,Personal,,2.0,0.0,0.0,0.0,IMC,Cruise,Probable Cause,12-09-2000
4,20041105X01764,Accident,CHI79FA064,1979-08-02,"Canton, OH",United States,,,,,...,Personal,,1.0,2.0,,0.0,VMC,Approach,Probable Cause,16-04-1980


In [10]:
df['Aircraft.Category'].value_counts()

Airplane             27617
Helicopter            3440
Glider                 508
Balloon                231
Gyrocraft              173
Weight-Shift           161
Powered Parachute       91
Ultralight              30
Unknown                 14
WSFT                     9
Powered-Lift             5
Blimp                    4
UNK                      2
Rocket                   1
ULTR                     1
Name: Aircraft.Category, dtype: int64

In [11]:
df_aircraft = df[df['Aircraft.Category']=='Airplane']
#filtered 'Aircraft.Category' to just airplanes

In [12]:
df_aircraft.head()


Unnamed: 0,Event.Id,Investigation.Type,Accident.Number,Event.Date,Location,Country,Latitude,Longitude,Airport.Code,Airport.Name,...,Purpose.of.flight,Air.carrier,Total.Fatal.Injuries,Total.Serious.Injuries,Total.Minor.Injuries,Total.Uninjured,Weather.Condition,Broad.phase.of.flight,Report.Status,Publication.Date
5,20170710X52551,Accident,NYC79AA106,1979-09-17,"BOSTON, MA",United States,42.445277,-70.758333,,,...,,Air Canada,,,1.0,44.0,VMC,Climb,Probable Cause,19-09-2017
7,20020909X01562,Accident,SEA82DA022,1982-01-01,"PULLMAN, WA",United States,,,,BLACKBURN AG STRIP,...,Personal,,0.0,0.0,0.0,2.0,VMC,Takeoff,Probable Cause,01-01-1982
8,20020909X01561,Accident,NYC82DA015,1982-01-01,"EAST HANOVER, NJ",United States,,,N58,HANOVER,...,Business,,0.0,0.0,0.0,2.0,IMC,Landing,Probable Cause,01-01-1982
12,20020917X02148,Accident,FTW82FRJ07,1982-01-02,"HOMER, LA",United States,,,,,...,Personal,,0.0,0.0,1.0,0.0,IMC,Cruise,Probable Cause,02-01-1983
13,20020917X02134,Accident,FTW82FRA14,1982-01-02,"HEARNE, TX",United States,,,T72,HEARNE MUNICIPAL,...,Personal,,1.0,0.0,0.0,0.0,IMC,Takeoff,Probable Cause,02-01-1983


In [13]:
df_aircraft['Aircraft.Category'].value_counts()


Airplane    27617
Name: Aircraft.Category, dtype: int64

In [14]:
df_aircraft.isna().sum()


Event.Id                      0
Investigation.Type            0
Accident.Number               0
Event.Date                    0
Location                      7
Country                       7
Latitude                   5525
Longitude                  5534
Airport.Code               9844
Airport.Name               9361
Injury.Severity             814
Aircraft.damage            1282
Aircraft.Category             0
Registration.Number         226
Make                          9
Model                        31
Amateur.Built                17
Number.of.Engines          2754
Engine.Type                4226
FAR.Description             499
Schedule                  24627
Purpose.of.flight          3739
Air.carrier               16350
Total.Fatal.Injuries       3165
Total.Serious.Injuries     3224
Total.Minor.Injuries       2878
Total.Uninjured             900
Weather.Condition          3053
Broad.phase.of.flight     21209
Report.Status              4971
Publication.Date           2240
dtype: i

In [15]:
df_aircraft_clean = df_aircraft.dropna(subset=['Make', 'Model'])
#getting rid of rows that do not contain make/model

In [16]:
df_aircraft_clean.isna().sum()


Event.Id                      0
Investigation.Type            0
Accident.Number               0
Event.Date                    0
Location                      7
Country                       7
Latitude                   5498
Longitude                  5507
Airport.Code               9825
Airport.Name               9346
Injury.Severity             812
Aircraft.damage            1279
Aircraft.Category             0
Registration.Number         223
Make                          0
Model                         0
Amateur.Built                17
Number.of.Engines          2749
Engine.Type                4213
FAR.Description             499
Schedule                  24598
Purpose.of.flight          3730
Air.carrier               16323
Total.Fatal.Injuries       3159
Total.Serious.Injuries     3216
Total.Minor.Injuries       2871
Total.Uninjured             894
Weather.Condition          3044
Broad.phase.of.flight     21195
Report.Status              4962
Publication.Date           2237
dtype: i

In [17]:
df_aircraft_clean['Make'].value_counts()


CESSNA            4864
Cessna            3607
PIPER             2804
Piper             1910
BOEING            1034
                  ... 
HEMMER               1
W.H. Hunnicutt       1
CARR BRYAN           1
SHPAKOW THOMAS       1
ORLICAN S R O        1
Name: Make, Length: 3869, dtype: int64

In [18]:
df_aircraft_clean['Make'].value_counts().head(15)


CESSNA                4864
Cessna                3607
PIPER                 2804
Piper                 1910
BOEING                1034
BEECH                 1018
Beech                  674
Boeing                 287
MOONEY                 238
CIRRUS DESIGN CORP     218
AIR TRACTOR INC        217
AIRBUS                 216
Mooney                 181
Grumman                173
BELLANCA               158
Name: Make, dtype: int64

In [19]:
df['Make'].value_counts()


Cessna             22227
Piper              12029
CESSNA              4922
Beech               4330
PIPER               2841
                   ...  
Leonard Walters        1
Maule Air Inc.         1
Motley Vans            1
Perlick                1
ROYSE RALPH L          1
Name: Make, Length: 8237, dtype: int64

In [20]:
df_aircraft_cleaned = df.dropna(subset=['Make'])


In [21]:
df_aircraft_cleaned.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 88826 entries, 0 to 90347
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Event.Id                88826 non-null  object 
 1   Investigation.Type      88826 non-null  object 
 2   Accident.Number         88826 non-null  object 
 3   Event.Date              88826 non-null  object 
 4   Location                88774 non-null  object 
 5   Country                 88601 non-null  object 
 6   Latitude                34360 non-null  object 
 7   Longitude               34351 non-null  object 
 8   Airport.Code            50235 non-null  object 
 9   Airport.Name            52775 non-null  object 
 10  Injury.Severity         87843 non-null  object 
 11  Aircraft.damage         85650 non-null  object 
 12  Aircraft.Category       32275 non-null  object 
 13  Registration.Number     87547 non-null  object 
 14  Make                    88826 non-null

In [22]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].str.strip().str.upper()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].str.strip().str.upper()


In [23]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
    "AEROSTAR INTERNATIONAL INC": "AEROSTAR INTERNATIONAL",
    "AEROSTAR INTERNATIONAL INC.": "AEROSTAR INTERNATIONAL",
    "AEROSTAR INTERNATIONAL, INC.": "AEROSTAR INTERNATIONAL"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [24]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
    "AMERICAN CHAMPION (ACAC)": "AMERICAN CHAMPION",
    "AMERICAN CHAMPION AIRCRAFT": "AMERICAN CHAMPION",
    "AMERICAN CHAMPION AIRCRAFT COR": "AMERICAN CHAMPION"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [25]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
    "AMERICAN LEGAND AIRCRAFT": "AMERICAN LEGEND",
    "AMERICAN LEGEND AIRCRAFT CO": "AMERICAN LEGEND",
    "AMERICAN LEGEND AIRCRAFT CO.": "AMERICAN LEGEND",
    "AMERICAN LEGEND AIRCRAFT COMPA": "AMERICAN LEGEND"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [26]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
    "FIREFLY BALLOON, INC.": "FIREFLY",
    "FIREFLY BALLOONS": "FIREFLY",
    "FIREFLY BALLOONS 2010 INC": "FIREFLY",
    "FIREFLY BALLOONS INC": "FIREFLY",
    "FIREFLY BALLOONS, INC": "FIREFLY"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [27]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
    "AIR TRACTOR INC": "AIR TRACTOR",
    "AIR TRACTOR INC.": "AIR TRACTOR",
    "AIR TRACTOR, INC.": "AIR TRACTOR"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [28]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
"GRUMMAN ACFT ENG":"GRUMMAN",
"GRUMMAN ACFT ENG COR":"GRUMMAN",
"GRUMMAN ACFT ENG COR-SCHWEIZER":"GRUMMAN",
"GRUMMAN AIRCRAFT":"GRUMMAN",
"GRUMMAN AIRCRAFT COR-SCHWEIZER":"GRUMMAN",
"GRUMMAN AIRCRAFT ENG CORP":"GRUMMAN",
"GRUMMAN AMERICAN":"GRUMMAN",
"GRUMMAN AMERICAN AVIATION":"GRUMMAN",
"GRUMMAN AMERICAN AVIATION CORP":"GRUMMAN",
"GRUMMAN AMERICAN AVN. CORP":"GRUMMAN",
"GRUMMAN AMERICAN AVN. CORP.":"GRUMMAN",
"GRUMMAN AMERICAN CORPORATION":"GRUMMAN",
"GRUMMAN SCHWEIZER":"GRUMMAN",
"GRUMMAN-SCHWEIZER":"GRUMMAN"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [29]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
 "CESNA": "CESSNA",
    "CESSNA AIRCRAFT": "CESSNA",
    "CESSNA AIRCRAFT CO": "CESSNA",
    "CESSNA AIRCRAFT CO.": "CESSNA",
    "CESSNA AIRCRAFT COMPANY": "CESSNA",
    "CESSNA ECTOR":"CESSNA",
    "CESSNA REEMS":"CESSNA",
    "CESSNA REIMS":"CESSNA",
    "CESSNA ROBERTSON":"CESSNA",
    "CESSNA SKYHAWK II":"CESSNA",
    "CESSNA SOLOY":"CESSNA",
    "CESSNA WREN":"CESSNA",
    "CESSNA/AIR REPAIR INC":"CESSNA",
    "CESSNA/WEAVER":"CESSNA"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [30]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
"PIPER / LAUDEMAN":"PIPER",
"PIPER AEROSTAR":"PIPER",
"PIPER AIRCRAFT":"PIPER",
"PIPER AIRCRAFT CORPORATION":"PIPER",
"PIPER AIRCRAFT INC":"PIPER",
"PIPER AIRCRAFT, INC.":"PIPER",
"PIPER CUB CRAFTERS":"PIPER",
"PIPER PAWNEE":"PIPER",
"PIPER-AEROSTAR":"PIPER",
"PIPER-HARRIS":"PIPER",
"PIPER/CUB CRAFTERS":"PIPER",
"PIPER/STEVENS":"PIPER",
"PIPER/WALLY'S FLYERS INC":"PIPER"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [31]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
"BOEING (STEARMAN)":"BOEING",
"BOEING - CANADA (DE HAVILLAND)":"BOEING",
"BOEING 777-306ER":"BOEING",
"BOEING COMMERCIAL AIRPLANE GRO":"BOEING",
"BOEING COMPANY":"BOEING",
"BOEING COMPANY, LONG BEACH DIV":"BOEING",
"BOEING HELICOPTERS DIV.":"BOEING",
"BOEING OF CANADA/DEHAV DIV":"BOEING",
"BOEING STEARMAN":"BOEING",
"BOEING VERTOL":"BOEING",
"BOEING-BROWN":"BOEING",
"BOEING-STEARMAN":"BOEING",
"BOEING-VERTOL":"BOEING"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [32]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
"ROBINSON HELICOPTER":"ROBINSON",
"ROBINSON HELICOPTER CO":"ROBINSON",
"ROBINSON HELICOPTER CO INC":"ROBINSON",
"ROBINSON HELICOPTER CO.":"ROBINSON",
"ROBINSON HELICOPTER COMPANY":"ROBINSON",
"ROBINSON HELICOPTERS":"ROBINSON",
"ROBINSON MICHAEL E":"ROBINSON",
"ROBINSON STEWART J":"ROBINSON"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [33]:
df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({
"BEECH AIRCRAFT":"BEECH",
"BEECH AIRCRAFT CO.":"BEECH",
"BEECH AIRCRAFT CORP":"BEECH",
"BEECH AIRCRAFT CORPORATION":"BEECH",
"BEECHCRAFT":"BEECH",
"BEECHCRAFT CORPORATION":"BEECH",
"BEECHER":"BEECH"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned.loc[:,"Make"] = df_aircraft_cleaned.loc[:,"Make"].replace({


In [34]:
pd.set_option('display.max_rows', None)

In [35]:
df_aircraft_cleaned.loc[:,"Make"].value_counts().head(10)

CESSNA      27213
PIPER       14934
BEECH        5413
BOEING       2824
BELL         2722
ROBINSON     1682
GRUMMAN      1647
MOONEY       1334
BELLANCA     1045
HUGHES        932
Name: Make, dtype: int64

In [36]:
df_aircraft_cleaned.loc[:,"Make"].value_counts().head(15)

CESSNA               27213
PIPER                14934
BEECH                 5413
BOEING                2824
BELL                  2722
ROBINSON              1682
GRUMMAN               1647
MOONEY                1334
BELLANCA              1045
HUGHES                 932
AIR TRACTOR            918
SCHWEIZER              773
AERONCA                636
MCDONNELL DOUGLAS      608
MAULE                  589
Name: Make, dtype: int64

In [37]:
df_aircraft_cleaned['Investigation.Type'].value_counts()

Accident    84979
Incident     3847
Name: Investigation.Type, dtype: int64

In [38]:
df_aircraft_cleaned['Model'].value_counts()

152                     2367
172                     1756
172N                    1164
PA-28-140                932
150                      829
172M                     798
172P                     689
182                      659
180                      622
150M                     585
PA-18                    581
PA-18-150                578
PA-28-180                572
PA-28-161                569
PA-28-181                532
206B                     524
737                      489
PA-38-112                469
150L                     461
G-164A                   460
A36                      450
G-164B                   420
140                      402
170B                     389
206                      389
172S                     373
R44                      361
182P                     356
PA-32-300                356
PA-24-250                352
269C                     341
PA-28R-200               335
PA-12                    324
A188B                    318
PA-23-250     

In [39]:
df_aircraft_cleaned.head()

Unnamed: 0,Event.Id,Investigation.Type,Accident.Number,Event.Date,Location,Country,Latitude,Longitude,Airport.Code,Airport.Name,...,Purpose.of.flight,Air.carrier,Total.Fatal.Injuries,Total.Serious.Injuries,Total.Minor.Injuries,Total.Uninjured,Weather.Condition,Broad.phase.of.flight,Report.Status,Publication.Date
0,20001218X45444,Accident,SEA87LA080,1948-10-24,"MOOSE CREEK, ID",United States,,,,,...,Personal,,2.0,0.0,0.0,0.0,UNK,Cruise,Probable Cause,
1,20001218X45447,Accident,LAX94LA336,1962-07-19,"BRIDGEPORT, CA",United States,,,,,...,Personal,,4.0,0.0,0.0,0.0,UNK,Unknown,Probable Cause,19-09-1996
2,20061025X01555,Accident,NYC07LA005,1974-08-30,"Saltville, VA",United States,36.922223,-81.878056,,,...,Personal,,3.0,,,,IMC,Cruise,Probable Cause,26-02-2007
3,20001218X45448,Accident,LAX96LA321,1977-06-19,"EUREKA, CA",United States,,,,,...,Personal,,2.0,0.0,0.0,0.0,IMC,Cruise,Probable Cause,12-09-2000
4,20041105X01764,Accident,CHI79FA064,1979-08-02,"Canton, OH",United States,,,,,...,Personal,,1.0,2.0,,0.0,VMC,Approach,Probable Cause,16-04-1980


In [40]:
df_aircraft_cleaned['Make_Model'] = df_aircraft_cleaned['Make']+ " "+ df_aircraft_cleaned['Model']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned['Make_Model'] = df_aircraft_cleaned['Make']+ " "+ df_aircraft_cleaned['Model']


In [41]:
df_aircraft_cleaned.head()

Unnamed: 0,Event.Id,Investigation.Type,Accident.Number,Event.Date,Location,Country,Latitude,Longitude,Airport.Code,Airport.Name,...,Air.carrier,Total.Fatal.Injuries,Total.Serious.Injuries,Total.Minor.Injuries,Total.Uninjured,Weather.Condition,Broad.phase.of.flight,Report.Status,Publication.Date,Make_Model
0,20001218X45444,Accident,SEA87LA080,1948-10-24,"MOOSE CREEK, ID",United States,,,,,...,,2.0,0.0,0.0,0.0,UNK,Cruise,Probable Cause,,STINSON 108-3
1,20001218X45447,Accident,LAX94LA336,1962-07-19,"BRIDGEPORT, CA",United States,,,,,...,,4.0,0.0,0.0,0.0,UNK,Unknown,Probable Cause,19-09-1996,PIPER PA24-180
2,20061025X01555,Accident,NYC07LA005,1974-08-30,"Saltville, VA",United States,36.922223,-81.878056,,,...,,3.0,,,,IMC,Cruise,Probable Cause,26-02-2007,CESSNA 172M
3,20001218X45448,Accident,LAX96LA321,1977-06-19,"EUREKA, CA",United States,,,,,...,,2.0,0.0,0.0,0.0,IMC,Cruise,Probable Cause,12-09-2000,ROCKWELL 112
4,20041105X01764,Accident,CHI79FA064,1979-08-02,"Canton, OH",United States,,,,,...,,1.0,2.0,,0.0,VMC,Approach,Probable Cause,16-04-1980,CESSNA 501


In [36]:
df_aircraft_cleaned['Make_Model'].value_counts()

CESSNA 152                                            2367
CESSNA 172                                            1754
CESSNA 172N                                           1164
PIPER PA-28-140                                        932
CESSNA 150                                             829
CESSNA 172M                                            798
CESSNA 172P                                            689
CESSNA 182                                             659
CESSNA 180                                             621
CESSNA 150M                                            585
PIPER PA-18                                            580
PIPER PA-18-150                                        577
PIPER PA-28-180                                        572
PIPER PA-28-161                                        569
PIPER PA-28-181                                        532
BELL 206B                                              516
BOEING 737                                             4

In [37]:
df_aircraft_cleaned['Purpose.of.flight'].value_counts()

Personal                     49436
Instructional                10601
Unknown                       6794
Aerial Application            4712
Business                      4018
Positioning                   1646
Other Work Use                1264
Ferry                          812
Aerial Observation             794
Public Aircraft                720
Executive/corporate            553
Flight Test                    404
Skydiving                      182
External Load                  123
Public Aircraft - Federal      105
Banner Tow                     101
Air Race show                   99
Public Aircraft - Local         74
Public Aircraft - State         64
Air Race/show                   59
Glider Tow                      53
Firefighting                    40
Air Drop                        11
ASHO                             6
PUBS                             4
PUBL                             1
Name: Purpose.of.flight, dtype: int64

In [38]:
df_aircraft_cleaned['Purpose.of.flight'].value_counts()

Personal                     49436
Instructional                10601
Unknown                       6794
Aerial Application            4712
Business                      4018
Positioning                   1646
Other Work Use                1264
Ferry                          812
Aerial Observation             794
Public Aircraft                720
Executive/corporate            553
Flight Test                    404
Skydiving                      182
External Load                  123
Public Aircraft - Federal      105
Banner Tow                     101
Air Race show                   99
Public Aircraft - Local         74
Public Aircraft - State         64
Air Race/show                   59
Glider Tow                      53
Firefighting                    40
Air Drop                        11
ASHO                             6
PUBS                             4
PUBL                             1
Name: Purpose.of.flight, dtype: int64

In [39]:
df_aircraft_cleaned.isna().sum()

Event.Id                      0
Investigation.Type            0
Accident.Number               0
Event.Date                    0
Location                     52
Country                     225
Latitude                  54466
Longitude                 54475
Airport.Code              38591
Airport.Name              36051
Injury.Severity             983
Aircraft.damage            3176
Aircraft.Category         56551
Registration.Number        1279
Make                          0
Model                        49
Amateur.Built               100
Number.of.Engines          6035
Engine.Type                7026
FAR.Description           56854
Schedule                  76283
Purpose.of.flight          6150
Air.carrier               72202
Total.Fatal.Injuries      11394
Total.Serious.Injuries    12500
Total.Minor.Injuries      11922
Total.Uninjured            5901
Weather.Condition          4454
Broad.phase.of.flight     27113
Report.Status              6349
Publication.Date          15227
Make_Mod

In [44]:

df_aircraft_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 88826 entries, 0 to 90347
Data columns (total 32 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Event.Id                88826 non-null  object 
 1   Investigation.Type      88826 non-null  object 
 2   Accident.Number         88826 non-null  object 
 3   Event.Date              88826 non-null  object 
 4   Location                88774 non-null  object 
 5   Country                 88601 non-null  object 
 6   Latitude                34360 non-null  object 
 7   Longitude               34351 non-null  object 
 8   Airport.Code            50235 non-null  object 
 9   Airport.Name            52775 non-null  object 
 10  Injury.Severity         87843 non-null  object 
 11  Aircraft.damage         85650 non-null  object 
 12  Aircraft.Category       32275 non-null  object 
 13  Registration.Number     87547 non-null  object 
 14  Make                    88826 non-null

In [47]:
df_aircraft_cleaned['Engine.Type'].replace('unk', 'Unknown', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aircraft_cleaned['Engine.Type'].replace('unk', 'Unknown', inplace=True)
