In [21]:
import pandas as pd
import numpy as np
import datetime
from matplotlib import rcParams
import plotly.express as px

In [22]:
# Portuguese source column name -> English name used in the rest of the notebook.
columns = {
    'ocorrencia_latitude': 'latitude',
    'ocorrencia_longitude': 'longitude',
    'ocorrencia_dia': 'date',
    'ocorrencia_classificacao': 'classification',
    'ocorrencia_tipo': 'type',
    'ocorrencia_tipo_categoria': 'category_type',
    'ocorrencia_tipo_icao': 'icao_type',
    'ocorrencia_aerodromo': 'aerodrome',
    'ocorrencia_cidade': 'city',
    'investigacao_status': 'status',
    'divulgacao_relatorio_numero': 'report_number',
    'total_aeronaves_envolvidas': 'aircraft_involved',
    'ocorrencia_horario': 'hour_of_incident',
    'ocorrencia_uf': 'state',
    'ocorrencia_saida_pista': 'rwy_excursion',
}

# Source columns discarded right after loading.
unused_columns = [
    'ocorrencia_pais',
    'investigacao_aeronave_liberada',
    'divulgacao_relatorio_publicado',
    'divulgacao_dia_publicacao',
    'total_recomendacoes',
    'ocorrencia_dia_extracao',
]

# Read the raw CSV, drop the unused columns, then apply the English names.
# Columns absent from `columns` (e.g. 'codigo_ocorrencia') keep their original name.
df = (
    pd.read_csv('ocorrencias_aviacao.csv')
    .drop(columns=unused_columns)
    .rename(columns=columns)
)


In [23]:
# Display the column labels that remain after the drop/rename step.
df.columns

Index(['codigo_ocorrencia', 'classification', 'type', 'category_type',
       'icao_type', 'latitude', 'longitude', 'city', 'state', 'aerodrome',
       'date', 'hour_of_incident', 'status', 'report_number',
       'aircraft_involved', 'rwy_excursion'],
      dtype='object')

#### Convert the 'date' column (originally 'ocorrencia_dia') to datetime

In [24]:
# Parse the 'date' strings (year-month-day) into pandas datetime values.
date_format = '%Y-%m-%d'
df['date'] = pd.to_datetime(df['date'], format=date_format)

#### 'ocorrencia_horario' to datetime

In [25]:
# Parse the time-of-day strings into pandas datetime values.
# Only hours/minutes/seconds are supplied, so every value carries the
# placeholder date 1900-01-01 (visible in the displayed frames below).
time_format = '%H:%M:%S'
df['hour_of_incident'] = pd.to_datetime(df['hour_of_incident'], format=time_format)

#### Change ocorrencia_saida_pista to 0 and 1

In [26]:
# Encode runway excursion as an integer flag: 1 for 'SIM' (yes), 0 for anything else
# (including missing values).
# An existing 1 is accepted as well so the cell is safe to re-run: comparing against
# 'SIM' alone would turn every flag into 0 on a second execution.
df['rwy_excursion'] = df['rwy_excursion'].isin(['SIM', 1]).astype('int64')

#### Change CLASSIFICATION from ||| ACIDENTE, INCIDENTE, INCIDENTE GRAVE ||| to ||| ACCIDENT, INCIDENT, SERIOUS INCIDENT |||

In [27]:
# Translate the Portuguese classification labels to English with an explicit mapping.
# Values that are not keys of the mapping (missing data, unexpected labels, or labels
# already translated by an earlier run of this cell) are left unchanged rather than
# being silently relabelled 'SERIOUS INCIDENT'.
classification_labels = {
    'INCIDENTE': 'INCIDENT',
    'ACIDENTE': 'ACCIDENT',
    'INCIDENTE GRAVE': 'SERIOUS INCIDENT',
}
df['classification'] = df['classification'].replace(classification_labels)

#### Outlier cleaning (discards rows whose latitude or longitude falls outside Brazil)

##### Outlier identification

In [28]:
# Flag rows lying beyond the eastern (longitude) or northern (latitude) limit.
# NOTE(review): only these two upper limits are tested; points too far west or
# south, and rows with missing coordinates, are not flagged - confirm this is intended.
beyond_east_limit = df['longitude'] > -32.500
beyond_north_limit = df['latitude'] > 5.0900

id_list_lon = df.index[beyond_east_limit].tolist()
id_list_lat = df.index[beyond_north_limit].tolist()

# Concatenate both lists and remove duplicates while keeping first-seen order.
merge_list = list(dict.fromkeys(id_list_lon + id_list_lat))

len(merge_list)

94

##### Outlier Drop

In [29]:
# Remove the rows whose index labels were collected in `merge_list`.
# Reassigning with errors='ignore' keeps the cell re-runnable: labels that were
# already dropped are skipped instead of raising KeyError.
df = df.drop(index=merge_list, errors='ignore')

#### Export new dataset

In [30]:
# Write the cleaned frame to CSV; index=False keeps the row index out of the file.
export_path = './aeronautical_incidents.csv'
df.to_csv(export_path, index=False)

#### Filter by incident type

In [31]:
# Show only the occurrences whose type is 'ESTOURO DE PNEU' (tire burst).
is_tire_burst = df['type'].eq('ESTOURO DE PNEU')
df[is_tire_burst]

Unnamed: 0,codigo_ocorrencia,classification,type,category_type,icao_type,latitude,longitude,city,state,aerodrome,date,hour_of_incident,status,report_number,aircraft_involved,rwy_excursion
19,201407101908656,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,2.9938,-61.3140,ALTO ALEGRE,RR,SWUQ,2014-07-10,1900-01-01 14:15:00,FINALIZADA,***,1,0
34,201407141681337,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-23.0091,-44.3196,ANGRA DOS REIS,RJ,SDAG,2014-07-14,1900-01-01 19:25:00,FINALIZADA,***,1,0
37,201105165154118,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,2.8167,-60.6705,BOA VISTA,RR,SBBV,2011-05-16,1900-01-01 17:42:00,FINALIZADA,***,1,0
66,201211015848731,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-15.7941,-47.8879,BRASÍLIA,DF,SBBR,2012-11-01,1900-01-01 18:00:00,FINALIZADA,***,1,0
67,200908303419146,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-15.7941,-47.8879,BRASÍLIA,DF,****,2009-08-30,1900-01-01 16:35:00,FINALIZADA,***,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5146,201001207785633,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-18.9190,-48.2780,UBERLÂNDIA,MG,****,2010-01-20,1900-01-01 18:58:00,FINALIZADA,***,1,0
5152,201507077498699,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-19.7647,-47.9661,UBERLÂNDIA,MG,SBUL,2015-07-07,1900-01-01 22:50:00,FINALIZADA,***,1,0
5158,201404057400727,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-18.9190,-48.2780,UBERLÂNDIA,MG,SBUL,2014-04-05,1900-01-01 11:32:00,FINALIZADA,***,1,0
5166,201309261625993,INCIDENT,ESTOURO DE PNEU,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,-18.9190,-48.2780,UBERLÂNDIA,MG,SBUL,2013-09-26,1900-01-01 21:27:00,FINALIZADA,***,1,0


#### Filter by classification

In [32]:
# Show occurrences dated 2017 whose classification is in the wanted list.
# NOTE(review): the empty string in the list looks like a placeholder slot for a
# second classification - confirm whether it should be filled in or removed.
wanted_classifications = ['ACCIDENT','']
in_2017 = df['date'].dt.year.eq(2017)
is_wanted = df['classification'].isin(wanted_classifications)
df[in_2017 & is_wanted]

Unnamed: 0,codigo_ocorrencia,classification,type,category_type,icao_type,latitude,longitude,city,state,aerodrome,date,hour_of_incident,status,report_number,aircraft_involved,rwy_excursion
210,201711152026161,ACCIDENT,POUSO SEM TREM,CONTATO ANORMAL COM A PISTA,ARC,-10.8706,-61.8467,JI-PARANÁ,RO,SBJI,2017-11-11,1900-01-01 20:40:00,ATIVA,A-141/CENIPA/2017,1,0
216,201706241353561,ACCIDENT,CAUSADO POR FENÔMENO METEOROLÓGICO EM VOO,OUTROS,OTHR,3.2483,-60.2467,BONFIM,RR,SSJL,2017-06-21,1900-01-01 21:00:00,FINALIZADA,A-087/CENIPA/2017,1,0
217,201710191757029,ACCIDENT,FALHA DO MOTOR EM VOO,FALHA OU MAU FUNCIONAMENTO DO MOTOR,SCF-PP,3.3503,-59.8780,BONFIM,RR,****,2017-10-16,1900-01-01 11:40:00,FINALIZADA,***,1,0
218,201707050515445,ACCIDENT,FALHA DO MOTOR EM VOO,FALHA OU MAU FUNCIONAMENTO DO MOTOR,SCF-PP,2.7917,-60.5900,CANTÁ,RR,SWPD,2017-07-03,1900-01-01 15:00:00,ATIVA,A-089/CENIPA/2017,1,0
255,201706152048494,ACCIDENT,FALHA DO MOTOR EM VOO,FALHA OU MAU FUNCIONAMENTO DO MOTOR,SCF-PP,2.1111,-62.4453,CARACARAÍ,RR,SJLU,2017-06-14,1900-01-01 18:00:00,ATIVA,A-085/CENIPA/2017,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5048,201704031537231,ACCIDENT,INDETERMINADO,INDETERMINADO,UNK,-23.4611,-47.4900,SOROCABA,SP,****,2017-03-31,1900-01-01 17:45:00,ATIVA,A-054/CENIPA/2017,1,0
5082,201711271331205,ACCIDENT,EXCURSÃO DE PISTA,EXCURSÃO DE PISTA,RE,-21.2572,-49.7942,UBARANA,SP,SDTO,2017-11-25,1900-01-01 11:30:00,ATIVA,A-151/CENIPA/2017,1,1
5086,201707061920096,ACCIDENT,POUSO LONGO,EXCURSÃO DE PISTA,RE,-23.4414,-45.0761,UBATUBA,SP,SDUB,2017-07-04,1900-01-01 20:20:00,ATIVA,A-090/CENIPA/2017,1,1
5106,201709121317043,ACCIDENT,PERDA DE CONTROLE EM VOO,PERDA DE CONTROLE EM VOO,LOC-I,-19.8628,-49.7739,SÃO FRANCISCO DE SALES,MG,****,2017-09-02,1900-01-01 18:30:00,FINALIZADA,***,1,0
