In [14]:
# Third-party libraries used by the cells below.
import numpy as np
import pandas as pd

# Use 10 digits of floating-point precision when pandas / NumPy print values.
pd.set_option("display.precision", 10)
np.set_printoptions(precision=10)

# Load the raw crash records; the CSV file is semicolon-separated.
df_crashes = pd.read_csv("Airplane_crashes.csv", sep=";")

In [None]:
# --- Clean the raw crash records -------------------------------------------
# Parse the crash date, reading ambiguous values day-first.
df_crashes["Date"] = pd.to_datetime(df_crashes["Date"], dayfirst=True)

# Keep only the first H:MM / HH:MM token found in each Time string, then turn
# it into a datetime.time; values that do not parse are coerced to missing.
df_crashes["Time"] = df_crashes["Time"].str.extract(r'(\d{1,2}:\d{2})')[0]
df_crashes["Time"] = pd.to_datetime(df_crashes["Time"], format='%H:%M', errors='coerce').dt.time

# Keep rows with at least one person aboard and known Aboard / Fatalities
# counts (later cells divide Fatalities by Aboard).
df_crashes = df_crashes[df_crashes['Aboard'] > 0]
df_crashes = df_crashes.dropna(subset=['Aboard', 'Fatalities'])

# reset_index returns a new frame, so the result must be assigned; otherwise
# the gapped row labels left by the filters above are kept (and later exported).
df_crashes = df_crashes.reset_index(drop=True)

# Display the cleaned frame as the cell output.
df_crashes


In [16]:
# Split the crash date into calendar parts (month number, year, weekday name).
# Uses the .dt accessor, so "Date" must already be a datetime column.
date_parts = df_crashes["Date"].dt
df_crashes["Month"] = date_parts.month
df_crashes["Year"] = date_parts.year
df_crashes["Weekday"] = date_parts.day_name()

In [17]:
# Per-crash share of the people aboard who died, and its complement.
fatality_rate = df_crashes["Fatalities"].div(df_crashes["Aboard"])
df_crashes["Fatality_Rate"] = fatality_rate
df_crashes["Survival_Rate"] = 1 - fatality_rate

In [18]:
# Quartile cut-offs of the per-crash survival rate.
survival_rates = df_crashes['Survival_Rate']
q_low = survival_rates.quantile(0.25)
q_high = survival_rates.quantile(0.75)


def _survival_band(rate):
    """Label a survival rate by its position relative to the quartile cut-offs.

    The upper-quartile check runs first, so a rate that satisfies both
    comparisons is labelled 'High Survival'.
    """
    if rate >= q_high:
        return 'High Survival'
    if rate <= q_low:
        return 'High Fatality'
    return 'Moderate'


df_crashes['Category'] = survival_rates.apply(_survival_band)


In [19]:
def classify_impact(row):
    """Return an impact label for one crash record.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'Fatalities'; 'Survival_Rate' is read only when one of
        the fatality-count thresholds is met.

    Returns
    -------
    str
        'Mass Fatality', 'Severe Crash', 'Minor Fatality', 'All Survived'
        or 'Moderate Impact'. Rules are tried in that order and the first
        match wins.
    """
    deaths = row['Fatalities']

    # Very large death toll with almost nobody surviving.
    if deaths >= 100 and row['Survival_Rate'] < 0.1:
        return 'Mass Fatality'
    # Sizeable death toll with few survivors.
    if deaths >= 30 and row['Survival_Rate'] < 0.2:
        return 'Severe Crash'
    # Some deaths, but at least 80% of the people aboard survived.
    if deaths > 0 and row['Survival_Rate'] >= 0.8:
        return 'Minor Fatality'
    # Everything else: no deaths at all, or an in-between outcome.
    return 'All Survived' if deaths == 0 else 'Moderate Impact'

# Label every crash row by row with classify_impact.
impact_labels = df_crashes.apply(classify_impact, axis=1)
df_crashes['Impact_Category'] = impact_labels

In [20]:
# One row per year: number of accidents (non-null Date values) plus the
# yearly totals of people aboard and fatalities.
annual = df_crashes.groupby('Year').agg(
    Num_Accidents=('Date', 'count'),
    Aboard=('Aboard', 'sum'),
    Fatalities=('Fatalities', 'sum'),
)

# Yearly rates are computed from the yearly totals, not averaged per crash.
annual_fatality_rate = annual['Fatalities'] / annual['Aboard']
annual['Fatality_Rate'] = annual_fatality_rate
annual['Survival_Rate'] = 1 - annual_fatality_rate

In [21]:
def _zscore(values):
    """Standardize a Series: subtract its mean, divide by its standard deviation."""
    return (values - values.mean()) / values.std()


accident_counts = annual['Num_Accidents']

# Mean over a 5-row rolling window, and change from the previous row.
annual['Accidents_Rolling'] = accident_counts.rolling(window=5).mean()
annual['Change'] = accident_counts.diff()

# Standardized versions of the row-to-row change and of the raw counts.
annual['Change_Z'] = _zscore(annual['Change'])
annual['Num_Accidents_Z'] = _zscore(accident_counts)

In [22]:
# Write the per-crash table and the per-year summary to CSV files
# (each frame's index is written as the first column).
for frame, out_path in ((df_crashes, 'crashes.csv'), (annual, 'annual.csv')):
    frame.to_csv(out_path)