In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

# Load the raw aviation-fatalities CSV; the path is relative to the working directory.
df = pd.read_csv('global-fatalities-from-aviation-accidents-and-hijackings.csv')


In [None]:
def preprocess(df, year_col, value_cols):
    """Collapse raw fatality columns into one total per calendar year.

    Rows missing the year or any value column are dropped, exact duplicate
    rows are removed, the year is parsed with format ``'%Y'`` (unparseable
    values are discarded), the value columns are summed row-wise, rows whose
    total is not strictly positive are discarded, and the remaining totals
    are summed per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw input frame. It is not modified.
    year_col : str
        Name of the column holding the year.
    value_cols : sequence of str
        Names of the numeric columns to add together.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'Year'`` (integer dtype) and ``'Fatalities'``, one row
        per year in ascending year order, with a fresh 0..n-1 index.
    """
    value_cols = list(value_cols)
    rows = df.dropna(subset=[year_col] + value_cols).drop_duplicates()

    years = pd.to_datetime(rows[year_col], format='%Y', errors='coerce').dt.year
    totals = rows[value_cols].sum(axis=1)

    # Keep rows with a parseable year and a strictly positive total. The mask
    # is positional so it applies identically to `years` and `totals`.
    keep = (years.notna() & (totals > 0)).to_numpy()

    # `.dt.year` is float whenever a value was coerced to NaT; cast back to int
    # so callers can use the year in integer arithmetic such as range().
    years = years[keep].astype(int)

    # groupby sorts its keys, so the result is ordered by year ascending.
    per_year = totals[keep].groupby(years.to_numpy()).sum()
    return pd.DataFrame({'Year': per_year.index, 'Fatalities': per_year.to_numpy()})

# Build the per-year totals from the two fatality columns, then preview them.
cleaned_df = preprocess(
    df,
    year_col='Year',
    value_cols=['Hijacking fatalities', 'Fatalities from commercial airliners'],
)
cleaned_df.head()


In [None]:
# "Best" is the year with the fewest fatalities and "worst" the year with the
# most. idxmin/idxmax return the label of the first occurrence, so ties
# resolve to the earliest matching row.
best_value = int(cleaned_df['Fatalities'].min())
best_year = cleaned_df.loc[cleaned_df['Fatalities'].idxmin(), 'Year']

worst_value = int(cleaned_df['Fatalities'].max())
worst_year = cleaned_df.loc[cleaned_df['Fatalities'].idxmax(), 'Year']

print(f"Best Year: {best_year}, Fatalities: {best_value}")
print(f"Worst Year: {worst_year}, Fatalities: {worst_value}")


In [None]:
# One bar per year: green for the best year, red for the worst, gray otherwise.
# The best-year check comes first, so it wins if both years coincide.
colors = [
    'green' if year == best_year
    else 'red' if year == worst_year
    else 'gray'
    for year in cleaned_df['Year']
]

fig, ax = plt.subplots(figsize=(14, 7))
ax.bar(cleaned_df['Year'], cleaned_df['Fatalities'], color=colors)

# Dashed vertical markers carry the legend entries for the two highlighted years.
ax.axvline(x=best_year, color='green', linestyle='--', label=f'Best Year: {best_year}')
ax.axvline(x=worst_year, color='red', linestyle='--', label=f'Worst Year: {worst_year}')

ax.set_title('Aviation Fatalities: All Years with Highlighted Best and Worst Years')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Fatalities')
ax.legend()
ax.grid(axis='y')
plt.show()


In [None]:
# Target series: total fatalities, one observation per row of cleaned_df.
y = cleaned_df['Fatalities']

# ARIMA with order (p, d, q) = (1, 1, 1), fitted on the whole series.
model = ARIMA(endog=y, order=(1, 1, 1))
model_fit = model.fit()


In [None]:
# Forecast horizon: every year after the latest observed one, through 2030.
# int(...max()) keeps range() working even when 'Year' is stored as a float,
# and does not rely on the last row being the most recent year.
forecast_years = list(range(int(cleaned_df['Year'].max()) + 1, 2031))

# One out-of-sample step per year in the horizon.
forecast = model_fit.forecast(steps=len(forecast_years))

forecast_df = pd.DataFrame({'Year': forecast_years, 'Fatalities': forecast})
forecast_df.head()


In [None]:
# Bar chart of the predicted fatalities for each forecast year.
fig, ax = plt.subplots(figsize=(14, 7))
ax.bar(
    forecast_df['Year'],
    forecast_df['Fatalities'],
    color='blue',
    label='Forecast Fatalities',
)
ax.set_title('Aviation Fatalities Forecast to 2030 (ARIMA Model)')
ax.set_xlabel('Year')
ax.set_ylabel('Predicted Number of Fatalities')
ax.legend()
ax.grid(axis='y')
plt.show()


In [None]:
# Print the forecast, one line per year.
# The columns are iterated directly instead of using iterrows(): iterrows()
# packs each row into a single-dtype Series, which upcasts the integer year to
# float and makes it print as e.g. "2025.0".
print("Model Forecast to 2030:")
for year, fatalities in zip(forecast_df['Year'], forecast_df['Fatalities']):
    # int() truncates the predicted value toward zero, as before.
    print(f"Year: {int(year)}, Predicted Fatalities: {int(fatalities)}")
