In [6]:
# Imported Dependencies
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX # Predictive analysis model used

In [None]:
# Load the raw CSV and prepare it in one pipeline:
#   1. discard every row that contains an NA value,
#   2. discard the per-sex count/rate columns,
#   3. parse the 'year' column as a datetime (format '%Y'),
#   4. use that parsed year as the index.
sex_specific_columns = [
    'num_suicide_male',
    'num_suicide_female',
    'suicide_rate_male',
    'suicide_rate_female',
]

file = (
    pd.read_csv("suicide_all.csv")
    .dropna()
    .drop(columns=sex_specific_columns)
    .assign(year=lambda frame: pd.to_datetime(frame['year'], format='%Y'))
    .set_index('year')
)


# --- Total suicides: fit a model, compare one flagged year, plot the result ---
spike_years = ['1983', '1998']
column = "num_suicide_total"
plot_title = "Total Number of Suicides in Japan"

# Work on a copy so the shared `file` frame does not gain a 'Forecast' column.
num_total_suicide_data = file.copy()

# SARIMAX with order (1, 1, 1) and a seasonal order of all zeros.
model = SARIMAX(
    num_total_suicide_data[column],
    order=(1, 1, 1),
    seasonal_order=(0, 0, 0, 0),
)
result = model.fit()

# Keep the model's predictions for 1978 through 2022 beside the observed values.
num_total_suicide_data['Forecast'] = result.predict(start='1978', end='2022')

# Observed-minus-predicted difference; only the first entry of spike_years is checked.
year_to_check = spike_years[0]
check_date = pd.to_datetime(year_to_check)
actual_value = num_total_suicide_data.at[check_date, column]
forecasted_value = num_total_suicide_data.at[check_date, 'Forecast']
spike = actual_value - forecasted_value
report = (
    f"In {year_to_check}, the actual value was {actual_value:.5f}, forecasted was {forecasted_value:.5f}, spike: {spike:.5f}"
    if spike > 0
    else f"In {year_to_check}, no spike detected."
)
print(report)

# Observed series (blue) and predicted series (red) on one set of axes.
fig, ax = plt.subplots(figsize=(20, 12))
num_total_suicide_data[column].plot(ax=ax, legend=True, color='blue', label='Actual Data')
num_total_suicide_data['Forecast'].plot(ax=ax, legend=True, color='red', label='Forecast')
ax.set_title(plot_title)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Suicides')

# Dashed vertical marker per flagged year: green for 1983, orange for any other.
for spike_year in spike_years:
    if spike_year == '1983':
        line_color = 'green'
    else:
        line_color = 'orange'
    ax.axvline(pd.to_datetime(spike_year), color=line_color, linestyle='--', label=f'{spike_year} Spike')
ax.legend()
plt.show()




In [None]:
# --- Ages 40-49: fit a model, compare one flagged year, plot the result ---
# Years to mark on the chart; the first one is also used for the spike check.
spike_years=['1983', '1998']
# Copy so the 'Forecast' column added below does not end up on `file`.
age_group_40_49 = file.copy()
plot_title = "Total Number of Suicides in Japan (ages 40-49)"
column = "num_suicide_age_40_49"

# Fit SARIMAX model on the specified data column
# (order (1, 1, 1); seasonal order is all zeros).
model = SARIMAX(age_group_40_49[column], order=(1, 1, 1), seasonal_order=(0, 0, 0, 0))
result = model.fit()

# Generate forecast: model predictions for 1978 through 2022, stored beside
# the observed values.
age_group_40_49['Forecast'] = result.predict(start='1978', end='2022')

# Check for spike in a specified year: a positive (actual - forecast)
# difference is reported as a spike. Only spike_years[0] is checked.
year_to_check = spike_years[0]
actual_value = age_group_40_49.at[pd.to_datetime(year_to_check), column]
forecasted_value = age_group_40_49.at[pd.to_datetime(year_to_check), 'Forecast']
spike = actual_value - forecasted_value
if spike > 0:
    print(f"In {year_to_check}, the actual value was {actual_value:.5f}, forecasted was {forecasted_value:.5f}, spike: {spike:.5f}")
else:
    print(f"In {year_to_check}, no spike detected.")

# Plot figure: observed series in blue, predicted series in red.
plt.figure(figsize=(20, 12))
age_group_40_49[column].plot(legend=True, color='blue', label='Actual Data')
age_group_40_49['Forecast'].plot(legend=True, color ='red', label='Forecast')
# The title must be the plot_title string, not the DataFrame.
plt.title(plot_title)
plt.xlabel('Year')
plt.ylabel('Number of Suicides')
# Dashed vertical marker per flagged year: green for 1983, orange otherwise.
for spike_year in spike_years:
    plt.axvline(pd.to_datetime(spike_year), color='green' if spike_year == '1983' else 'orange', linestyle='--', label=f'{spike_year} Spike')
plt.legend()
plt.show()

In [None]:
# --- Ages 50-59: fit a model, compare one flagged year, plot the result ---
spike_years = ['1983', '1998']
column = "num_suicide_age_50_59"
plot_title = "Total Number of Suicides in Japan (ages 50-59)"

# Independent copy: the 'Forecast' column is added here, not on `file`.
age_group_50_59 = file.copy()

# SARIMAX on the selected column, order (1, 1, 1), seasonal order all zeros.
model = SARIMAX(
    age_group_50_59[column],
    order=(1, 1, 1),
    seasonal_order=(0, 0, 0, 0),
)
result = model.fit()

# Model predictions for 1978 through 2022, stored beside the observed values.
age_group_50_59['Forecast'] = result.predict(start='1978', end='2022')

# Spike check for the first flagged year: actual minus predicted.
year_to_check = spike_years[0]
check_date = pd.to_datetime(year_to_check)
actual_value = age_group_50_59.at[check_date, column]
forecasted_value = age_group_50_59.at[check_date, 'Forecast']
spike = actual_value - forecasted_value
report = (
    f"In {year_to_check}, the actual value was {actual_value:.5f}, forecasted was {forecasted_value:.5f}, spike: {spike:.5f}"
    if spike > 0
    else f"In {year_to_check}, no spike detected."
)
print(report)

# Chart: observed series in blue, predicted series in red.
fig, ax = plt.subplots(figsize=(20, 12))
age_group_50_59[column].plot(ax=ax, legend=True, color='blue', label='Actual Data')
age_group_50_59['Forecast'].plot(ax=ax, legend=True, color='red', label='Forecast')
ax.set_title(plot_title)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Suicides')

# Dashed vertical marker per flagged year: green for 1983, orange for any other.
for spike_year in spike_years:
    if spike_year == '1983':
        line_color = 'green'
    else:
        line_color = 'orange'
    ax.axvline(pd.to_datetime(spike_year), color=line_color, linestyle='--', label=f'{spike_year} Spike')
ax.legend()
plt.show()

In [None]:
# Combined chart: one line per age group, drawn from the columns of `file`.

# Maps each column name in `file` to the label shown in the legend.
# NOTE(review): "num_suicide_60_plus" has no "age_" infix, unlike the other
# keys -- confirm it matches the CSV header.
age_groups = {
    "num_suicide_age_0_19": "Age 0-19",
    "num_suicide_age_20_29": "Age 20-29",
    "num_suicide_age_30_39": "Age 30-39",
    "num_suicide_age_40_49": "Age 40-49",
    "num_suicide_age_50_59": "Age 50-59",
    "num_suicide_60_plus": "Age 60+"
}

fig, ax = plt.subplots(figsize=(12, 6))

# Draw each age group's series against the frame's index.
for column_name, group_label in age_groups.items():
    ax.plot(file.index, file[column_name], label=group_label)

# Dashed vertical markers for the two flagged years: (date, colour, legend label).
spike_markers = (
    ('1983-01-01', 'green', '1983 Spike'),
    ('1998-01-01', 'orange', '1998 Spike'),
)
for marker_date, marker_color, marker_label in spike_markers:
    ax.axvline(pd.Timestamp(marker_date), color=marker_color, linestyle='--', label=marker_label)

ax.set_title("Suicide Totals in Japan by Age Group (1978-2022)")
ax.set_xlabel("Year")
ax.set_ylabel("Total Suicides")

# Anchor the legend just outside the axes; tight_layout then adjusts the layout.
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
fig.tight_layout()
plt.show()