In [None]:
# install Pint if necessary
# Probe for Pint by importing it; the IPython shell escape below runs pip
# only when that import raises ImportError.
try:
  from pint import UnitRegistry
except ImportError:
  !pip install pint

In [None]:
# download modsim.py if necessary

from os.path import basename, exists


def download(url):
  """Save `url` in the working directory under its basename.

  Nothing is fetched when a file with that name already exists; otherwise
  the file is retrieved and the saved path is printed.
  """
  target = basename(url)
  if exists(target):
    return
  from urllib.request import urlretrieve
  saved_path, _headers = urlretrieve(url, target)
  print('Downloaded ' + saved_path)


# Fetch modsim.py from the ModSimPy repository; download() skips the request
# when a file named modsim.py is already present in the working directory.
download('https://github.com/AllenDowney/ModSimPy/raw/master/' +
         'modsim.py')

In [None]:
import pandas as pd
from modsim import *
import matplotlib.pyplot as plt
import numpy as np
import random

<h2> Menghapus kolom yang tidak diperlukan </h2>

In [None]:
# Remove the unneeded 'index' column from the sample CSV files, in place.
data_have_index = [
  "sample/anxiety-disorders-males-vs-females.csv",
  "sample/bipolar-disorder-in-males-vs-females.csv",
  "sample/depression-males-vs-females.csv",
  "sample/eating-disorders-in-males-vs-females.csv",
  "sample/Mental health Depression disorder Data.csv",
  "sample/schizophrenia-in-males-vs-females.csv"
]

for file_path in data_have_index:
  df = pd.read_csv(file_path)
  # Leave files without the column untouched: rewriting them would only
  # round-trip the raw data through pandas for no change.
  if 'index' not in df.columns:
    continue
  df = df.drop('index', axis=1)
  df.to_csv(file_path, index=False)

In [None]:
# Reload the anxiety file and display it as the cell output to check the
# result of the column clean-up.
df = pd.read_csv("sample/anxiety-disorders-males-vs-females.csv")
df

<h2>Cleaning data bunuh diri</h2>

In [None]:
# Load the youth suicide-rate table and collapse it to one row per year:
# the mean of each numeric column over all rows that share that year.
df_grouped_bundir = pd.read_csv("sample/suicide-rates-among-young-people.csv")
# numeric_only=True keeps mean() from raising TypeError on pandas >= 2.0 when
# the file carries text columns (assumed here, e.g. entity names — TODO confirm);
# older pandas versions dropped such columns implicitly.
df_grouped_bundir = df_grouped_bundir.groupby('Year').mean(numeric_only=True).reset_index()
df_grouped_bundir

In [None]:
# The three source columns share one long prefix and differ only in the age group.
rate_prefix = 'Death rate per 100,000 people - Cause: Self-harm - Sex: Both sexes - Age group: '

# Copy each age group's death rate into a short-named column.
rate_15_19 = df_grouped_bundir[rate_prefix + 'YEARS15-19']
df_grouped_bundir['Change_15_19'] = rate_15_19
rate_20_24 = df_grouped_bundir[rate_prefix + 'YEARS20-24']
df_grouped_bundir['Change_20_24'] = rate_20_24
rate_25_34 = df_grouped_bundir[rate_prefix + 'YEARS25-34']
df_grouped_bundir['Change_25_34'] = rate_25_34

# Yearly suicide share as a percentage: the three per-100,000 rates are
# summed, divided by 300,000 and multiplied by 100.
combined_rate = rate_15_19 + rate_20_24 + rate_25_34
df_grouped_bundir['Percentage_Bundir'] = (combined_rate / 300000) * 100

df_grouped_bundir['Percentage_Bundir']

# Step cleaning data

1. Proses cleaning data dimulai dengan memeriksa apakah ada nilai yang hilang (missing) pada file.
2. Mengisi data yang kosong atau NaN dengan metode ffill (mengisi nilai kosong dengan nilai valid terakhir pada baris sebelumnya).
3. Mengubah nama kolom dalam tabel.
4. Memfilter data berdasarkan tahun.
5. Mengelompokkan data berdasarkan tahun, lalu menghitung mean per tahun.
6. Langkah terakhir, mean female dan male dijumlahkan.

<h3>Anxiety</h3>

In [None]:
# Load the anxiety prevalence table and show the total number of missing
# cells across the whole frame.
df_anxiety = pd.read_csv('sample/anxiety-disorders-males-vs-females.csv')
df_anxiety.isna().sum().sum()

In [None]:
# Forward-fill missing cells. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills identically.
# NOTE(review): the fill runs down the file in row order, so a gap in the
# first rows of one group is filled from the rows above it — confirm this is
# acceptable if the file stacks several entities.
df_anxiety = df_anxiety.ffill()
# Total number of cells still missing after the fill.
df_anxiety.isnull().sum().sum()

In [None]:
# Give the two prevalence columns short names.
df_anxiety = df_anxiety.rename(columns={
  'Prevalence - Anxiety disorders - Sex: Female - Age: Age-standardized (Percent)': 'Anxiety_Female',
  'Prevalence - Anxiety disorders - Sex: Male - Age: Age-standardized (Percent)': 'Anxiety_Male',
})

# Keep the years 1990 through 2017, both ends included.
df_anxiety_filtered = df_anxiety[df_anxiety['Year'].between(1990, 2017)]

# Mean prevalence per year for each sex.
df_anxiety_mean = df_anxiety_filtered.groupby('Year')[['Anxiety_Male', 'Anxiety_Female']].mean()

# Per-year sum of the female and male means.
total_df_anxiety_mean = df_anxiety_mean['Anxiety_Female'].add(df_anxiety_mean['Anxiety_Male'])

total_df_anxiety_mean

<h3>Bipolar</h3>

In [None]:
# Load the bipolar prevalence table and show the missing-value count per column.
df_bipolar = pd.read_csv('sample/bipolar-disorder-in-males-vs-females.csv')
df_bipolar.isna().sum()

In [None]:
# Forward-fill missing cells. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills identically.
# NOTE(review): the fill runs down the file in row order — confirm this is
# acceptable if the file stacks several entities.
df_bipolar = df_bipolar.ffill()
# Missing-value count per column after the fill.
df_bipolar.isnull().sum()

In [None]:
# Give the two prevalence columns short names.
df_bipolar = df_bipolar.rename(columns={
  'Prevalence - Bipolar disorder - Sex: Male - Age: Age-standardized (Percent)': 'Bipolar_Male',
  'Prevalence - Bipolar disorder - Sex: Female - Age: Age-standardized (Percent)': 'Bipolar_Female',
})

# Keep the years 1990 through 2017, both ends included.
df_bipolar_filtered = df_bipolar[df_bipolar['Year'].between(1990, 2017)]

# Mean prevalence per year for each sex.
df_bipolar_mean = df_bipolar_filtered.groupby('Year')[['Bipolar_Male', 'Bipolar_Female']].mean()

# Per-year sum of the male and female means.
total_df_bipolar_mean = df_bipolar_mean['Bipolar_Male'].add(df_bipolar_mean['Bipolar_Female'])

total_df_bipolar_mean

<h3>Depression</h3>

In [None]:
# Load the depression prevalence table and show the missing-value count per column.
df_depression = pd.read_csv('sample/depression-males-vs-females.csv')
df_depression.isna().sum()

In [None]:
# Forward-fill missing cells. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills identically.
# NOTE(review): the fill runs down the file in row order — confirm this is
# acceptable if the file stacks several entities.
df_depression = df_depression.ffill()
# Missing-value count per column after the fill.
df_depression.isnull().sum()

In [None]:
# Give the two prevalence columns short names.
df_depression = df_depression.rename(columns={
  'Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)': 'Depression_Female',
  'Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)': 'Depression_Male',
})

# Keep the years 1990 through 2017, both ends included.
df_depression_filtered = df_depression[df_depression['Year'].between(1990, 2017)]

# Mean prevalence per year for each sex.
df_depression_mean = df_depression_filtered.groupby('Year')[['Depression_Male', 'Depression_Female']].mean()

# Per-year sum of the male and female means.
total_df_depression_mean = df_depression_mean['Depression_Male'].add(df_depression_mean['Depression_Female'])

total_df_depression_mean

<h3>Schizo</h3>

In [None]:
# Load the schizophrenia prevalence table and show the missing-value count per column.
df_schizo = pd.read_csv("sample/schizophrenia-in-males-vs-females.csv")
df_schizo.isna().sum()

In [None]:

# Forward-fill missing cells. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills identically.
# NOTE(review): the fill runs down the file in row order — confirm this is
# acceptable if the file stacks several entities.
df_schizo = df_schizo.ffill()
# Missing-value count per column after the fill.
df_schizo.isnull().sum()

In [None]:
# Give the two prevalence columns short names.
df_schizo = df_schizo.rename(columns={
  'Prevalence - Schizophrenia - Sex: Female - Age: Age-standardized (Percent)': 'Schizo_Female',
  'Prevalence - Schizophrenia - Sex: Male - Age: Age-standardized (Percent)': 'Schizo_Male',
})

# Keep the years 1990 through 2017, both ends included.
df_schizo_filtered = df_schizo[df_schizo['Year'].between(1990, 2017)]

# Mean prevalence per year for each sex.
df_schizo_mean = df_schizo_filtered.groupby('Year')[['Schizo_Male', 'Schizo_Female']].mean()

# Per-year sum of the male and female means.
total_df_schizo_mean = df_schizo_mean['Schizo_Male'].add(df_schizo_mean['Schizo_Female'])

total_df_schizo_mean

## Eating

In [None]:
# Load the eating-disorder prevalence table and show the missing-value count per column.
df_eating = pd.read_csv('sample/eating-disorders-in-males-vs-females.csv')
df_eating.isna().sum()

In [None]:
# Forward-fill missing cells. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills identically.
# NOTE(review): the fill runs down the file in row order — confirm this is
# acceptable if the file stacks several entities.
df_eating = df_eating.ffill()
# Missing-value count per column after the fill.
df_eating.isnull().sum()

In [None]:
# Give the two prevalence columns short names.
df_eating = df_eating.rename(columns={
  'Prevalence - Eating disorders - Sex: Male - Age: Age-standardized (Percent)': 'Eating_Male',
  'Prevalence - Eating disorders - Sex: Female - Age: Age-standardized (Percent)': 'Eating_Female',
})

# Keep the years 1990 through 2017, both ends included.
df_eating_filtered = df_eating[df_eating['Year'].between(1990, 2017)]

# Mean prevalence per year for each sex.
df_eating_mean = df_eating_filtered.groupby('Year')[['Eating_Male', 'Eating_Female']].mean()

# Per-year sum of the male and female means.
total_df_eating_mean = df_eating_mean['Eating_Male'].add(df_eating_mean['Eating_Female'])

total_df_eating_mean

In [None]:
# dictionary yang memetakan setiap penyakit mental ke rentang prevalensinya (dalam persentase) untuk laki-laki dan perempuan.
# Nilai-nilai ini diambil dari data diatas diambil terkecil dan terbesar..
def _min_max(series):
  """Return the (smallest, largest) value of a per-year mean series."""
  return series.min(), series.max()


# Maps each mental illness to its prevalence range (in percent) for males and
# for females: the smallest and largest per-year mean computed above.
# Each sex uses its own column. The previous version put the male+female sum
# into both entries, so every range was the combined figure rather than the
# prevalence of that sex.
prevalence_range = {
  'schizophrenia': {'male': _min_max(df_schizo_mean['Schizo_Male']),
                    'female': _min_max(df_schizo_mean['Schizo_Female'])},
  'anxiety': {'male': _min_max(df_anxiety_mean['Anxiety_Male']),
              'female': _min_max(df_anxiety_mean['Anxiety_Female'])},
  'bipolar': {'male': _min_max(df_bipolar_mean['Bipolar_Male']),
              'female': _min_max(df_bipolar_mean['Bipolar_Female'])},
  'eating_disorders': {'male': _min_max(df_eating_mean['Eating_Male']),
                       'female': _min_max(df_eating_mean['Eating_Female'])},
  'depression': {'male': _min_max(df_depression_mean['Depression_Male']),
                 'female': _min_max(df_depression_mean['Depression_Female'])},
}

# Assumed suicide probability associated with each mental illness.
suicide_risk = {
  'schizophrenia': 0.1,
  'anxiety': 0.05,
  'bipolar': 0.08,
  'eating_disorders': 0.02,
  'depression': 0.07,
}

# (male population, female population) pairs to simulate.
populations = [(489128, 350000)]

In [None]:
# Seed both random number generators used in this notebook (the stdlib
# `random` module and NumPy's global generator) so that Restart & Run All
# reproduces the same simulation results.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# System: the fixed inputs of the simulation. num_points (how many prevalence
# values are sampled between each min and max) is itself drawn at random
# from 3..23.
system = System(prevalence_range=prevalence_range, suicide_risk=suicide_risk, num_points=random.randint(3, 23),
                iterations=1000)
# State: containers that later cells fill with results.
state = State(results_male=[], results_female=[], populations=populations, predictions=[])

## Monte Carlo simulation
Penjelasan:

1. Setelah looping menginisialisasikan `min_val` dan `max_val` yang diambil dari hasil menghitung rata2 penyakit mental dari 100 ribu orang pertahunnya.
2. `preval_values` akan memiliki sebuah array yang dimulai dari `min_val` sampai `max_val` dengan panjang `system.num_points`.
3. `num_male_with_disease` menghitung jumlah laki-laki atau perempuan dengan penyakit saat ini. Dengan asumsi populasi dan `preval_value` dipilih secara acak. Nilai akan dikalikan dan dibagi 100.
4. `num_suicide_male` mensimulasikan jumlah kasus bunuh diri di antara laki-laki atau perempuan saat ini. Binomial akan menghasilkan bilangan acak pada kasus bunuh diri berdasarkan distribusi yang diberikan.

In [None]:
def run_simulation(state, system, pop_male=None, pop_female=None):
  """Run one Monte Carlo draw of suicide counts for males and females.

  For every disease in `system.prevalence_range`, one prevalence value is
  picked at random from `system.num_points` evenly spaced values between the
  sex-specific minimum and maximum, converted to a head count
  (population * prevalence / 100), and the number of suicides is drawn from
  a binomial distribution with probability `system.suicide_risk[disease]`.
  All male diseases are simulated first, then all female diseases.

  Parameters:
    state: accepted for interface compatibility; not read by this function.
    system: object exposing `prevalence_range`, `suicide_risk` and `num_points`.
    pop_male, pop_female: population sizes. When omitted, the notebook-level
      variables of the same names are used, which is what this function
      always did before these parameters existed. Passing them explicitly
      avoids depending on whichever cell assigned those variables last.

  Returns:
    (total_suicide_male, total_suicide_female)

  Raises:
    NameError: a population was not passed and no notebook-level variable
      of that name exists.
  """
  pop_male = _resolve_population(pop_male, 'pop_male')
  pop_female = _resolve_population(pop_female, 'pop_female')

  # Male simulation, then female simulation (this order fixes the sequence
  # in which random numbers are consumed).
  total_suicide_male = _simulate_suicides(system, 'male', pop_male)
  total_suicide_female = _simulate_suicides(system, 'female', pop_female)

  return total_suicide_male, total_suicide_female


def _resolve_population(value, global_name):
  """Return `value`, or the notebook-level variable `global_name` when `value` is None."""
  if value is not None:
    return value
  try:
    return globals()[global_name]
  except KeyError:
    raise NameError(f"name '{global_name}' is not defined") from None


def _simulate_suicides(system, sex, population):
  """Sum one random suicide draw per disease for the given sex and population."""
  total = 0
  for disease in system.prevalence_range.keys():
    min_val, max_val = system.prevalence_range[disease][sex]
    preval_values = np.linspace(min_val, max_val, system.num_points)
    num_with_disease = int(population * np.random.choice(preval_values) / 100)
    total += np.random.binomial(num_with_disease, system.suicide_risk[disease])
  return total

## Sweepseries

Fungsi `run_simulation_sweepseries()` bekerja seperti `run_simulation()` di atas, tetapi di sini prevalensi dan populasi diberikan langsung sebagai parameter. <br>

Parameter: <br>
`p1`: prevalensi penyakit dalam persen (dikalikan dengan populasi lalu dibagi 100). <br>
`system`: tempat menyimpan variabel konstan. <br>
`pop_male` & `pop_female`: populasi laki-laki dan perempuan. <br>

In [None]:
def run_simulation_sweepseries(p1, system, pop_male, pop_female):
  """Simulate suicide totals for one fixed prevalence value.

  `p1` is used as a prevalence percentage: for every disease listed in
  `system.prevalence_range`, population * p1 / 100 people are treated as
  affected and the number of suicides is drawn from a binomial distribution
  with that disease's `system.suicide_risk`. The male draw precedes the
  female draw for each disease.

  Returns the State holding `total_suicide_male`, `total_suicide_female`,
  `total_alive_male` and `total_alive_female`.
  """
  state = State(total_suicide_male=0, total_suicide_female=0,
                total_alive_male=pop_male, total_alive_female=pop_female)

  for disease in system.prevalence_range:
    affected_men = int(state.total_alive_male * p1 / 100)
    male_draw = np.random.binomial(affected_men, system.suicide_risk[disease])
    state.total_suicide_male = state.total_suicide_male + male_draw

    affected_women = int(state.total_alive_female * p1 / 100)
    female_draw = np.random.binomial(affected_women, system.suicide_risk[disease])
    state.total_suicide_female = state.total_suicide_female + female_draw

  return state

## Sweepseries run simulation

Mensimulasikan dan memplot angka bunuh diri pria berdasarkan berbagai tingkat prevalensi penyakit schizophrenia dalam populasi. Dimulai dengan menetapkan populasi pria (200.000) dan wanita (350.000), serta membuat array p1_array yang berisi 55 nilai prevalensi schizophrenia dari yang terendah hingga tertinggi. 

In [None]:
# Populations for the sweep.
# NOTE: run_simulation reads the notebook-level pop_male / pop_female when it
# is called without explicit populations, so these assignments also affect
# such later calls.
pop_male, pop_female = 200000, 350000
# 55 evenly spaced prevalence values between the smallest and largest
# per-year schizophrenia total.
p1_array = np.linspace(total_df_schizo_mean.min(), total_df_schizo_mean.max(), 55)

sweep_male = SweepSeries()

for p1 in p1_array:
  final_state = run_simulation_sweepseries(p1, system, pop_male, pop_female)
  sweep_male[p1] = final_state.total_suicide_male

sweep_male.plot(label="Total bunuh diri Male", color="C4")
# The x axis carries the swept prevalence values and only the male totals are
# plotted; the labels now say so (they previously read "suicide rate" and
# "women and men").
decorate(title='Total bunuh diri laki-laki per prevalensi schizophrenia',
         xlabel='Prevalensi schizophrenia (%)',
         ylabel='Total Bunuh diri laki laki')

## Timeseries
Timeseries ini menunjukan jumlah kasus bunuh diri yang disimulasikan untuk laki-laki dan perempuan selama jumlah iterasi yang ditentukan (1000).

In [None]:
def run_simulation_timeseries(state, system):
  """Run `run_simulation` `system.iterations` times.

  Returns a TimeSeries indexed by iteration number whose entries are
  (male total, female total) tuples.
  """
  totals = TimeSeries()
  for step in range(system.iterations):
    male_total, female_total = run_simulation(state, system)
    totals[step] = (male_total, female_total)
  return totals

In [None]:
# Run the repeated simulation and display the result. Despite the variable
# name, each entry holds both the male and the female total.
result_timeseries_male = run_simulation_timeseries(state, system)
show(result_timeseries_male)

## Running Monte Carlo simulation

In [None]:
# For every (male, female) population pair: run the simulation
# system.iterations times, plot the distribution of the totals and print
# their means. The loop variables pop_male / pop_female are notebook-level
# names, which is how run_simulation picks them up.
for pop_male, pop_female in state.populations:
  # One slot per iteration for each sex.
  state.results_male = np.zeros(system.iterations, dtype=int)
  state.results_female = np.zeros(system.iterations, dtype=int)

  for i in range(system.iterations):
    suicide_male, suicide_female = run_simulation(state, system)
    state.results_male[i] = int(suicide_male)
    state.results_female[i] = int(suicide_female)

  # Element-wise male + female total per iteration.
  state.results_total = state.results_male + state.results_female

  # Plot the simulation results
  plt.figure(figsize=(14, 7))
  plt.hist(state.results_male, bins=30, alpha=0.7, label='Male Suicides')
  plt.hist(state.results_female, bins=30, alpha=0.7, label='Female Suicides')
  plt.xlabel('Number of Suicides')
  plt.ylabel('Frequency')
  plt.grid(True)
  plt.title(
    f'Monte Carlo Simulation of Suicide Cases Due to Mental Health Issues (Population: {pop_male} males, {pop_female} females)')
  plt.legend()
  plt.show()

  # Summary statistics
  mean_male = np.mean(state.results_male)
  mean_female = np.mean(state.results_female)
  print(f'Average suicides in males: {mean_male}')
  print(f'Average suicides in females: {mean_female}')

## Helper function untuk predict

1. `convert_string_into_int(x)`
Fungsi ini digunakan untuk mengonversi setiap elemen dalam daftar dari string menjadi integer. Ini diperlukan ketika data awalnya dalam format string dan perlu diubah menjadi integer untuk analisis numerik.

In [None]:
# np.max(state.results_total)
# Display the per-iteration male totals filled in by the Monte Carlo cell.
state.results_male

## Simulasi predict

$f(x) = 0.000354334\,x + 5.66162$

In [None]:
# Coefficients of the linear model f(x) = a * x + b used by linear_predict:
# a is the slope, b the intercept.
a = 0.000354334
b = 5.66162

In [None]:
def linear_predict(x):
  """Evaluate the linear model a * x + b with the notebook-level coefficients `a` and `b`."""
  scaled = a * x
  return scaled + b

## Function Helper

In [None]:
def convert_string_into_int(x):
  """Return a list with every element of the iterable `x` converted by `int`."""
  return list(map(int, x))

In [None]:
def run_predict(year, populasi, rentang):
  """Produce one prediction per year between `year[0]` and `year[1]`.

  `rentang` evenly spaced population values are laid out from `populasi[0]`
  to `populasi[1]`. For each of the `year[1] - year[0]` steps one of them is
  picked at random, passed through `linear_predict`, and multiplied by 1000.

  Returns the list of predictions.
  """
  candidates = np.linspace(populasi[0], populasi[1], rentang)
  n_years = year[1] - year[0]
  return [linear_predict(random.choice(candidates)) * 1000 for _ in range(n_years)]

In [None]:
# Input user
populasi = input("Masukan populasi eg(20000 40000): ")
year = input("Masukan rentang tahun eg(2000 3000): ")
rentang = int(input("Masukan rentang yang diinginkan untuk populasi: "))

total_populasi = convert_string_into_int(populasi.split(" "))
year_arr = convert_string_into_int(year.split(" "))

results_predict = run_predict(year_arr, total_populasi, rentang)

In [None]:
# Line plot of the predictions, one point per simulated year.
# NOTE(review): the labels call this an ECDF, but plt.plot draws the values
# in order as a plain line — confirm the intended wording.
plt.figure(figsize=(14, 7))
plt.plot(results_predict, label='ECDF Prediksi kematian untuk perempuan dan laki-laki')
plt.xlabel(f'Rentang tahun {year_arr}')
plt.ylabel('Populasi')
plt.grid(True)
# Use the parsed integers: `year` and `populasi` are the raw input strings,
# so indexing them returned single characters instead of the entered values.
plt.title(f'ECDF Prediksi kematian dalam rentang tahun {year_arr[0]} sampai {year_arr[1]}: dengan populasi {total_populasi[0]} sampai {total_populasi[1]}')
plt.legend()
plt.show()