In [2]:
import csv
import pandas as pd
import numpy as np

In [34]:
# Cholera
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9099871/
#
# Split every yearly cholera case total into 12 synthetic monthly counts:
# a fixed seasonal weighting is jittered per year, normalised to sum to 1,
# scaled by the yearly total and rounded to whole cases.

# Read the yearly dataset (the 'Year' and 'Cases' columns are used)
yearly_data = pd.read_csv('./Yearly/yearly_cholera.csv')

# Relative weight of each month; position i is paired with Month i + 1 below.
# Only the ratios matter, because the weights are re-normalised for every year.
seasonal_weights = np.array([0.1, 0.1, 0.2, 0.25, 0.2, 0.1, 0.05, 0.05, 0.05, 0.015, 0.035, 0.025])

# Cell-local generator with a fixed (arbitrary) seed, so re-running the notebook
# regenerates the same CSV regardless of which cells were executed before.
rng = np.random.default_rng(42)

# Initialize empty lists to store monthly data
years = []
months = []
cases = []

# Iterate column-wise: this keeps each column's own dtype, whereas iterrows()
# coerces every row to one common dtype.
# Assumes 'Cases' has no missing values -- TODO confirm against the input file.
for year, total_cases_yearly in zip(yearly_data['Year'], yearly_data['Cases']):

    # Generate random monthly variation around a typical seasonal pattern
    monthly_pattern = seasonal_weights * rng.normal(1, 0.1, 12)
    monthly_pattern /= monthly_pattern.sum()  # Normalize to sum to 1

    # Largest-remainder rounding: round everything down, then give one extra
    # case to the months with the biggest fractional parts until the 12 counts
    # add up to the (rounded) yearly total. Rounding each month independently
    # would let the monthly sum drift away from the yearly figure.
    exact_cases = monthly_pattern * total_cases_yearly
    monthly_cases = np.floor(exact_cases).astype(int)
    shortfall = int(round(total_cases_yearly)) - monthly_cases.sum()
    if shortfall > 0:
        top_up = np.argsort(exact_cases - monthly_cases)[::-1][:shortfall]
        monthly_cases[top_up] += 1

    # Append monthly data
    years.extend([year] * 12)
    months.extend(range(1, 13))
    cases.extend(monthly_cases)

# Create DataFrame
monthly_df = pd.DataFrame({'Year': years, 'Month': months, 'Cases': cases})

# Save to CSV
monthly_df.to_csv('./Monthly/monthly_cholera.csv', index=False)

In [40]:
# Dengue
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5583666/
#
# Turn yearly dengue case totals into synthetic monthly counts by applying a
# jittered, normalised seasonal weighting to each year's total.

yearly_data = pd.read_csv('./Yearly/yearly_dengue.csv')

# Relative monthly weights (entry i belongs to Month i + 1). They are
# re-normalised per year, so their absolute scale is irrelevant.
seasonal_weights = np.array([1.05, 1.10, 1.15, 1.05, 0.95, 0.90, 0.85, 0.80, 0.85, 0.9, 1, 1.1])

# Fixed (arbitrary) seed on a private generator: the output file is the same on
# every Restart & Run All and does not depend on global RNG state.
rng = np.random.default_rng(43)

years = []
months = []
cases = []

# zip over the two columns preserves their dtypes; iterrows() would coerce each
# row to a single dtype.
# Assumes 'Cases' has no missing values -- TODO confirm against the input file.
for year, total_cases_yearly in zip(yearly_data['Year'], yearly_data['Cases']):

    # Per-year multiplicative noise (mean 1, sd 0.1) on top of the seasonal shape
    monthly_pattern = seasonal_weights * rng.normal(1, 0.1, 12)
    monthly_pattern /= monthly_pattern.sum()

    # Round down, then hand the leftover cases to the months with the largest
    # fractional parts so the 12 integers sum to the rounded yearly total
    # (independent rounding of each month does not guarantee that).
    exact_cases = monthly_pattern * total_cases_yearly
    monthly_cases = np.floor(exact_cases).astype(int)
    shortfall = int(round(total_cases_yearly)) - monthly_cases.sum()
    if shortfall > 0:
        top_up = np.argsort(exact_cases - monthly_cases)[::-1][:shortfall]
        monthly_cases[top_up] += 1

    years.extend([year] * 12)
    months.extend(range(1, 13))
    cases.extend(monthly_cases)

monthly_df = pd.DataFrame({'Year': years, 'Month': months, 'Cases': cases})
monthly_df.to_csv('./Monthly/monthly_dengue.csv', index=False)

In [6]:
# Diarrhea
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6711541/
#
# Spread each yearly diarrhea death total over 12 months using random monthly
# shares that sum to 1 (values stay fractional; no rounding is applied).

df = pd.read_csv('./Yearly/yearly_diarrhea.csv')

# Monthly shares of a year's deaths. Seeding immediately before the draw makes
# the 12 values identical on every run. They are drawn once and reused for all
# years: re-seeding with the same value before each year's draw would reproduce
# exactly this vector anyway, so the generated numbers are unchanged.
# NOTE(review): every year therefore shares one monthly profile -- confirm this
# is intended rather than an independent draw per year.
np.random.seed(42)  # Set seed for reproducibility
monthly_shares = np.random.rand(12)
monthly_shares /= monthly_shares.sum()

# Build one 12-row frame per year and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop copies all previous rows on every
# iteration and starts from an empty frame, whose handling in concat is
# deprecated in recent pandas.
yearly_frames = [
    pd.DataFrame({
        'Year': [row['Year']] * 12,
        'Month': range(1, 13),
        'Deaths': monthly_shares * row['Deaths'],
    })
    for _, row in df.iterrows()
]

if yearly_frames:
    monthly_data_all_years = pd.concat(yearly_frames)
else:
    # No input rows: still write a header-only CSV with the expected columns
    monthly_data_all_years = pd.DataFrame(columns=['Year', 'Month', 'Deaths'])

# Save the combined monthly data to a single CSV file
monthly_data_all_years.to_csv('./Monthly/monthly_diarrhea.csv', index=False)

In [7]:
# Pneumonia
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9387816/
#
# Break yearly pneumonia death totals down into 12 fractional monthly values
# whose sum equals the yearly total (up to floating-point error).

df = pd.read_csv('./Yearly/yearly_pneumonia.csv')

# Draw the monthly shares a single time. With the seed fixed at 42 right before
# the draw, repeating seed + draw for each year would return this same vector,
# so hoisting it out of the loop leaves the generated values untouched.
# NOTE(review): as a result all years have the same month-to-month shape --
# confirm that is the intent.
np.random.seed(42)  # Set seed for reproducibility
monthly_shares = np.random.rand(12)
monthly_shares /= monthly_shares.sum()

# Collect the per-year frames first; a single concat afterwards avoids
# re-copying the accumulated rows on every iteration and avoids concatenating
# onto an empty placeholder frame.
yearly_frames = []
for _, row in df.iterrows():
    # Create a DataFrame for monthly data for the current year
    yearly_frames.append(
        pd.DataFrame({
            'Year': [row['Year']] * 12,
            'Month': range(1, 13),
            'Deaths': monthly_shares * row['Deaths'],
        })
    )

# Fall back to an empty frame with the expected columns when there is no input
monthly_data_all_years = (
    pd.concat(yearly_frames)
    if yearly_frames
    else pd.DataFrame(columns=['Year', 'Month', 'Deaths'])
)

# Save the combined monthly data to a single CSV file
monthly_data_all_years.to_csv('./Monthly/monthly_pneumonia.csv', index=False)

In [44]:
# Tuberculosis
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3981520/
#
# Disaggregate yearly tuberculosis case totals into synthetic monthly counts
# using a seasonal weighting with per-year random jitter.

yearly_data = pd.read_csv('./Yearly/yearly_tuberculosis.csv')

# Relative weight per month (index i -> Month i + 1); normalised per year below.
seasonal_weights = np.array([1.15, 1.12, 1.08, 1.03, 0.98, 0.94, 0.97, 1.01, 1.05, 1.09, 1.13, 1.18])

# Private generator with a fixed (arbitrary) seed so the cell is reproducible
# and independent of the global NumPy random state.
rng = np.random.default_rng(44)

years = []
months = []
cases = []

# Column-wise iteration keeps 'Year' and 'Cases' in their own dtypes
# (iterrows() coerces each row to one dtype).
# Assumes 'Cases' has no missing values -- TODO confirm against the input file.
for year, total_cases_yearly in zip(yearly_data['Year'], yearly_data['Cases']):

    # Jitter the seasonal shape (noise mean 1, sd 0.1) and renormalise to 1
    monthly_pattern = seasonal_weights * rng.normal(1, 0.1, 12)
    monthly_pattern /= monthly_pattern.sum()

    # Largest-remainder rounding keeps the monthly integers summing to the
    # rounded yearly total; rounding each month on its own can be off by a few.
    exact_cases = monthly_pattern * total_cases_yearly
    monthly_cases = np.floor(exact_cases).astype(int)
    shortfall = int(round(total_cases_yearly)) - monthly_cases.sum()
    if shortfall > 0:
        top_up = np.argsort(exact_cases - monthly_cases)[::-1][:shortfall]
        monthly_cases[top_up] += 1

    years.extend([year] * 12)
    months.extend(range(1, 13))
    cases.extend(monthly_cases)

monthly_df = pd.DataFrame({'Year': years, 'Month': months, 'Cases': cases})

monthly_df.to_csv('./Monthly/monthly_tuberculosis.csv', index=False)

In [45]:
# Typhoid
# https://www.nature.com/articles/s41598-018-25234-w
#
# Convert yearly typhoid case totals to synthetic monthly counts: seasonal
# weights are perturbed per year, normalised, and scaled by the yearly total.

yearly_data = pd.read_csv('./Yearly/yearly_typhoid.csv')

# Relative monthly weights; the entry at position i is written out as Month i + 1.
seasonal_weights = np.array([1.05, 1.02, 1.00, 1.05, 1.15, 1.20, 1.12, 1.07, 0.98, 0.92, 0.90, 0.95])

# Seeded local generator (arbitrary fixed seed): identical output on every
# re-run, no reliance on whatever random calls earlier cells made.
rng = np.random.default_rng(45)

years = []
months = []
cases = []

# Read the two needed columns directly so their dtypes are not altered by
# iterrows()' per-row coercion.
# Assumes 'Cases' has no missing values -- TODO confirm against the input file.
for year, total_cases_yearly in zip(yearly_data['Year'], yearly_data['Cases']):

    monthly_pattern = seasonal_weights * rng.normal(1, 0.1, 12)
    monthly_pattern /= monthly_pattern.sum()

    # Floor every month, then distribute the remaining cases to the months with
    # the largest fractional parts, so the year's 12 counts add up to the
    # rounded yearly total instead of drifting through independent rounding.
    exact_cases = monthly_pattern * total_cases_yearly
    monthly_cases = np.floor(exact_cases).astype(int)
    shortfall = int(round(total_cases_yearly)) - monthly_cases.sum()
    if shortfall > 0:
        top_up = np.argsort(exact_cases - monthly_cases)[::-1][:shortfall]
        monthly_cases[top_up] += 1

    years.extend([year] * 12)
    months.extend(range(1, 13))
    cases.extend(monthly_cases)

monthly_df = pd.DataFrame({'Year': years, 'Month': months, 'Cases': cases})
monthly_df.to_csv('./Monthly/monthly_typhoid.csv', index=False)

In [12]:
# Unsafe sanitation
#
# Spread each yearly unsafe-sanitation death total over 12 months using random
# monthly shares that sum to 1.

yearly_data = pd.read_csv('./Yearly/yearly_unsafe-sanitation.csv')

# Monthly shares, drawn once with a fixed seed and applied to every year
# (seeding with the same value before each year's draw would give this same
# vector each time).
# NOTE(review): all years consequently share one monthly profile -- confirm
# that is intended.
np.random.seed(42)  # Set seed for reproducibility
monthly_shares = np.random.rand(12)
monthly_shares /= monthly_shares.sum()

# Iterate over the frame loaded in THIS cell. Looping over a leftover `df`
# from another cell would write that other dataset's numbers into the
# unsafe-sanitation file (or fail with NameError on a fresh kernel).
yearly_frames = [
    # Create a DataFrame for monthly data for the current year
    pd.DataFrame({
        'Year': [row['Year']] * 12,
        'Month': range(1, 13),
        'Deaths': monthly_shares * row['Deaths'],
    })
    for _, row in yearly_data.iterrows()
]

# Concatenate once instead of growing a frame inside the loop; keep a
# header-only frame when the input has no rows.
if yearly_frames:
    monthly_data_all_years = pd.concat(yearly_frames)
else:
    monthly_data_all_years = pd.DataFrame(columns=['Year', 'Month', 'Deaths'])

monthly_data_all_years.to_csv('./Monthly/monthly_unsafe-sanitation.csv', index=False)

In [15]:
# Testing if sum(monthly death data) == yearly data total
# unsafe-sanitation,pneumonia,diarrhea
#
# Prints, per year, the summed monthly deaths next to the yearly figure for the
# diarrhea files so the two can be compared by eye.

yearly_data = pd.read_csv('./Yearly/yearly_diarrhea.csv')
monthly_data = pd.read_csv('./Monthly/monthly_diarrhea.csv')

# Total the monthly deaths within each year, then turn 'Year' back into a column
deaths_per_year = monthly_data.groupby('Year')['Deaths'].sum()
monthly_sum_by_year = deaths_per_year.reset_index()

# Join on 'Year'; the clashing 'Deaths' columns become
# 'Deaths_Yearly' (from the yearly file) and 'Deaths_Monthly' (the monthly sum)
merged_data = yearly_data.merge(
    monthly_sum_by_year,
    on='Year',
    suffixes=('_Yearly', '_Monthly'),
)

# One line per year: monthly sum followed by the yearly count
for _, record in merged_data.iterrows():
    print(f"Year: {record['Year']}, Monthly Deaths Sum: {record['Deaths_Monthly']}, Yearly Deaths: {record['Deaths_Yearly']}")

Year: 2000, Monthly Deaths Sum: 1233146.9999999998, Yearly Deaths: 1233147.0
Year: 2001, Monthly Deaths Sum: 1197202.0, Yearly Deaths: 1197202.0
Year: 2002, Monthly Deaths Sum: 1172760.0, Yearly Deaths: 1172760.0
Year: 2003, Monthly Deaths Sum: 1126296.0, Yearly Deaths: 1126296.0
Year: 2004, Monthly Deaths Sum: 1053842.0, Yearly Deaths: 1053842.0
Year: 2005, Monthly Deaths Sum: 1028845.9999999999, Yearly Deaths: 1028846.0
Year: 2006, Monthly Deaths Sum: 1008272.9999999999, Yearly Deaths: 1008273.0
Year: 2007, Monthly Deaths Sum: 1002051.9999999999, Yearly Deaths: 1002052.0
Year: 2008, Monthly Deaths Sum: 984046.0999999999, Yearly Deaths: 984046.1
Year: 2009, Monthly Deaths Sum: 913215.0999999999, Yearly Deaths: 913215.1
Year: 2010, Monthly Deaths Sum: 885774.0999999999, Yearly Deaths: 885774.1
Year: 2011, Monthly Deaths Sum: 880135.1999999998, Yearly Deaths: 880135.2
Year: 2012, Monthly Deaths Sum: 837737.4999999999, Yearly Deaths: 837737.5
Year: 2013, Monthly Deaths Sum: 810350.299999