In [1]:
import pandas as pd

In [5]:
# Load your data
population_df = pd.read_csv("population_state.csv")
hospital_beds_df = pd.read_csv("hospital_beds.csv")

# --- Preprocess population data ---
# Define senior age groups
senior_ages = ["60-64", "65-69", "70+", "70-74","75-79", "80-84", "85+"]  # if we want to focus on the senior citizen

pop_df = population_df[
    (population_df["sex"] == "both") &
    (population_df["age"] == "overall") &
    (population_df["ethnicity"] == "overall") 
].copy()
pop_df["population"] = (pop_df["population"] * 1000).astype(int)  # assume population is in thousands
pop_df["year"] = pd.to_datetime(pop_df["date"]).dt.year
pop_df = pop_df[["state", "year", "population"]]
# Group by state and year to sum senior population
pop_df = pop_df.groupby(["state", "year"], as_index=False)["population"].sum()


senior_pop_df = population_df[
    (population_df["sex"] == "both") &
    (population_df["age"].isin(senior_ages)) &
    (population_df["ethnicity"] == "overall") 
].copy()
senior_pop_df["population"] = (senior_pop_df["population"] * 1000).astype(int)
senior_pop_df.rename(columns={"population": "population_senior"}, inplace=True)# assume population is in thousands
senior_pop_df["year"] = pd.to_datetime(senior_pop_df["date"]).dt.year
senior_pop_df = senior_pop_df[["state", "year", "population_senior"]]
# Group by state and year to sum senior population
senior_pop_df = senior_pop_df.groupby(["state", "year"], as_index=False)["population_senior"].sum()


# --- Preprocess hospital beds data ---
beds_df = hospital_beds_df[
    (hospital_beds_df["district"] == "All Districts") &
    (hospital_beds_df["type"] == "all")
].copy()
beds_df["year"] = pd.to_datetime(beds_df["date"]).dt.year
beds_df = beds_df[["state", "year", "beds"]]

# --- Merge the datasets ---
merged_df = (
    pop_df
    .merge(beds_df, on=["state", "year"])
    .merge(senior_pop_df, on=["state", "year"])
)

# --- Calculate hospital beds per 1000 people (if focus on senior citizen only, it would be beds per 1000 senior citizen) ---
merged_df["beds_per_1000"] = round(merged_df["beds"] / (merged_df["population"] / 1000)).astype(int)
merged_df["beds_per_1000_senior"] = round(merged_df["beds"] / (merged_df["population_senior"] / 1000)).astype(int)

# View the result
print(merged_df.to_string())

merged_df.to_csv("./hospital_beds_with_population.csv", index = False)

                 state  year  population  beds  population_senior  beds_per_1000  beds_per_1000_senior
0                Johor  2015     3610300  4968             347800              1                    14
1                Johor  2016     3651800  5185             363800              1                    14
2                Johor  2017     3697000  5185             380300              1                    14
3                Johor  2018     3749400  5200             397700              1                    13
4                Johor  2019     3761200  5200             412800              1                    13
5                Johor  2020     4009700  5270             401800              1                    13
6                Johor  2021     4020000  5278             420000              1                    13
7                Johor  2022     4028300  5433             434700              1                    12
8                Kedah  2015     2096500  2653             224700        