In [4]:
import pandas as pd 
import numpy as numpy
import geopandas as gpd
import re
from pathlib import Path


In [5]:
# Paths to data
# NOTE(review): these are absolute Windows paths; they must be adjusted before running on another machine.
worldpop_dir = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data")  # Folder containing the per-year worldpopstats_<year>.csv files
demographic_dir = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data\agesexstructure")  # Folder containing demographic data, one sub-folder per year holding <year>_<object_id>.csv files

# Output directory for processed WorldPop files
# NOTE(review): this is the same folder as worldpop_dir, so a processed file written here
# under an input's file name replaces that input on disk.
output_dir = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data")
output_dir.mkdir(exist_ok=True, parents=True)  # Create the folder (and any missing parents); no error if it already exists


In [None]:
def normalize_name(name):
    """
    Return a canonical form of a tehsil name.

    Leading and trailing whitespace is removed, every internal run of
    whitespace becomes a single underscore, and the result is upper-cased,
    e.g. "  north   lakhimpur " -> "NORTH_LAKHIMPUR".
    """
    # Splitting the trimmed name on whitespace runs yields its words;
    # an empty (or all-whitespace) name yields a single empty word.
    words = re.split(r'\s+', name.strip())
    return "_".join(words).upper()


In [6]:

def calculate_tehsil_statistics(demographic_file):
    """
    Summarise one demographic CSV.

    The file must provide the columns "male", "female" and "class"
    (presumably "class" is the WorldPop age-band label -- confirm against
    the files that produce it).

    Returns a tuple (mean_sex_ratio, aged_population, young_population):
      * mean_sex_ratio   -- total female / total male over all rows, or 0
                            when the male total is not greater than zero
      * aged_population  -- male + female summed over classes 65, 70, 75, 80
      * young_population -- male + female summed over classes 0 and 1
    """
    demo = pd.read_csv(demographic_file)

    # Females per male across the whole file; 0 stands in for an undefined ratio
    males = demo["male"].sum()
    females = demo["female"].sum()
    if males > 0:
        sex_ratio = females / males
    else:
        sex_ratio = 0

    def _headcount(age_classes):
        # Combined male + female count over the rows whose class is listed
        selected = demo.loc[demo["class"].isin(age_classes), ["male", "female"]]
        return selected.sum().sum()

    elderly = _headcount([65, 70, 75, 80])
    infants = _headcount([0, 1])

    return sex_ratio, elderly, infants

def process_yearly_data(year):
    """
    Add demographic statistics to the WorldPop file of a given year.

    Reads worldpopstats_<year>.csv from the module-level ``worldpop_dir``,
    looks up <year>_<object_id>.csv under ``demographic_dir``/<year> for every
    distinct ``object_id``, and writes the input rows plus the columns
    ``mean_sex_ratio``, ``sum_aged_population`` and ``sum_young_population``
    to worldpopstats_<year>.csv in ``output_dir``.

    An object_id without a demographic file is reported on stdout and gets
    NaN for all three statistics.

    The function is safe to re-run: statistic columns left in the input by a
    previous run are replaced rather than duplicated.

    Raises whatever ``pd.read_csv`` raises when the WorldPop file is missing,
    and ``KeyError`` when it has no ``object_id`` column.
    """
    stat_columns = ["mean_sex_ratio", "sum_aged_population", "sum_young_population"]
    missing = float("nan")

    # Load WorldPop data
    worldpop_file = worldpop_dir / f"worldpopstats_{year}.csv"
    worldpop_df = pd.read_csv(worldpop_file)

    # When output_dir is the same folder as worldpop_dir the input file is the
    # output of an earlier run and already carries these columns; merging on
    # top of them would produce *_x / *_y duplicates, so discard them first.
    worldpop_df = worldpop_df.drop(columns=stat_columns, errors="ignore")

    # Demographic folder for the year
    demographic_year_dir = demographic_dir / str(year)

    # One statistics record per distinct object_id.
    # Iterating the column (instead of DataFrame.iterrows) keeps the id's own
    # dtype, so an integer id is not upcast to e.g. "12.0" in the file name;
    # de-duplicating keeps the left merge below from multiplying rows.
    statistics = []
    for object_id in worldpop_df["object_id"].drop_duplicates():
        demographic_file = demographic_year_dir / f"{year}_{object_id}.csv"
        if demographic_file.exists():
            mean_sex_ratio, aged_population, young_population = calculate_tehsil_statistics(demographic_file)
        else:
            # No demographic file for this id: record NaN for every statistic
            print(f"Missing demographic file for {object_id}")
            mean_sex_ratio, aged_population, young_population = missing, missing, missing

        statistics.append({
            "object_id": object_id,
            "mean_sex_ratio": mean_sex_ratio,
            "sum_aged_population": aged_population,
            "sum_young_population": young_population
        })

    # Explicit columns keep the frame mergeable even when there are no records
    stats_df = pd.DataFrame(statistics, columns=["object_id", *stat_columns])

    # Attach the statistics to every WorldPop row sharing the object_id
    updated_worldpop_df = pd.merge(worldpop_df, stats_df, on="object_id", how="left")

    # Save the updated file
    updated_worldpop_file = output_dir / f"worldpopstats_{year}.csv"
    updated_worldpop_df.to_csv(updated_worldpop_file, index=False)
    print(f"Processed data saved to {updated_worldpop_file}")

# Enrich the WorldPop file of every year from 2016 through 2020 (inclusive)
for year in range(2016, 2021):
    process_yearly_data(year)


Processed data saved to D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data\worldpopstats_2016.csv
Processed data saved to D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data\worldpopstats_2017.csv
Processed data saved to D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data\worldpopstats_2018.csv
Processed data saved to D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data\worldpopstats_2019.csv
Processed data saved to D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\Deployment\flood-data-ecosystem-Assam\Sources\WORLDPOP\data\worldpopstats_2020.csv
