In [10]:
import pandas as pd 
import numpy as numpy
import geopandas as gpd
import re
import pandas as pd
from pathlib import Path

In [12]:
# Paths to data
worldpop_dir = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\WORLDPOP\data")  # Folder containing worldpop_yyyy.csv
demographic_dir = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\WORLDPOP\data\agesexstructure")  # Folder containing demographic data by year

# Output directory for processed WorldPop files
output_dir = Path(r"D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\WORLDPOP\data")
output_dir.mkdir(exist_ok=True, parents=True)


In [4]:

def calculate_tehsil_statistics(demographic_file):
    """
    Calculate the required statistics for a single demographic file.
    """
    df = pd.read_csv(demographic_file)

    # Calculate total male and female population
    total_male = df["male"].sum()
    total_female = df["female"].sum()

    # Calculate mean sex ratio
    mean_sex_ratio = total_female / total_male if total_male > 0 else 0

    # Calculate aged population (classes 65, 70, 75, 80)
    aged_population = df[df["class"].isin([65, 70, 75, 80])][["male", "female"]].sum().sum()

    # Calculate young population (classes 0, 1)
    young_population = df[df["class"].isin([0, 1])][["male", "female"]].sum().sum()

    return mean_sex_ratio, aged_population, young_population

def process_yearly_data(year):
    """
    Process the data for a given year and add statistics to the WorldPop file.
    """
    # Load WorldPop data
    worldpop_file = worldpop_dir / f"worldpopstats_{year}.csv"
    worldpop_df = pd.read_csv(worldpop_file)

    # Prepare to store the calculated statistics
    statistics = []

    # Demographic folder for the year
    demographic_year_dir = demographic_dir / str(year)

    # Process each tehsil demographic file
    for _, row in worldpop_df.iterrows():
        block_name = row["block_name"].replace(" ","_")

        # Locate the corresponding demographic file
        demographic_file = demographic_year_dir / f"{year}_{block_name.upper()}.csv"
        if demographic_file.exists():
            mean_sex_ratio, aged_population, young_population = calculate_tehsil_statistics(demographic_file)
        else:
            # If demographic file is missing, use NaN
            mean_sex_ratio, aged_population, young_population = None, None, None

        # Append statistics for this tehsil
        statistics.append({
            "block": block_name,
            "mean_sex_ratio": mean_sex_ratio,
            "sum_aged_population": aged_population,
            "sum_young_population": young_population
        })

    # Convert statistics to a DataFrame
    stats_df = pd.DataFrame(statistics)
    #stats_df['TEHSIL'] = stats_df['TEHSIL'].replace("_"," ")
    # Merge statistics into the WorldPop DataFrame
    print(stats_df)
    updated_worldpop_df = pd.merge(worldpop_df, stats_df, on="block_name", how="left")

    # Save the updated WorldPop file
    updated_worldpop_file = output_dir / f"worldpopstats_{year}.csv"
    updated_worldpop_df.to_csv(updated_worldpop_file, index=False)
    print(f"Processed data saved to {updated_worldpop_file}")

# Main processing loop for all years
for year in [2017, 2018, 2019, 2020]:
    process_yearly_data(year)


           block  mean_sex_ratio  sum_aged_population  sum_young_population
0         ANUGUL        0.924546              8715.18              11426.59
1      ATHMALLIK        0.924568              6647.76               8715.38
2       BANARPAL        0.924639              9034.05              11816.51
3    CHHENDIPADA        0.925147             14413.32              18918.25
4         KANIHA        0.924547              6214.61               8148.05
..           ...             ...                  ...                   ...
309      NUAGAON        0.956075              6438.28              10433.86
310   RAJGANGPUR        0.953661              9840.93              15620.05
311      SUBDEGA        0.953633              3662.99               5823.80
312   SUNDARGARH        0.952444              6154.27               9595.57
313   TANGARPALI        0.953633              5015.98               7974.92

[314 rows x 4 columns]


KeyError: 'block_name'

In [None]:
def calculate_tehsil_statistics(demographic_file):
    """
    Calculate the required statistics for a single demographic file.
    """
    df = pd.read_csv(demographic_file)

    # Calculate total male and female population
    total_male = df["male"].sum()
    total_female = df["female"].sum()

    # Calculate mean sex ratio
    mean_sex_ratio = total_female / total_male if total_male > 0 else 0

    # Calculate aged population (classes 65, 70, 75, 80)
    aged_population = df[df["class"].isin([65, 70, 75, 80])][["male", "female"]].sum().sum()

    # Calculate young population (classes 0, 1)
    young_population = df[df["class"].isin([0, 1])][["male", "female"]].sum().sum()

    return mean_sex_ratio, aged_population, young_population
def normalize_name(name):
    """
    Normalize tehsil names by removing extra spaces and replacing spaces with underscores.
    """
    name = name.replace("-", " ")
    name = re.sub(r'\s+', ' ', name.strip())  # Replace multiple spaces with a single space
    return name.replace(" ", "_").upper()  # Replace spaces with underscores and convert to uppercase

def calculate_block_statistics(demographic_file):
    """
    Calculate the required statistics for a single demographic file.
    """
    df = pd.read_csv(demographic_file)

    # Calculate total male and female population
    total_male = df["male"].sum()
    total_female = df["female"].sum()

    # Calculate mean sex ratio
    mean_sex_ratio = total_female / total_male if total_male > 0 else 0

    # Calculate aged population (classes 65, 70, 75, 80)
    aged_population = df[df["class"].isin([65, 70, 75, 80])][["male", "female"]].sum().sum()

    # Calculate young population (classes 0, 1)
    young_population = df[df["class"].isin([0, 1])][["male", "female"]].sum().sum()

    return mean_sex_ratio, aged_population, young_population

def process_yearly_data(year):
    """
    Process the data for a given year and add statistics to the WorldPop file.
    """
    # Load WorldPop data
    worldpop_file = worldpop_dir / f"worldpopstats_{year}.csv"
    worldpop_df = pd.read_csv(worldpop_file)

    # Normalize tehsil names in the WorldPop data
    worldpop_df["BLOCK_NORMALIZED"] = worldpop_df["block_name"].apply(normalize_name)

    # Prepare to store the calculated statistics
    statistics = []

    # Demographic folder for the year
    demographic_year_dir = demographic_dir / str(year)

    # Process each tehsil demographic file
    for _, row in worldpop_df.iterrows():
        block_normalized = row["BLOCK_NORMALIZED"]

        # Locate the corresponding demographic file
        demographic_file = demographic_year_dir / f"{year}_{block_normalized}.csv"
        if demographic_file.exists():
            mean_sex_ratio, aged_population, young_population = calculate_tehsil_statistics(demographic_file)
        else:
            # If demographic file is missing, use NaN
            print(f"File not found: {demographic_file}")

            mean_sex_ratio, aged_population, young_population = None, None, None

        # Append statistics for this tehsil
        statistics.append({
            "BLOCK_NORMALIZED": block_normalized,
            "mean_sex_ratio": mean_sex_ratio,
            "sum_aged_population": aged_population,
            "sum_young_population": young_population
        })

    # Convert statistics to a DataFrame
    stats_df = pd.DataFrame(statistics)

    # Merge statistics into the WorldPop DataFrame
    updated_worldpop_df = pd.merge(worldpop_df, stats_df, on="BLOCK_NORMALIZED", how="left")

    # Drop the normalized column and save the updated file
    updated_worldpop_df.drop(columns=["BLOCK_NORMALIZED"], inplace=True)
    updated_worldpop_file = output_dir / f"worldpopstats_{year}.csv"
    updated_worldpop_df.to_csv(updated_worldpop_file, index=False)
    print(f"Processed data saved to {updated_worldpop_file}")

# Main processing loop for all years
for year in [2017, 2018, 2019, 2020]:
    process_yearly_data(year)


Processed data saved to D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\flood-data-ecosystem-Odisha\Sources\WORLDPOP\data\worldpopstats_2017.csv


In [24]:
bhattiyat = pd.read_csv(r'D:\CivicDataLab_IDS-DRR\IDS-DRR_Github\HP\flood-data-ecosystem-Himachal-Pradesh\Sources\WORLDPOP\data\agesexstructure\2019\2019_BHATTIYAT.csv')

In [25]:
bhattiyat[['male','female']].sum()

male      24932.81
female    24156.02
dtype: float64