In [1]:
import pandas as pd
import numpy as np

# Function to load data
def load_data(file, sheet_name, header=None):
    """Read a worksheet from an Excel workbook via ``pd.read_excel``.

    Parameters
    ----------
    file : path or file-like
        Workbook to read; forwarded unchanged to ``pd.read_excel``.
    sheet_name : str, int, list or None
        Sheet selector; forwarded unchanged to ``pd.read_excel``.
    header : int, list of int or None, default None
        Row(s) to use as column labels; forwarded unchanged. With the
        default ``None`` pandas treats no row as a header.

    Returns
    -------
    Whatever ``pd.read_excel`` returns for the given arguments.
    """
    read_options = {'sheet_name': sheet_name, 'header': header}
    return pd.read_excel(file, **read_options)

# Function to filter data by status
def filter_by_status(df, status):
    """Return the rows of ``df`` whose 'Status' column equals ``status``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Status' column.
    status : object
        Value compared against 'Status' with ``==``.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index labels, as an
        independent copy of the data.

    Raises
    ------
    KeyError
        If ``df`` has no 'Status' column.
    """
    matches = df['Status'] == status
    # Explicit copy: callers add columns to the result, and assigning into a
    # boolean-mask slice raises SettingWithCopyWarning on pandas < 3.
    return df.loc[matches].copy()

# Function to replace gender and calculate age-based household head gender
def gender_age_processing(df):
    """Add 'Gender' and 'Household_Head_Gender' columns to ``df``.

    'Gender' is 'hhh_sex' with code 1 replaced by 'Male' and code 2 by
    'Female'; any other value is left as it is.

    'Household_Head_Gender' is:
      * 'Youth Headed' when 'hhh_age' is 30 or less, regardless of gender;
      * otherwise '<Gender> Headed' (e.g. 'Female Headed');
      * missing when the age test is not met and 'Gender' is missing
        (instead of the literal text 'nan Headed').

    A missing 'hhh_age' does not satisfy ``<= 30``, so such rows fall through
    to the gender-based label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'hhh_sex' and 'hhh_age' columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the two columns added or overwritten.
    """
    sex_mapping = {1: 'Male', 2: 'Female'}
    df['Gender'] = df['hhh_sex'].replace(sex_mapping)

    gender = df['Gender']
    # Vectorised label construction; rows without a gender stay missing rather
    # than being stringified into a bogus 'nan Headed' category.
    gender_headed = (gender.astype(str) + ' Headed').where(gender.notna())

    # Age takes precedence over gender: heads aged <= 30 are 'Youth Headed'.
    is_youth = df['hhh_age'] <= 30
    df['Household_Head_Gender'] = gender_headed.mask(is_youth, 'Youth Headed')
    return df

# Function to create income group categories
def create_income_groups(df, income_column, boundaries, labels, group_col_name):
    """Bin an income column into labelled groups and store them on ``df``.

    Bins are left-closed and right-open (``right=False``), so a value equal
    to a boundary falls into the bin that starts at that boundary. Values
    outside every bin become missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding ``income_column``. It is modified in place.
    income_column : str
        Name of the column to bin.
    boundaries : sequence of numbers
        Bin edges passed to ``pd.cut``.
    labels : sequence of str
        One label per bin, passed to ``pd.cut``.
    group_col_name : str
        Name of the column that receives the result.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``group_col_name`` added or overwritten.
    """
    income_values = df[income_column]
    binned = pd.cut(income_values, bins=boundaries, labels=labels, right=False)
    df[group_col_name] = binned
    return df

In [None]:
# Loading data: read the 'MergedRTV_Control2021' sheet, taking the column
# names from the row at 0-based position 5 (header=5).
df1 = load_data(
    'All2021Cohorts_merged_outlier managed.xlsx',
    sheet_name='MergedRTV_Control2021',
    header=5,
)
# Additional data loading here...

In [None]:
# Processing datasets: keep the rows whose Status is "RTV", then add the
# 'Gender' and 'Household_Head_Gender' columns.
df = df1.pipe(filter_by_status, "RTV").pipe(gender_age_processing)

In [None]:
# Additional data processing here...

# Income group boundaries (bin edges) and their labels, one pair per grouping.
# Each labels list has exactly one entry per bin, i.e. len(boundaries) - 1.
boundaries1 = [0, 0.75, 1.25, 2, 3, float('inf')]
labels1 = [
    "0-<0.75/Day",
    "0.75-<1.25/Day",
    "1.25-<2/Day",
    "2-<3/Day",
    "3+/Day",
]

boundaries2 = [0, 2.15, float('inf')]
labels2 = [
    "0-<2.15/Day",
    "2.15+/Day",
]

boundaries3 = [0, 0.75, 1.25, 2.15, 3, float('inf')]
labels3 = [
    "0-<0.75/Day",
    "0.75-<1.25/Day",
    "1.25-<2.15/Day",
    "2.15-<3/Day",
    "3+/Day",
]

# Add one categorical column per grouping, all derived from the same income column.
income_col = 'HH Income + Consumption + Residues/Day (USD_adult_equiv_adj)'
df = (
    df
    .pipe(create_income_groups, income_col, boundaries1, labels1, 'income_group_1')
    .pipe(create_income_groups, income_col, boundaries2, labels2, 'income_group_2')
    .pipe(create_income_groups, income_col, boundaries3, labels3, 'income_group_3')
)
# Repeat for other datasets with appropriate boundaries and labels

# Save datasets if needed
# df.to_excel("df.xlsx")
# Additional saving here...