## Need: Dataset, Column Report, Row Report

In [16]:
import pandas as pd

# Locations of the column-report workbook and of the main dataset CSV
excel_path = '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC/Group1Data_ColumnReport.xlsx'
dataset_path = '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC/Group1Dataset_LCU_removed(except_exchange_rate).csv'

# Workbook read with pandas' default sheet selection (no sheet_name given)
allCols_df = pd.read_excel(excel_path)

# Pair every "Indicator" entry with the "Source Table" value from the same row
indicator_to_source = dict(zip(allCols_df["Indicator"], allCols_df["Source Table"]))

# Named sheets that hold the column list for each group
LifeExpCols_df = pd.read_excel(excel_path, sheet_name='Life Expectancy Columns')
UnemCols_df = pd.read_excel(excel_path, sheet_name='Unemployment Columns')

# Main dataset
dataset_df = pd.read_csv(dataset_path)

# ---------------------------------------------------------
# 1-2. Life expectancy: read the indicator names from the sheet,
#      then keep only those that are columns of the dataset
# ---------------------------------------------------------
life_exp_cols = LifeExpCols_df['Indicator'].tolist()
valid_life_exp_cols = [name for name in life_exp_cols if name in dataset_df.columns]

# ---------------------------------------------------------
# 1-2. Unemployment: same two steps for the second sheet
# ---------------------------------------------------------
unem_cols = UnemCols_df['Indicator'].tolist()
valid_unem_cols = [name for name in unem_cols if name in dataset_df.columns]

# ---------------------------------------------------------
# 3. Define a helper function to ensure essential columns appear at the beginning
# ---------------------------------------------------------
def ensure_essential_columns_at_start(cols, df, essential_cols):
    """
    Build a column list that starts with the essential columns.

    Essential columns are included only if they are columns of `df`, and they
    keep the order given in `essential_cols`. The entries of `cols` follow,
    in their original order, minus any that were already placed up front.
    """
    # Essentials that the dataset really has, in the requested order
    leading = []
    for name in essential_cols:
        if name in df.columns:
            leading.append(name)

    # Everything else from the caller's list, skipping the essentials
    trailing = []
    for name in cols:
        if name in leading:
            continue
        trailing.append(name)

    return leading + trailing

# Identifier columns that must lead every extract, in this order
essential_cols = ['CountryShortName', 'Year']

# ---------------------------------------------------------
# 4. Put the essential columns at the front of each group's column list
# ---------------------------------------------------------
final_life_exp_cols, final_unem_cols = (
    ensure_essential_columns_at_start(group_cols, dataset_df, essential_cols)
    for group_cols in (valid_life_exp_cols, valid_unem_cols)
)

# ---------------------------------------------------------
# 5. Select each group's columns from the dataset
# ---------------------------------------------------------
life_exp_data = dataset_df[final_life_exp_cols]
unem_data = dataset_df[final_unem_cols]

In [17]:
# ---------------------------
# Data Loading Functions
# ---------------------------
def load_dataset(dataset_path):
    """Read the CSV file at `dataset_path` and return it as a DataFrame."""
    frame = pd.read_csv(dataset_path)
    return frame

def load_excel_sheet(excel_path, sheet_name=None):
    """
    Load a single worksheet of an Excel workbook.

    Args:
        excel_path: Workbook location, passed straight to pandas.read_excel.
        sheet_name: Sheet name or zero-based position. None (the default)
            selects the first sheet.

    Returns:
        The result of pandas.read_excel for the selected sheet.
    """
    # pandas.read_excel interprets sheet_name=None as "every sheet" and returns
    # a dict of DataFrames; map None to position 0 so the default really is
    # the first sheet, as this function's contract states.
    if sheet_name is None:
        sheet_name = 0
    return pd.read_excel(excel_path, sheet_name=sheet_name)

# ---------------------------
# Filtering Functions
# ---------------------------
def get_valid_columns(df, columns):
    """List the entries of `columns` that are column labels of `df`, keeping their order."""
    present = []
    for label in columns:
        if label in df.columns:
            present.append(label)
    return present

def ensure_essential_columns_at_start(cols, df, essential_cols):
    """
    Return a new list: the essential columns that are columns of `df`
    (in the order of `essential_cols`), followed by the entries of `cols`
    that are not among them.
    """
    available = df.columns
    front = list(filter(lambda name: name in available, essential_cols))
    back = [name for name in cols if name not in front]
    return [*front, *back]

def filter_dataset(df, columns, essential_cols=('CountryShortName', 'Year')):
    """
    Return `df` restricted to the requested columns, essential columns first.

    Args:
        df: DataFrame to select from.
        columns: Iterable of wanted column labels; labels that are not
            columns of `df` are ignored.
        essential_cols: Labels to place at the front of the result when they
            are columns of `df`, whether or not they appear in `columns`.
            The default is an immutable tuple so it cannot be altered
            between calls.

    Returns:
        The selection `df[final_cols]`, where `final_cols` is the essential
        columns followed by the remaining valid columns.
    """
    valid_cols = get_valid_columns(df, columns)
    final_cols = ensure_essential_columns_at_start(valid_cols, df, essential_cols)
    # A label listed twice would otherwise be selected twice and show up as a
    # duplicated column; dict.fromkeys drops repeats while keeping first-seen order.
    final_cols = list(dict.fromkeys(final_cols))
    return df[final_cols]

# ---------------------------
# Missing Data Report Functions
# ---------------------------
def create_missing_report_by_column_with_source(df, mapping):
    """
    Report the count and percentage of missing values for each column of `df`,
    together with the Source Table looked up for that column.

    Args:
        df: DataFrame to inspect.
        mapping: Lookup from column label to source table, applied with
            Series.map to the "Column" values.

    Returns:
        A DataFrame with one row per column of `df` and the columns
        "Column", "missing_count", "missing_percent" and "Source Table".
    """
    # Compute the missing-value mask once and reuse it for both statistics
    na_mask = df.isna()

    # Build the "Column" field directly from df.columns instead of going through
    # reset_index(): reset_index names the restored column after the Index's
    # name, so a named columns Index would not yield a column called "index"
    # and the later report["Column"] lookup would raise KeyError.
    report = pd.DataFrame({
        'Column': list(df.columns),
        'missing_count': na_mask.sum().to_numpy(),
        'missing_percent': na_mask.mean().to_numpy() * 100,
    })

    # Add the Source Table column by mapping the "Column" names to the dictionary
    report["Source Table"] = report["Column"].map(mapping)
    return report

# ---------------------------
# Utility Functions
# ---------------------------
def save_csv(df, output_path, index=False):
    """Write `df` to `output_path` as CSV, forwarding `index` to DataFrame.to_csv."""
    df.to_csv(path_or_buf=output_path, index=index)

In [18]:
# ---------------------------
# Main Workflow
# ---------------------------
# Define file paths
excel_path = '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC/Group1Data_ColumnReport.xlsx'
dataset_path = '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC/Group1Dataset_LCU_removed(except_exchange_rate).csv'
# Every output of this cell is written to this one folder
output_dir = '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC'

# Load Excel sheets containing the column lists.
# The first sheet is requested by position so that allCols_df is a single
# DataFrame: pandas interprets sheet_name=None as "all sheets" and returns a dict.
allCols_df = load_excel_sheet(excel_path, sheet_name=0)
life_exp_cols_df = load_excel_sheet(excel_path, sheet_name='Life Expectancy Columns')
unem_cols_df = load_excel_sheet(excel_path, sheet_name='Unemployment Columns')

# Build the indicator -> source table lookup here, from the sheet just loaded,
# so the reports below do not depend on a variable left over from another cell.
indicator_to_source = dict(zip(allCols_df["Indicator"], allCols_df["Source Table"]))

# Extract column lists from the Excel sheets
life_exp_cols = life_exp_cols_df['Indicator'].tolist()
unem_cols = unem_cols_df['Indicator'].tolist()

# Load the main dataset
dataset_df = load_dataset(dataset_path)

# Filter datasets using the lists (ensuring CountryShortName and Year are first)
life_exp_data = filter_dataset(dataset_df, life_exp_cols)
unem_data = filter_dataset(dataset_df, unem_cols)

# Generate missing reports for Life Expectancy data
life_exp_missing_report = create_missing_report_by_column_with_source(life_exp_data, indicator_to_source)

# Generate missing reports for Unemployment data
unem_missing_report = create_missing_report_by_column_with_source(unem_data, indicator_to_source)

# Save the filtered datasets to CSV files
save_csv(life_exp_data, f'{output_dir}/LifeExpectancyDataset.csv')
save_csv(unem_data, f'{output_dir}/UnemploymentDataset.csv')

# Save the missing reports for Life Expectancy data
save_csv(life_exp_missing_report, f'{output_dir}/LifeExp_MissingReport.csv')

# Save the missing reports for Unemployment data
save_csv(unem_missing_report, f'{output_dir}/Unem_MissingReport.csv')

In [19]:
# Columns retained in the final dataset: the two identifier columns followed by
# the selected indicators. One name per line so that no string literal is split
# across physical lines (a split literal is a syntax error, and a newline inside
# a name would stop it matching a dataset column).
columns_to_keep = [
    "CountryShortName",
    "Year",
    "Life expectancy at birth, female (years)",
    "Life expectancy at birth, male (years)",
    "Life expectancy at birth, total (years)",
    "GOAL 13: Climate Action (5 year moving average)",
    "Pillar 3 - Data Products - Score",
    "GOAL 10: Reduced Inequality (5 year moving average)",
    "GOAL 11: Sustainable Cities and Communities (5 year moving average)",
    "GOAL 12: Responsible Consumption and Production (5 year moving average)",
    "GOAL 15: Life on Land (5 year moving average)",
    "GOAL 16: Peace and Justice Strong Institutions (5 year moving average)",
    "GOAL 17: Partnerships to achieve the Goal (5 year moving average)",
    "GOAL 1: No Poverty (5 year moving average)",
    "Health/Demographic survey (Availability score over 10 years)",
    "Household Survey on income, etc (Availability score over 10 years)",
    "Labor Force Survey (Availability score over 10 years)",
    "Pillar 1 - Data Use - Score",
    "General government final consumption expenditure (constant 2015 US$)",
    "General government final consumption expenditure (annual % growth)",
    "Final consumption expenditure (constant 2015 US$)",
    "Final consumption expenditure (annual % growth)",
    "Exports of goods and services (% of GDP)",
    "External balance on goods and services (% of GDP)",
    "Communications, computer, etc. (% of service imports, BoP)",
    "Communications, computer, etc. (% of service exports, BoP)",
    "Exports of goods, services and primary income (BoP, current US$)",
    "Adjusted net national income (current US$)",
    "Adjusted net national income per capita (current US$)",
    "Agriculture, forestry, and fishing, value added per worker (constant 2015 US$)",
    "Adjusted savings: education expenditure (% of GNI)",
    "GDP per capita, PPP (constant 2021 international $)",
    "GDP, PPP (constant 2021 international $)",
    "GNI per capita, PPP (current international $)",
    "GNI, PPP (current international $)",
    "Adjusted savings: energy depletion (% of GNI)",
    "Agriculture, forestry, and fishing, value added (% of GDP)",
    "GDP per capita, PPP (current international $)",
    "GDP, PPP (current international $)",
    "GDP (constant 2015 US$)",
    "GDP per capita (constant 2015 US$)",
    "GNI per capita, Atlas method (current US$)",
    "GNI, Atlas method (current US$)",
    "Adjusted savings: carbon dioxide damage (% of GNI)",
    "GNI (current US$)",
    "GDP growth (annual %)",
    "GDP per capita growth (annual %)",
    "GDP (current US$)",
    "GDP per capita (current US$)",
    "Preprimary education, duration (years)",
    "School enrollment, primary, female (% gross)",
    "School enrollment, primary, male (% gross)",
    "School enrollment, primary (% gross)",
    "Primary education, pupils",
    "Compulsory education, duration (years)",
    "Broad money (% of GDP)",
    "Automated teller machines (ATMs) (per 100,000 adults)",
    "Domestic credit to private sector (% of GDP)",
    "Commercial bank branches (per 100,000 adults)",
    "Claims on central government, etc. (% GDP)",
    "Monetary Sector credit to private sector (% GDP)",
    "Domestic credit to private sector by banks (% of GDP)",
    "Official exchange rate (LCU per US$, period average)",
    "Inflation, GDP deflator (annual %)",
    "Inflation, GDP deflator: linked series (annual %)",
    "Proportion of seats held by women in national parliaments (%)",
    "Women Business and the Law Index Score (scale 1-100)",
    "Fluorinated greenhouse gases (F-gases) emissions from Industrial Processes (Mt CO2e)",
    "Total greenhouse gas emissions including LULUCF (Mt CO2e)",
    "Carbon dioxide (CO2) net fluxes from LULUCF - Deforestation (Mt CO2e)",
    "Carbon dioxide (CO2) net fluxes from LULUCF - Forest Land (Mt CO2e)",
    "Carbon dioxide (CO2) net fluxes from LULUCF - Total excluding non-tropical fires (Mt CO2e)",
    "Water productivity, total (constant 2015 US$ GDP per cubic meter of total freshwater withdrawal)",
    "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",
    "Aquaculture production (metric tons)",
    "Total fisheries production (metric tons)",
    "Cereal production (metric tons)",
    "Land under cereal production (hectares)",
    "Access to electricity, rural (% of rural population)",
    "Fertilizer consumption (kilograms per hectare of arable land)",
    "Carbon dioxide (CO2) emissions from Fugitive Emissions (Energy) (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Fugitive Emissions (Energy) (Mt CO2e)",
    "Renewable internal freshwater resources per capita (cubic meters)",
    "Renewable internal freshwater resources, total (billion cubic meters)",
    "Access to clean fuels and technologies for cooking (% of population)",
    "Access to clean fuels and technologies for cooking, rural (% of rural population)",
    "Access to clean fuels and technologies for cooking, urban (% of urban population)",
    "Carbon intensity of GDP (kg CO2e per 2021 PPP $)",
    "Carbon intensity of GDP (kg CO2e per constant 2021 US$ of GDP)",
    "Rural population growth (annual %)",
    "Natural gas rents (% of GDP)",
    "Permanent cropland (% of land area)",
    "Carbon dioxide (CO2) emissions (total) excluding LULUCF (% change from 1990)",
    "Carbon dioxide (CO2) emissions (total) excluding LULUCF (Mt CO2e)",
    "Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)",
    "Carbon dioxide (CO2) emissions from Building (Energy) (Mt CO2e)",
    "Carbon dioxide (CO2) emissions from Industrial Combustion (Energy) (Mt CO2e)",
    "Carbon dioxide (CO2) emissions from Industrial Processes (Mt CO2e)",
    "Carbon dioxide (CO2) emissions from Power Industry (Energy) (Mt CO2e)",
    "Carbon dioxide (CO2) emissions from Transport (Energy) (Mt CO2e)",
    "Methane (CH4) emissions (total) excluding LULUCF (% change from 1990)",
    "Methane (CH4) emissions (total) excluding LULUCF (Mt CO2e)",
    "Methane (CH4) emissions from Agriculture (Mt CO2e)",
    "Methane (CH4) emissions from Building (Energy) (Mt CO2e)",
    "Methane (CH4) emissions from Fugitive Emissions (Energy) (Mt CO2e)",
    "Methane (CH4) emissions from Industrial Combustion (Energy) (Mt CO2e)",
    "Methane (CH4) emissions from Power Industry (Energy) (Mt CO2e)",
    "Methane (CH4) emissions from Transport (Energy) (Mt CO2e)",
    "Methane (CH4) emissions from Waste (Mt CO2e)",
    "Nitrous oxide (N2O) emissions (total) excluding LULUCF (% change from 1990)",
    "Nitrous oxide (N2O) emissions (total) excluding LULUCF (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Agriculture (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Building (Energy) (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Industrial Combustion (Energy) (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Industrial Processes (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Power Industry (Energy) (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Transport (Energy) (Mt CO2e)",
    "Nitrous oxide (N2O) emissions from Waste (Mt CO2e)",
    "Total greenhouse gas emissions excluding LULUCF (% change from 1990)",
    "Total greenhouse gas emissions excluding LULUCF (Mt CO2e)",
    "Total greenhouse gas emissions per capita excluding LULUCF (t CO2e/capita)",
    "Oil rents (% of GDP)",
    "Coal rents (% of GDP)",
    "Total natural resources rents (% of GDP)",
    "Agricultural land (% of land area)",
    "Arable land (% of land area)",
    "Crop production index (2014-2016 = 100)",
    "Food production index (2014-2016 = 100)",
    "Forest area (% of land area)",
    "Forest area (sq. km)",
    "Land area (sq. km)",
    "Livestock production index (2014-2016 = 100)",
    "Population density (people per sq. km of land area)",
    "Energy intensity level of primary energy (MJ/$2017 PPP GDP)",
    "Access to electricity (% of population)",
    "Access to electricity, urban (% of urban population)",
    "Arable land (hectares per person)",
    "Capture fisheries production (metric tons)",
    "Forest rents (% of GDP)",
    "Mineral rents (% of GDP)",
    "Renewable energy consumption (% of total final energy consumption)",
    "Rural population",
    "Rural population (% of total population)",
    "Surface area (sq. km)",
    "Urban population",
    "Urban population (% of total population)",
    "Urban population growth (annual %)",
    "Prevalence of stunting, height for age (modeled estimate, % of children under 5)",
    "Prevalence of overweight (modeled estimate, % of children under 5)",
    "Prevalence of anemia among children (% of children ages 6-59 months)",
    "Prevalence of anemia among non-pregnant women (% of women ages 15-49)",
    "Prevalence of anemia among pregnant women (%)",
    "Prevalence of anemia among women of reproductive age (% of women ages 15-49)",
    "People using at least basic sanitation services, rural (% of rural population)",
    "People using at least basic drinking water services, urban (% of urban population)",
    "People using at least basic sanitation services, urban (% of urban population)",
    "Prevalence of undernourishment (% of population)",
    "Tuberculosis treatment success rate (% of new cases)",
    "Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)",
    "Total alcohol consumption per capita, female (liters of pure alcohol, projected estimates, female 15+ years of age)",
    "Total alcohol consumption per capita, male (liters of pure alcohol, projected estimates, male 15+ years of age)",
    "Lifetime risk of maternal death (%)",
    "Lifetime risk of maternal death (1 in: rate varies by country)",
    "Maternal mortality ratio (modeled estimate, per 100,000 live births)",
    "Number of maternal deaths",
    "People using at least basic drinking water services (% of population)",
    "People practicing open defecation (% of population)",
    "Mortality rate, adult, female (per 1,000 female adults)",
    "Mortality rate, adult, male (per 1,000 male adults)",
    "Tuberculosis case detection rate (%, all forms)",
    "Out-of-pocket expenditure (% of current health expenditure)",
    "Out-of-pocket expenditure per capita (current US$)",
    "Current health expenditure (% of GDP)",
    "Current health expenditure per capita (current US$)",
    "Current health expenditure per capita, PPP (current international $)",
    "Domestic general government health expenditure (% of GDP)",
    "Domestic general government health expenditure (% of current health expenditure)",
    "Domestic general government health expenditure per capita (current US$)",
    "Domestic general government health expenditure per capita, PPP (current international $)",
    "Domestic private health expenditure (% of current health expenditure)",
    "Domestic private health expenditure per capita (current US$)",
    "Domestic private health expenditure per capita, PPP (current international $)",
    "Out-of-pocket expenditure per capita, PPP (current international $)",
    "People using at least basic sanitation services (% of population)",
    "Adolescent fertility rate (births per 1,000 women ages 15-19)",
    "Birth rate, crude (per 1,000 people)",
    "Death rate, crude (per 1,000 people)",
    "Immunization, DPT (% of children ages 12-23 months)",
    "Immunization, measles (% of children ages 12-23 months)",
    "Incidence of tuberculosis (per 100,000 people)",
    "Mortality rate, infant (per 1,000 live births)",
    "Mortality rate, infant, female (per 1,000 live births)",
    "Mortality rate, infant, male (per 1,000 live births)",
    "Mortality rate, neonatal (per 1,000 live births)",
    "Mortality rate, under-5 (per 1,000 live births)",
    "Mortality rate, under-5, female (per 1,000 live births)",
    "Mortality rate, under-5, male (per 1,000 live births)",
    "Number of deaths ages 10-14 years",
    "Number of deaths ages 15-19 years",
    "Number of deaths ages 20-24 years",
    "Number of deaths ages 5-9 years",
    "Number of infant deaths",
    "Number of neonatal deaths",
    "Number of under-five deaths",
    "Population ages 0-14 (% of total population)",
    "Population ages 0-14, female",
    "Population ages 0-14, female (% of female population)",
    "Population ages 0-14, male",
    "Population ages 0-14, male (% of male population)",
    "Population ages 0-14, total",
    "Population ages 10-14, female (% of female population)",
    "Population ages 10-14, male (% of male population)",
    "Population ages 15-19, female (% of female population)",
    "Population ages 15-19, male (% of male population)",
    "Population ages 15-64 (% of total population)",
    "Probability of dying among adolescents ages 10-14 years (per 1,000)",
    "Probability of dying among adolescents ages 15-19 years (per 1,000)",
    "Probability of dying among children ages 5-9 years (per 1,000)",
    "Probability of dying among youth ages 20-24 years (per 1,000)",
    "International tourism, number of arrivals",
    "Fuel exports (% of merchandise exports)",
    "Insurance and financial services (% of commercial service exports)",
    "Ores and metals imports (% of merchandise imports)",
    "Fuel imports (% of merchandise imports)",
    "International tourism, expenditures for travel items (current US$)",
    "International tourism, receipts for travel items (current US$)",
    "Transport services (% of commercial service exports)",
    "Insurance and financial services (% of commercial service imports)",
    "Travel services (% of commercial service exports)",
    "Transport services (% of commercial service imports)",
    "Travel services (% of commercial service imports)",
    "Computer, communications and other services (% of commercial service exports)",
    "Computer, communications and other services (% of commercial service imports)",
    "Export unit value index (2015 = 100)",
    "Import unit value index (2015 = 100)",
    "Refugee population by country or territory of asylum",
    "Share of youth not in education, employment or training, female (% of female youth population) (modeled ILO estimate)",
    "Share of youth not in education, employment or training, male (% of male youth population) (modeled ILO estimate)",
    "Share of youth not in education, employment or training, total (% of youth population) (modeled ILO estimate)",
    "Refugee population by country or territory of origin",
    "GDP per person employed (constant 2021 PPP $)",
    "Employment to population ratio, 15+, female (%) (modeled ILO estimate)",
    "Employment to population ratio, 15+, male (%) (modeled ILO estimate)",
    "Employment to population ratio, 15+, total (%) (modeled ILO estimate)",
    "Employment to population ratio, ages 15-24, female (%) (modeled ILO estimate)",
    "Employment to population ratio, ages 15-24, male (%) (modeled ILO estimate)",
    "Employment to population ratio, ages 15-24, total (%) (modeled ILO estimate)",
    "Labor force participation rate for ages 15-24, female (%) (modeled ILO estimate)",
    "Labor force participation rate for ages 15-24, male (%) (modeled ILO estimate)",
    "Labor force participation rate for ages 15-24, total (%) (modeled ILO estimate)",
    "Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)",
    "Labor force participation rate, female (% of female population ages 15-64) (modeled ILO estimate)",
    "Labor force participation rate, male (% of male population ages 15+) (modeled ILO estimate)",
    "Labor force participation rate, male (% of male population ages 15-64) (modeled ILO estimate)",
    "Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)",
    "Labor force participation rate, total (% of total population ages 15-64) (modeled ILO estimate)",
    "Labor force, female (% of total labor force)",
    "Labor force, total",
    "Net migration",
    "Ratio of female to male labor force participation rate (%) (modeled ILO estimate)",
    "Self-employed, female (% of female employment) (modeled ILO estimate)",
    "Self-employed, male (% of male employment) (modeled ILO estimate)",
    "Self-employed, total (% of total employment) (modeled ILO estimate)",
    "Unemployment, female (% of female labor force) (modeled ILO estimate)",
    "Unemployment, male (% of male labor force) (modeled ILO estimate)",
    "Unemployment, total (% of total labor force) (modeled ILO estimate)",
    "Unemployment, youth female (% of female labor force ages 15-24) (modeled ILO estimate)",
    "Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)",
    "Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)",
    "Vulnerable employment, female (% of female employment) (modeled ILO estimate)",
    "Vulnerable employment, male (% of male employment) (modeled ILO estimate)",
    "Vulnerable employment, total (% of total employment) (modeled ILO estimate)",
    "Wage and salaried workers, female (% of female employment) (modeled ILO estimate)",
    "Wage and salaried workers, male (% of male employment) (modeled ILO estimate)",
    "Wage and salaried workers, total (% of total employment) (modeled ILO estimate)",
]

# Filter your DataFrame to include only these columns
df = dataset_df[columns_to_keep]
import sqlite3
from contextlib import closing

# Open the SQLite database inside closing() so the connection is released even
# if the query below raises; sqlite3's own `with conn:` block only manages the
# transaction and does not close the connection.
with closing(sqlite3.connect(r"/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 2/Database Files/BANA698GROUP1.db.db")) as conn:
    # Read the "Country" table selecting only the required columns
    df_country = pd.read_sql_query("SELECT [Short Name], [Region], [Income Group] FROM Country", conn)

# Left-merge the country attributes so every dataset row is kept, matching
# CountryShortName against the table's "Short Name"
df = df.merge(df_country, left_on="CountryShortName", right_on="Short Name", how="left")

# Drop the join key brought in from the Country table; CountryShortName already holds it
df = df.drop(columns=["Short Name"])

# Missing-value report for the merged dataset
dataset_missing_report = create_missing_report_by_column_with_source(df, indicator_to_source)

save_csv(dataset_missing_report, '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC/Dataset_Missing_Report.csv')

save_csv(df, '/Users/josh/Desktop/Macbook Working Files/Git Repos/culminating-project-group-1/Week 5/Datasets and Research Questions/JC/Group1DatasetFinal.csv')

# Bare expression so the notebook renders the final DataFrame
df

Unnamed: 0,CountryShortName,Year,"Life expectancy at birth, female (years)","Life expectancy at birth, male (years)","Life expectancy at birth, total (years)",GOAL 13: Climate Action (5 year moving average),Pillar 3 - Data Products - Score,GOAL 10: Reduced Inequality (5 year moving average),GOAL 11: Sustainable Cities and Communities (5 year moving average),GOAL 12: Responsible Consumption and Production (5 year moving average),...,"Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)","Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)","Wage and salaried workers, female (% of female employment) (modeled ILO estimate)","Wage and salaried workers, male (% of male employment) (modeled ILO estimate)","Wage and salaried workers, total (% of total employment) (modeled ILO estimate)",Region,Income Group
0,Afghanistan,2008,61.529,58.172,59.852,0.000,50.95000,0.500,0.000,1.000,...,9.384,10.135,98.195259,89.193279,90.607745,1.747149,10.237973,8.903808,South Asia,Low income
1,Afghanistan,2009,61.892,58.820,60.364,0.000,50.20625,0.500,0.000,1.000,...,9.252,10.008,97.905184,88.318177,89.851765,2.012832,10.995864,9.558891,South Asia,Low income
2,Afghanistan,2010,62.408,59.278,60.851,0.000,46.65625,0.500,0.000,0.667,...,9.222,9.990,97.655379,87.575836,89.218193,2.235117,11.591773,10.067202,South Asia,Low income
3,Afghanistan,2011,62.993,59.825,61.419,0.000,47.91875,0.400,0.000,0.667,...,9.220,10.003,97.155673,86.471425,88.243631,2.669294,12.577843,10.934303,South Asia,Low income
4,Afghanistan,2012,63.514,60.317,61.923,0.000,49.16875,0.400,0.000,0.667,...,9.255,10.054,96.670295,85.362101,87.269726,3.080709,13.513905,11.753901,South Asia,Low income
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2881,Zimbabwe,2017,62.956,58.208,60.709,0.500,60.91250,0.600,0.800,0.750,...,9.092,9.827,79.133607,62.711302,70.977397,19.889681,34.894377,27.341793,Sub-Saharan Africa,Lower middle income
2882,Zimbabwe,2018,63.594,58.968,61.414,0.500,61.17500,0.600,0.800,0.750,...,10.031,10.699,77.848366,61.149456,69.579895,20.862983,35.723073,28.220997,Sub-Saharan Africa,Lower middle income
2883,Zimbabwe,2019,63.661,58.644,61.292,1.000,70.23750,0.600,1.000,0.750,...,11.114,11.718,76.330266,59.538768,68.043297,21.827520,36.523147,29.080129,Sub-Saharan Africa,Lower middle income
2884,Zimbabwe,2020,63.862,58.129,61.124,1.000,75.51250,0.571,0.667,0.750,...,13.237,14.258,76.404615,60.064857,68.271819,21.667887,36.090362,28.846376,Sub-Saharan Africa,Lower middle income
