In [29]:

## FROM BENCHMARK TABLES TO AGGREGATE FINAL DEMAND ##

# THE FOLLOWING SCRIPT CONTAINS A LIBRARY OF 3 FUNCTIONS WHICH USE MULTI-INDEXED INPUT-OUTPUT DATA TO:

# (A)	EXTRACT SPECIFIED FINAL DEMAND CATEGORIES RETAINING INDICES: Create a standard format of final demand categories
    # i.	Function developed with help of ChatGPT preserving the multi-index format of industry/commodity codes and names
    # ii.	All final demand categories were extracted: exports, imports, inventory changes were removed in post
    
# (B)	CREATE CONCORDANCE TABLES FOR DEFINED AGGREGATE CATEGORIES: 
    # i.	Function developed with help of ChatGPT to group industry codes based on lexical range for manually constructed aggregate categories
    # ii.	Aggregate level was defined based on year of highest aggregation:
        # 1.	Year 1997 has only 3 government consumption categories and was chosen as the default level of aggregation.
    # iii.	Then a binary matrix of aggregate columns and disaggregated rows is constructed with a binary 0/1 format for multiplication.

# (C)	CONDUCT MATRIX AGGREGATION: Complete matrix aggregation by matching the data frames
    # i.	Match the final demand matrix columns to the concordance columns for each year, and then do cross-multiplication to aggregate

import os
os.chdir(r'C:\Users\albin\Documents\Vienna\Semester 4\Thesis\2) Socio-metabolic branch\1) Capital formation\Jan GitHub repository\BEA benchmark IO tables & assembly')

import pandas as pd

## CREATE EXCEL Y-MATRICES FOR ALL YEARS ##

def create_Y_matrix(filepath, code_start, code_end,
                    output_path=None, sheet_name=0,
                    code_placement=(0, slice(2, None)),
                    name_placement=(1, slice(2, None)),
                    data_start_row=3):
    
    # Load Jan's raw Benchmark tables without headers
    Z_raw = pd.read_excel(filepath, header=None, sheet_name=sheet_name)
    
    # Extract Industry codes and names (for most years excel row 0 for codes, row 1 for names), starting from column 2
    industry_codes = Z_raw.iloc[code_placement]#.astype(str).values
    industry_names = Z_raw.iloc[name_placement]#.astype(str).values
    
    # Filter Final demand columns: keep columns with the final demand industry codes (e.g. between 910 and 993, or F01 and F10)
    keep_mask = industry_codes.astype(str).str[:3].between(code_start, code_end)
    final_demand_codes = industry_codes[keep_mask]#.astype(str).values
    final_demand_names = industry_names[keep_mask]#.astype(str).values
    
    # Re-establish Jan's MultiIndex format for columns, for the array values use labels "Y_code" and "Y_name"
    final_demand_multiindex = pd.MultiIndex.from_arrays(
        [final_demand_codes.values, final_demand_names.values],
        names=["Y_code", "Y_name"]
    )
    
    # Extract commodity codes and names (in most years these are found in excel row 3 onward, for cols 0 and 1)
    commodity_codes = Z_raw.iloc[data_start_row:, 0]#.astype(str).values
    commodity_names = Z_raw.iloc[data_start_row:, 1]#.astype(str).values
    
    # Re-establish Jan's MultiIndex format for rows, for the array values use labels "commodity_code" and "commodity_name"
    commodity_multiindex = pd.MultiIndex.from_arrays(
        [commodity_codes.values, commodity_names.values],
        names=["commodity_code", "commodity_name"]
    )
    
    # Extract filtered matrix columns: 
    # For each keep_mask filtered industry code (i.e. keep = True), calculate the actual column index (ignoring columns 1 and 2)
    # Then extract the data portion from row 2 on for each index element
    filtered_col_indices = [i + 2 for i, keep in enumerate(keep_mask) if keep]
    Y_data = Z_raw.iloc[data_start_row:, filtered_col_indices]
    
    # Apply multi-index structure
    Y_data.columns = final_demand_multiindex
    Y_data.index = commodity_multiindex
    
    # Save to Excel and indicate when each file has been created
    if output_path:
        Y_data.to_excel(output_path)
        print(f"Filtered Final demand table saved to: {output_path}")
        
    return Y_data

## CREATE EXCEL MATRIX FOR 1963 ##

FD_1963 = create_Y_matrix(
    filepath=r'1963_TotalTransactions.xlsx',
    output_path='New Y-Matrices\Y_matrix_1963.xlsx',
    code_start='966',
    code_end='989'
)

## CREATE EXCEL MATRIX FOR 1967 ##

FD_1967 = create_Y_matrix(
    filepath=r'1967_TotalTransactions.xlsx',
    output_path='New Y-Matrices\Y_matrix_1967.xlsx',
    code_start='910',
    code_end='989'
)

## CREATE EXCEL MATRIX FOR 1972 ##

FD_1972 = create_Y_matrix(
    filepath=r'1972_Use_NoZeroEntries_v2_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_1972.xlsx',
    code_start='910',
    code_end='993'
)

## CREATE EXCEL MATRIX FOR 1977 ##

FD_1977 = create_Y_matrix(
    filepath=r'1977_Use_NoZeroEntries_v2_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_1977.xlsx',
    code_start='910',
    code_end='993'
)

## CREATE EXCEL MATRIX FOR 1982 ##

FD_1982 = create_Y_matrix(
    filepath=r'1982_Use_NoZeroEntries_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_1982.xlsx',
    code_start='910',
    code_end='993'
)

## CREATE EXCEL MATRIX FOR 1987 ##

FD_1987 = create_Y_matrix(
    filepath=r'1987_Use_NoZeroEntries_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_1987.xlsx',
    code_start='910',
    code_end='993'
)

## CREATE EXCEL MATRIX FOR 1992 ##

FD_1992 = create_Y_matrix(
    filepath=r'1992_Use_NoZeroEntries_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_1992.xlsx',
    code_start='910',
    code_end='993'
)

## CREATE EXCEL MATRIX FOR 1997 ##

FD_1997 = create_Y_matrix(
    filepath=r'1997_Use_AfterRedefinitions_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_1997.xlsx',
    code_start='F01',
    code_end='F09'
)

## CREATE EXCEL MATRIX FOR 2002 ##

FD_2002 = create_Y_matrix(
    filepath=r'2002_Use_AfterRedefinitions_sorted_final_demand.xlsx',
    output_path='New Y-Matrices\Y_matrix_2002.xlsx',
    code_start='F01',
    code_end='F09'
)

## CREATE EXCEL MATRIX FOR 2007 ##

FD_2007 = create_Y_matrix(
    filepath=r'2007_12_IOUse_After_Redefinitions_PRO_DET_Python_sorted_final_demand.xlsx', sheet_name='2007',
    output_path='New Y-Matrices\Y_matrix_2007.xlsx',
    code_placement=(1, slice(2, None)),
    name_placement=(0, slice(2, None)),
    data_start_row=2,
    code_start='F01',
    code_end='F10'
)

## CREATE EXCEL MATRIX FOR 2012 ##

FD_2012 = create_Y_matrix(
    filepath=r'2007_12_IOUse_After_Redefinitions_PRO_DET_Python_sorted_final_demand.xlsx', sheet_name='2012',
    output_path='New Y-Matrices\Y_matrix_2012.xlsx',
    code_placement=(1, slice(2, None)),
    name_placement=(0, slice(2, None)),
    data_start_row=2,
    code_start='F01',
    code_end='F10'
)


Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1963.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1967.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1972.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1977.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1982.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1987.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1992.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_1997.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_2002.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_2007.xlsx
Filtered Final demand table saved to: New Y-Matrices\Y_matrix_2012.xlsx
        F01  F02  F06  F07  F10
code                           
F01000    1    0    0    0    0
F02E00    0    1    0    0    0
F02N00    0    1    0    0    0
F02R00    0    1    0    0    0
F02S00    0    1

In [10]:

## NO OUTPUT-PATH ## NO NEW FILES CREATED

import os
os.chdir(r'C:\Users\albin\Documents\Vienna\Semester 4\Thesis\2) Socio-metabolic branch\1) Capital formation\Jan GitHub repository\BEA benchmark IO tables & assembly')

import pandas as pd
import numpy as np

## CREATE Y-MATRIX DATA FRAMES FOR ALL YEARS ##

def create_Y_matrix(filepath, code_start, code_end,
                    drop_codes=None,
                    output_path=None, sheet_name=0,
                    code_placement=(0, slice(2, None)),
                    name_placement=(1, slice(2, None)),
                    data_start_row=3):
    
    # Load Jan's raw Benchmark tables without headers
    Z_raw = pd.read_excel(filepath, header=None, sheet_name=sheet_name)
    
    # Extract Industry codes and names (for most years excel row 0 for codes, row 1 for names), starting from column 2
    industry_codes = Z_raw.iloc[code_placement]
    industry_names = Z_raw.iloc[name_placement]
    
    # Filter Final demand columns: keep columns with the final demand industry codes (e.g. between 910 and 993, or F01 and F10)
    keep_mask = industry_codes.astype(str).str[:3].between(code_start, code_end)
    
    # Drop specified codes such as imports, exports, and change in private inventories for the current codes
    # isin() checks if code is in drop_codes and ~ negates those codes; industry_codes is then updated
    if drop_codes:
        drop_mask = ~industry_codes.isin(drop_codes)
        keep_mask = keep_mask & drop_mask
    
    final_demand_codes = industry_codes[keep_mask]
    final_demand_names = industry_names[keep_mask]
    
    # Re-establish Jan's MultiIndex format for columns, for the array values use labels "Y_code" and "Y_name"
    final_demand_multiindex = pd.MultiIndex.from_arrays(
        [final_demand_codes.values, final_demand_names.values],
        names=["Y_code", "Y_name"]
    )
    
    # Extract commodity codes and names (in most years these are found in excel row 3 onward, for cols 0 and 1)
    commodity_codes = Z_raw.iloc[data_start_row:, 0]
    commodity_names = Z_raw.iloc[data_start_row:, 1]
    
    # Re-establish Jan's MultiIndex format for rows, for the array values use labels "commodity_code" and "commodity_name"
    commodity_multiindex = pd.MultiIndex.from_arrays(
        [commodity_codes.values, commodity_names.values],
        names=["commodity_code", "commodity_name"]
    )
    
    # Extract filtered matrix columns: 
    # For each keep_mask filtered industry code (i.e. keep = True), calculate the actual column index (ignoring columns 1 and 2)
    # Then extract the data portion from row 2 on for each index element
    filtered_col_indices = [i + 2 for i, keep in enumerate(keep_mask) if keep]
    Y_data = Z_raw.iloc[data_start_row:, filtered_col_indices]
    
    # Apply multi-index structure
    Y_data.columns = final_demand_multiindex
    Y_data.index = commodity_multiindex
    
    # Save to Excel and indicate when each file has been created
    if output_path:
        Y_data.to_excel(output_path)
        print(f"Filtered Final demand table saved to: {output_path}")
        
    return Y_data

## CREATE MATRIX DATA FRAME FOR 1963 ##

FD_1963 = create_Y_matrix(
    filepath=r'1963_TotalTransactions.xlsx',
    code_start='966',
    code_end='989',
    drop_codes=['9680', '9690']
)

## CREATE MATRIX DATA FRAME FOR 1967 ##

FD_1967 = create_Y_matrix(
    filepath=r'1967_TotalTransactions.xlsx',
    code_start='910',
    code_end='989',
    drop_codes=['930000', '940000']
)

## CREATE MATRIX DATA FRAME FOR 1972 ##

FD_1972 = create_Y_matrix(
    filepath=r'1972_Use_NoZeroEntries_v2_sorted_final_demand.xlsx',
    code_start='910',
    code_end='993',
    drop_codes=['930000', '940000', '950000']
)

## CREATE MATRIX DATA FRAME FOR 1977 ##

FD_1977 = create_Y_matrix(
    filepath=r'1977_Use_NoZeroEntries_v2_sorted_final_demand.xlsx',
    code_start='910',
    code_end='993',
    drop_codes=['930000', '940000', '950000']
)

## CREATE MATRIX DATA FRAME FOR 1982 ##

FD_1982 = create_Y_matrix(
    filepath=r'1982_Use_NoZeroEntries_sorted_final_demand.xlsx',
    code_start='910',
    code_end='993',
    drop_codes=['930000', '940000', '950000']
)

## CREATE MATRIX DATA FRAME FOR 1987 ##

FD_1987 = create_Y_matrix(
    filepath=r'1987_Use_NoZeroEntries_sorted_final_demand.xlsx',
    code_start='910',
    code_end='993',
    drop_codes=['930000', '940000', '950000']
)

## CREATE MATRIX DATA FRAME FOR 1992 ##

FD_1992 = create_Y_matrix(
    filepath=r'1992_Use_NoZeroEntries_sorted_final_demand.xlsx',
    code_start='910',
    code_end='993',
    drop_codes=['930000', '940000', '950000']
)

## CREATE MATRIX DATA FRAME FOR 1997 ##

FD_1997 = create_Y_matrix(
    filepath=r'1997_Use_AfterRedefinitions_sorted_final_demand.xlsx',
    code_start='F01',
    code_end='F09',
    drop_codes=['F03000', 'F04000', 'F05000']
)

## CREATE MATRIX DATA FRAME FOR 2002 ##

FD_2002 = create_Y_matrix(
    filepath=r'2002_Use_AfterRedefinitions_sorted_final_demand.xlsx',
    code_start='F01',
    code_end='F09',
    drop_codes=['F03000', 'F04000', 'F05000']
)

## CREATE MATRIX DATA FRAME FOR 2007 ##

FD_2007 = create_Y_matrix(
    filepath=r'2007_12_IOUse_After_Redefinitions_PRO_DET_Python_sorted_final_demand.xlsx', sheet_name='2007',
    code_placement=(1, slice(2, None)),
    name_placement=(0, slice(2, None)),
    data_start_row=2,
    code_start='F01',
    code_end='F10',
    drop_codes=['F03000', 'F04000', 'F05000']
)

## CREATE MATRIX DATA FRAME FOR 2012 ##

FD_2012 = create_Y_matrix(
    filepath=r'2007_12_IOUse_After_Redefinitions_PRO_DET_Python_sorted_final_demand.xlsx', sheet_name='2012',
    code_placement=(1, slice(2, None)),
    name_placement=(0, slice(2, None)),
    data_start_row=2,
    code_start='F01',
    code_end='F10',
    drop_codes=['F03000', 'F04000', 'F05000']
)

## CONCORDANCE AGGREGATION ##

# 1997 USED AS BASE YEAR FOR AGGREGATION DUE TO HIGHEST AGGREGATE CATEGORIES

def build_crossyear_concordance(
    current_fd,
    custom_groups=None,
):
    
    # Get Y_codes from current FDs; Returns indexed values for a level of a MultiIndex.
    current_codes = current_fd.columns.get_level_values("Y_code").astype(str)
    
    # The DataFrame current_df is created with a single column "code" containing all "current_codes"
    current_df = pd.DataFrame({'code': current_codes})
    
    # The function assign_group_by_prefix matches the beginning characters (prefix) of the code as a string.
    def assign_group_by_lexical_range(code):
        for group_name, (low, high) in custom_groups:
            if str(low) <= code <= str(high):
                return group_name
        return None

    # Assign group
    current_df['group'] = current_df['code'].apply(assign_group_by_lexical_range)

    # Unmatched codes (those that don’t fall within any specified range) are dropped before generating the matrix
    current_df = current_df.dropna(subset=['group'])
    
    # Based on the string- or integer-based range groupings of the current_df
    # Return a dictionary with the specified prefixes and value=None (dict.fromkeys preserves the order); list converts them back into a list; 
    ref_groups = list(dict.fromkeys(current_df['group']))

    # Build binary matrix: np.zeros returns a new array of given shape and type, filled with zeros
    # Create a matrix of zeros with the rows-length of current_df, and the columns-length of ref_groups, the data type is integers
    concordance_matrix = np.zeros((len(current_df), len(ref_groups)), dtype=int)
    
    # Then for each reference group (aggregate column categories i in ref_groups), mark the rows where the current code belongs to that group (based on N-digit prefix)
    # concordance_matrix[:, i] uses NumPy slicing to select all rows (:) in aggregate column group i of the concordance_matrix
    # Then a Boolean Series is created with True where the row's group equals aggregate category 'group', == indicating False otherwise
    # Finally, Astype (integers) converts the True/False Boolean into 1/0
    for i, group in enumerate(ref_groups):
        concordance_matrix[:, i] = (current_df['group'] == group).astype(int)
    
    # Finally a dataframe is created based on the concordance matrix with the row index current codes and column index ref_groups
    concordance_df = pd.DataFrame(concordance_matrix, index=current_df['code'], columns=ref_groups)

    return concordance_df


## CUSTOM GROUPS FOR YEAR 1963 ##

# Define custom groupings using numeric ranges
custom_groups_1963 = [
    ("PCE", (9660, 9660)),
    ("Private GFCF", (9670, 9670)),
    ("Federal Government purchases, defense", (9710, 9710)),
    ("Federal Government purchases, other", (9720, 9720)),
    ("State & local government purchases", (9860, 9890))
]

## CUSTOM GROUPS FOR YEAR 1967 ##

custom_groups_1967 = [
    ("PCE", (910000, 910000)),
    ("Private GFCF", (920000, 920000)),
    ("Federal Government purchases, defense", (971000, 971000)),
    ("Federal Government purchases, other", (972000, 972000)),
    ("State & local government purchases", (986000, 989000))
]

## CUSTOM GROUPS FOR YEARS 1972-1992 ##

custom_groups_1972_1992 = [
    ("PCE", (910000, 910000)),
    ("Private GFCF", (920000, 920000)),
    ("Federal Government purchases, defense", ('960000', '9600I0')),
    ("Federal Government purchases, other", ('970000', '9700I0')),
    ("State & local government purchases", ('980000', '9930I9'))
]

## CUSTOM GROUPS FOR YEARS 1997-2012 ##

# Define custom groupings using prefix ranges '' inputted correctly?
custom_groups_1997_2012 = [
    ("PCE", ('F01000', 'F01000')),
    ("Private GFCF", ('F02000', 'F02S00')),
    ("Federal Government purchases, defense", ('F06C00', 'F06S00')),
    ("Federal Government purchases, other", ('F07C00', 'F07S00')),
    ("State & local government purchases", ('F08C00', 'F10S00'))
]

## AGGREGATION FOR 1963 ## Federal 2 categories, State and local 4 categories

# Call the concordance builder
concordance_1963 = build_crossyear_concordance(
    current_fd=FD_1963,
    custom_groups=custom_groups_1963,
)

## AGGREGATION FOR 1967 ##

concordance_1967 = build_crossyear_concordance(
    current_fd=FD_1967,
    custom_groups=custom_groups_1967,
)

## AGGREGATION FOR 1972 ##

concordance_1972 = build_crossyear_concordance(
    current_fd=FD_1972,
    custom_groups=custom_groups_1972_1992,
)

## AGGREGATION FOR 1977 ## 

concordance_1977 = build_crossyear_concordance(
    current_fd=FD_1977,
    custom_groups=custom_groups_1972_1992,
)

## AGGREGATION FOR 1982 ## Federal 2 categories, State and local 19 categories

concordance_1982 = build_crossyear_concordance(
    current_fd=FD_1982,
    custom_groups=custom_groups_1972_1992,
)

## AGGREGATION FOR 1987 ##

concordance_1987 = build_crossyear_concordance(
    current_fd=FD_1987,
    custom_groups=custom_groups_1972_1992,
)

## AGGREGATION FOR 1992 ##

concordance_1992 = build_crossyear_concordance(
    current_fd=FD_1992,
    custom_groups=custom_groups_1972_1992,
)

## AGGREGATION FOR 1997 ## Federal 2 categories, State and local 2 categories

concordance_1997 = build_crossyear_concordance(
    current_fd=FD_1997,
    custom_groups=custom_groups_1997_2012,
)

## AGGREGATION FOR 2002 ##

concordance_2002 = build_crossyear_concordance(
    current_fd=FD_2002,
    custom_groups=custom_groups_1997_2012,
)

## AGGREGATION FOR 2007 ## Federal 2 categories, State and local 1 category

concordance_2007 = build_crossyear_concordance(
    current_fd=FD_2007,
    custom_groups=custom_groups_1997_2012,
)

## AGGREGATION FOR 2012 ##

concordance_2012 = build_crossyear_concordance(
    current_fd=FD_2012,
    custom_groups=custom_groups_1997_2012,
)

 # inspect specific concordances
 print(f'({concordance_2012})')


In [11]:
## CREATE NEW AGGREGATE MATRICES ##

def aggregate_fd_with_concordance(FD_data, concordance_df, output_path=None):
    # Standardize the Y_codes from FD_data and the concordance index
    # First extract the Y_code column of the FD MultiIndex data and convert it to string to ensure it's compatible with concordance_df.index
    # Then extract the concordance index and ensure it is also in string format (currently some are not, see 1963, 1967)
    # The strip() method removes leading and trailing whitespace (spaces, tabs, newlines) from strings
    fd_ycodes = FD_data.columns.get_level_values("Y_code").astype(str).str.strip()
    concordance_df.index = concordance_df.index.astype(str).str.strip()
    
    # Identify and mask matching columns
    # Concordance rows should match Y_code columns: create a boolean mask: True for columns in FD_data that have a matching row in the concordance
    matched_cols = fd_ycodes.isin(concordance_df.index)
    
    if not matched_cols.any():
        raise ValueError("No matching Y_codes found between FD_data and concordance.")

    # Filter FD_data columns that exist in concordance;
    # Select only the columns in FD_data that exist in the concordance; Example shape: FD_matched(385, 20) = FD_data(385, 25) x concordance cols(25,)
    # Keep the Y_codes corresponding to the matched columns for ordering the matrix: Example shape: fd_ycodes_matched(20,) x concordance cols(25,)
    FD_matched = FD_data.loc[:, matched_cols]
    fd_ycodes_matched = fd_ycodes[matched_cols]
    
    # Ensure matching order in concordance
    # Retrieve the rows in the concordance matrix in the exact same order as the columns in FD_matrix
    concordance_ordered = concordance_df.loc[fd_ycodes_matched]
    
    # First Wraps FD_matched and concordance_matrix in DataFrames (defensive; FD_matched is already likely a DataFrame)
    # pd.to_numeric ensures all values are numeric, non-numeric strings are converted to NaN, then filled with 0 to ensure matrix math will work (no empty cells)
    FD_matrix = pd.DataFrame(FD_matched).apply(pd.to_numeric, errors='coerce').fillna(0).values
    concordance_matrix = pd.DataFrame(concordance_ordered).apply(pd.to_numeric, errors='coerce').fillna(0).values

    # Matrix multiplication shape: (commodities × Y_codes) × (Y_codes × groups) → (commodities × groups)
    aggregated_matrix = FD_matrix @ concordance_matrix

    # Build new DataFrame with same MultiIndex rows and new column groups
    aggregated_df = pd.DataFrame(
        aggregated_matrix,
        index=FD_data.index.copy(),  # Preserve the original MultiIndex rows
        columns=concordance_df.columns  # New grouped column names
    )

    # Optional Excel output if output path is defined
    if output_path:
        aggregated_df.to_excel(output_path)
        print(f"Aggregated final demand table saved to: {output_path}")

    return aggregated_df

## AGGREGATION FOR 1963 ##

FD_aggregated_1963 = aggregate_fd_with_concordance(
    FD_data=FD_1963,
    concordance_df=concordance_1963,
    output_path="Aggregated Y-matrices/FD_1963_aggregated.xlsx"
)

## AGGREGATION FOR 1967 ##

FD_aggregated_1967 = aggregate_fd_with_concordance(
    FD_data=FD_1967,
    concordance_df=concordance_1967,
    output_path="Aggregated Y-matrices/FD_1967_aggregated.xlsx"
)

## AGGREGATION FOR 1972 ##

FD_aggregated_1972 = aggregate_fd_with_concordance(
    FD_data=FD_1972,
    concordance_df=concordance_1972,
    output_path="Aggregated Y-matrices/FD_1972_aggregated.xlsx"
)

## AGGREGATION FOR 1977 ##

FD_aggregated_1977 = aggregate_fd_with_concordance(
    FD_data=FD_1977,
    concordance_df=concordance_1977,
    output_path="Aggregated Y-matrices/FD_1977_aggregated.xlsx"
)

## AGGREGATION FOR 1982 ##

FD_aggregated_1982 = aggregate_fd_with_concordance(
    FD_data=FD_1982,
    concordance_df=concordance_1982,
    output_path="Aggregated Y-matrices/FD_1982_aggregated.xlsx"
)

## AGGREGATION FOR 1987 ##

FD_aggregated_1987 = aggregate_fd_with_concordance(
    FD_data=FD_1987,
    concordance_df=concordance_1987,
    output_path="Aggregated Y-matrices/FD_1987_aggregated.xlsx"
)

## AGGREGATION FOR 1992 ##

FD_aggregated_1992 = aggregate_fd_with_concordance(
    FD_data=FD_1992,
    concordance_df=concordance_1992,
    output_path="Aggregated Y-matrices/FD_1992_aggregated.xlsx"
)

## AGGREGATION FOR 1997 ##

FD_aggregated_1997 = aggregate_fd_with_concordance(
    FD_data=FD_1997,
    concordance_df=concordance_1997,
    output_path="Aggregated Y-matrices/FD_1997_aggregated.xlsx"
)

## AGGREGATION FOR 2002 ##

FD_aggregated_2002 = aggregate_fd_with_concordance(
    FD_data=FD_2002,
    concordance_df=concordance_2002,
    output_path="Aggregated Y-matrices/FD_2002_aggregated.xlsx"
)

## AGGREGATION FOR 2007 ##

FD_aggregated_2007 = aggregate_fd_with_concordance(
    FD_data=FD_2007,
    concordance_df=concordance_2007,
    output_path="Aggregated Y-matrices/FD_2007_aggregated.xlsx"
)

## AGGREGATION FOR 2012 ##

FD_aggregated_2012 = aggregate_fd_with_concordance(
    FD_data=FD_2012,
    concordance_df=concordance_2012,
    output_path="Aggregated Y-matrices/FD_2012_aggregated.xlsx"
)



Aggregated final demand table saved to: Aggregated Y-matrices/FD_1963_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1967_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1972_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1977_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1982_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1987_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1992_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_1997_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_2002_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_2007_aggregated.xlsx
Aggregated final demand table saved to: Aggregated Y-matrices/FD_2012_aggregated.xlsx


In [9]:

## Checking for any inconsistencies in data transformations ##

def inspect_fd_and_concordance(FD_data, concordance_df):
    # Print unique column codes and groupings
    print("\nUnique Y_codes in FD_data:")
    print(FD_data.columns.get_level_values("Y_code").unique().tolist())
    
    print("\nConcordance index (should match Y_codes):")
    print(concordance_df.index.tolist())
    
    print("\nConcordance columns (group names):")
    print(concordance_df.columns.tolist())
    
    print("\nAny 'None' or NaN group names in concordance columns?")
    print([col for col in concordance_df.columns if pd.isna(col) or col in ('None', None, '')])

    # Check if any columns in concordance are all zeros
    print("\nColumns in concordance that are all zeros (likely causing empty aggregation columns):")
    all_zero_cols = concordance_df.columns[(concordance_df == 0).all(axis=0)]
    print(all_zero_cols.tolist())

    # Optional: check if any Y_codes in FD_data are missing from concordance index
    missing_ycodes = set(FD_data.columns.get_level_values("Y_code").astype(str)) - set(concordance_df.index.astype(str))
    print("\nY_codes in FD_data not found in concordance index:")
    print(missing_ycodes)

# Example usage:
inspect_fd_and_concordance(FD_1963, concordance_1963)





Unique Y_codes in FD_data:
[9660, 9670, 9680, 9690, 9710, 9720, 9860, 9870, 9880, 9890]

Concordance index (should match Y_codes):
['9660', '9670', '9680', '9690', '9710', '9720', '9860', '9870', '9880', '9890']

Concordance columns (group names):
['PCE', 'Private GFCF', None, 'Federal Government purchases, defense', 'Federal Government purchases, other', 'State & local government purchases']

Any 'None' or NaN group names in concordance columns?
[None]

Columns in concordance that are all zeros (likely causing empty aggregation columns):
[None]

Y_codes in FD_data not found in concordance index:
set()
