In [28]:
import pandas as pd
import time
from mrio import MRIO
from utils import get_years, aggregate_sectors, convert_dtypes, ind_pattern, progress_check

start = time.time()
mrio_versions = ['72', '62', '62c']

version = '62'

input_file = f'mrio-{version}.parquet'
output_file = f'A-{version}.parquet'

# 获取年份列表
try:
    years = get_years(f'data/{input_file}')
except Exception as e:
    print(f"Error getting years: {e}")
    exit(1)

df = pd.DataFrame()

def process_year(year, input_file):
    try:
        mrio = MRIO(f'data/{input_file}', year, full=True)
    except Exception as e:
        print(f"Error initializing MRIO for year {year}: {e}")
        return None

    country_indices = mrio.country_inds()
    sector_indices = mrio.sector_inds()

    index = pd.MultiIndex.from_product([country_indices, sector_indices], names=['country ind', 'sector ind'])
    columns = pd.MultiIndex.from_product([country_indices, sector_indices], names=['country ind', 'sector ind'])

    A_df = pd.DataFrame(mrio.A.data, index=index, columns=columns)
    A_df.reset_index(inplace=True)

    A_long = A_df.melt(id_vars=['country ind', 'sector ind'], 
                       var_name=['country ind_2', 'sector ind_2'], 
                       value_name='A')

    A_long.rename(columns={'country ind': 'country_from', 'sector ind': 'sector_from',
                           'country ind_2': 'country_to', 'sector ind_2': 'sector_to'}, inplace=True)

    A_long['t'] = mrio.year
    A_long = A_long[['t', 'country_from', 'country_to', 'sector_from', 'sector_to', 'A']]
    
    return A_long

for year in years:
    df_t = process_year(year, input_file)
    if df_t is not None:
        df = pd.concat([df, df_t], ignore_index=True)

# 保存最终结果
# try:
#     df.to_parquet(output_file)
# except Exception as e:
#     print(f"Error saving DataFrame to parquet: {e}")

In [29]:
df

Unnamed: 0,t,country_from,country_to,sector_from,sector_to,A
0,2019,1,1,1,1,0.164746
1,2019,1,1,2,1,0.002941
2,2019,1,1,3,1,0.017330
3,2019,1,1,4,1,0.000271
4,2019,1,1,5,1,0.000041
...,...,...,...,...,...,...
9724045,2023,63,63,31,35,0.000000
9724046,2023,63,63,32,35,0.000000
9724047,2023,63,63,33,35,0.000000
9724048,2023,63,63,34,35,0.000000


In [30]:
df[df.country_from == 8][df.country_to == 8].to_excel(f'A_long_matrix_{version}-{year}.xlsx')

  df[df.country_from == 8][df.country_to == 8].to_excel(f'A_long_matrix_{version}-{year}.xlsx')
