In [76]:
import polars as pl
import pandas as pd
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import subprocess
import stat 

def gen_spc_file_text(data_filename):
    spc_text = r"""series{
  file=""" + data_filename + r"""
  period=12
  format=datevalue
}
transform{
  function=auto
}
x11{ save=d11 }"""
    return spc_text
def convert_leading_zeroes_to_nan(df):
    def replace_leading_zeroes(series):
        non_zero_found = False
        new_series = []
        for value in series:
            if value != 0:
                non_zero_found = True
            if not non_zero_found and value == 0:
                new_series.append(np.nan)
            else:
                new_series.append(value)
        return pd.Series(new_series, index=series.index)
    
    return df.apply(replace_leading_zeroes, axis=0)
    
def list_txt_files(directory):
    # List all files in the specified directory
    files = os.listdir(directory)
    # Filter and return only files that end with .txt
    txt_files = [file for file in files if file.endswith('.txt')]
    return txt_files
def read_csv_and_prep_df(filename, midpoint = True):
    df = pd.read_csv(filename)
    df = convert_leading_zeroes_to_nan(df)
    if midpoint:
        for col in df.columns[2:]: df[col] = df[col].astype(float)
        df.iloc[:, 2:] = df.iloc[:, 2:].replace(0., np.nan)
        df = df.interpolate(method='linear', limit = 100, limit_area = 'inside')
    df.columns = [i.replace(' - ', '__').replace(' ', 'ppp') for i in df.columns]
    return df
def list_files_with_extension(directory, ext):
    files = os.listdir(directory)
    txt_files = [file for file in files if file.endswith(ext)]
    return txt_files
def gen_state_level_df(file_name, folder_path):
    file_path = folder_path + file_name
    col_names = ['date', file_name.replace(folder_path, '')[5:-4]]
    df = pd.read_csv(file_path, sep = '\t', header = None, skiprows = 2, names = col_names)
    return df
def make_x13_files_from_df(df, folder):
    os.system(f'mkdir -p {folder}')
    for var in df.columns:
        if var not in ['YEAR', 'MONTH']:
            df[['YEAR', 'MONTH', var]].dropna().to_csv(f'{folder}{var}.txt', sep=' ', index=False, header=False)
    for raw_data_file in list_txt_files(folder):
        file = open(f'{folder}spec_' + raw_data_file[:-4] + '.spc', 'w')
        file.write(gen_spc_file_text(raw_data_file))
        file.close()
def make_x13_files_from_csv(filename, folder, midpoint = True):
    df = read_csv_and_prep_df(filename, midpoint = True)
    make_x13_files_from_df(df, folder)
def join_x13_results(folder):
    x13_results_files = list_files_with_extension(folder, '.d11')
    results = [gen_state_level_df(file, folder) for file in x13_results_files]
    total_df = results[0]
    for state_df in results[1:]: total_df = total_df.merge(state_df, on = 'date', how = 'outer')
    total_df['date'] = pd.to_datetime(total_df['date'], format='%Y%m')
    total_df.columns = [i.replace('__', ' - ').replace('ppp', ' ') for i in total_df.columns]
    return total_df
def gen_pct_change_df(df):
    df_pct_change = df.copy()
    df_pct_change.iloc[:, 1:] = df_pct_change.iloc[:, 1:].pct_change(fill_method = None)
    df_pct_change = df_pct_change.iloc[1:, :]
    return df_pct_change

In [97]:
raw_data_file = 'raw_vals_series_1960_2024.csv'
aux_folder = 'raw_vals_midpoint_1960_2024/'
result_filename = 'x13_vals_1960_2024.parquet'
pct_change_filename = 'x13_vals_pct_change_1960_2024.parquet'
pct_change_filename_stata = 'x13_vals_pct_change_1960_2024.dta'
make_x13_files_from_csv(raw_data_file, aux_folder)
os.system(f'cp run_seasonal_adjustment.sh {aux_folder}')
os.system(f'cp x13as_html {aux_folder}')
os.chmod(f'{aux_folder}x13as_html', os.stat(f'{aux_folder}x13as_html').st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
os.chmod(f'{aux_folder}run_seasonal_adjustment.sh', os.stat(f'{aux_folder}run_seasonal_adjustment.sh').st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
os.system(f'./{aux_folder}run_seasonal_adjustment.sh')

./raw_vals_midpoint_1960_2024/run_seasonal_adjustment.sh: line 10: ./x13as_html: Permission denied


32256

In [98]:
df_x13 = join_x13_results(aux_folder)
df_pct_change = gen_pct_change_df(df_x13)
df_x13.to_parquet(result_filename)
df_pct_change.to_parquet(pct_change_filename)
df_pct_change.columns = (
    df_pct_change.columns
    .str.replace(' - ', '__')
    .str.replace(' ', '_ooo_')
)
df_pct_change.to_stata(pct_change_filename_stata, convert_dates={'date': 'tm'})

In [99]:
df_pct_change

Unnamed: 0,date,Vermont__SFH,Iowa__TOT,Rhode_ooo_Island__SFH,Montana__SFH,Kansas__SFH,Maryland__MFH,New_ooo_Jersey__S_MFH,Montana__S_MFH,Indiana__SFH,...,Idaho__L_MFH,Idaho__SFH,Mississippi__TOT,Minnesota__TOT,Arizona__TOT,South_ooo_Carolina__SFH,Mississippi__SFH,Colorado__SFH,South_ooo_Carolina__S_MFH,Georgia__TOT
1,1960-06-01,0.000790,0.116949,0.185429,-0.158898,0.201771,-0.892821,0.091509,0.012482,0.049943,...,,0.190379,-0.320576,-0.032378,0.247124,-0.082931,0.081717,-0.045806,-0.686553,0.053823
2,1960-07-01,-0.310505,-0.001376,-0.035525,-0.386936,0.289499,2.199775,-0.036659,-0.080298,-0.113254,...,,-0.124987,-0.326370,-0.074283,-0.167817,-0.018970,-0.250680,0.096058,1.368142,-0.123558
3,1960-08-01,0.382570,0.030382,0.127982,0.925122,0.025068,-0.212511,-0.453425,0.224530,-0.173112,...,-0.328625,-0.002294,-0.159268,0.213095,-0.014083,-0.029941,-0.276595,-0.025348,-0.153026,-0.014386
4,1960-09-01,-0.189470,-0.099691,-0.047025,0.167003,0.280469,-0.012229,0.662651,-0.794070,0.248872,...,-0.256582,0.130742,1.178419,0.049025,0.214221,0.017002,0.550438,0.108766,-0.387241,0.157343
5,1960-10-01,0.369625,-0.186043,-0.161733,-0.066425,-0.449965,-0.658853,-0.004410,3.325936,-0.192593,...,-0.818885,0.000675,-0.387357,-0.010494,-0.241338,-0.030590,-0.063195,-0.076060,0.553373,-0.145699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
767,2024-04-01,0.263958,-0.112291,0.084784,-0.002938,0.155791,0.354396,0.484132,-0.309736,0.112534,...,-0.525741,0.147233,0.477408,0.044103,0.058647,0.102077,0.459786,0.200718,0.774635,0.029550
768,2024-05-01,0.072335,0.083019,-0.079490,0.216129,-0.106055,-0.345661,0.092566,1.718479,-0.146401,...,0.664094,-0.021162,-0.057976,0.013494,-0.072725,-0.015727,-0.043768,0.057364,0.188871,0.002233
769,2024-06-01,-0.200148,-0.031639,0.077170,0.717741,-0.017351,-0.359165,-0.368447,-0.514512,0.055024,...,-0.667448,0.020964,-0.067415,-0.021676,0.017586,-0.090113,-0.092732,-0.153208,0.828497,-0.117085
770,2024-07-01,0.304691,0.223555,-0.080244,-0.195547,0.080940,0.621934,0.093416,0.340767,0.065452,...,-0.519744,0.081297,0.194219,0.153308,-0.076446,0.145224,0.197442,-0.002561,-0.212305,0.066114
