In [76]:
import polars as pl
import pandas as pd
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import subprocess
import stat 

def gen_spc_file_text(data_filename):
    spc_text = r"""series{
  file=""" + data_filename + r"""
  period=12
  format=datevalue
}
transform{
  function=auto
}
x11{ save=d11 }"""
    return spc_text
def convert_leading_zeroes_to_nan(df):
    def replace_leading_zeroes(series):
        non_zero_found = False
        new_series = []
        for value in series:
            if value != 0:
                non_zero_found = True
            if not non_zero_found and value == 0:
                new_series.append(np.nan)
            else:
                new_series.append(value)
        return pd.Series(new_series, index=series.index)
    
    return df.apply(replace_leading_zeroes, axis=0)
    
def list_txt_files(directory):
    # List all files in the specified directory
    files = os.listdir(directory)
    # Filter and return only files that end with .txt
    txt_files = [file for file in files if file.endswith('.txt')]
    return txt_files
def read_csv_and_prep_df(filename, midpoint = True):
    df = pd.read_csv(filename)
    df = convert_leading_zeroes_to_nan(df)
    if midpoint:
        for col in df.columns[2:]: df[col] = df[col].astype(float)
        df.iloc[:, 2:] = df.iloc[:, 2:].replace(0., np.nan)
        df = df.interpolate(method='linear', limit = 100, limit_area = 'inside')
    df.columns = [i.replace(' - ', '__').replace(' ', 'ppp') for i in df.columns]
    return df
def list_files_with_extension(directory, ext):
    files = os.listdir(directory)
    txt_files = [file for file in files if file.endswith(ext)]
    return txt_files
def gen_state_level_df(file_name, folder_path):
    file_path = folder_path + file_name
    col_names = ['date', file_name.replace(folder_path, '')[5:-4]]
    df = pd.read_csv(file_path, sep = '\t', header = None, skiprows = 2, names = col_names)
    return df
def make_x13_files_from_df(df, folder):
    os.system(f'mkdir -p {folder}')
    for var in df.columns:
        if var not in ['YEAR', 'MONTH']:
            df[['YEAR', 'MONTH', var]].dropna().to_csv(f'{folder}{var}.txt', sep=' ', index=False, header=False)
    for raw_data_file in list_txt_files(folder):
        file = open(f'{folder}spec_' + raw_data_file[:-4] + '.spc', 'w')
        file.write(gen_spc_file_text(raw_data_file))
        file.close()
def make_x13_files_from_csv(filename, folder, midpoint = True):
    df = read_csv_and_prep_df(filename, midpoint = True)
    make_x13_files_from_df(df, folder)
def join_x13_results(folder):
    x13_results_files = list_files_with_extension(folder, '.d11')
    results = [gen_state_level_df(file, folder) for file in x13_results_files]
    total_df = results[0]
    for state_df in results[1:]: total_df = total_df.merge(state_df, on = 'date', how = 'outer')
    total_df['date'] = pd.to_datetime(total_df['date'], format='%Y%m')
    total_df.columns = [i.replace('__', ' - ').replace('ppp', ' ') for i in total_df.columns]
    return total_df
def gen_pct_change_df(df):
    df_pct_change = df.copy()
    df_pct_change.iloc[:, 1:] = df_pct_change.iloc[:, 1:].pct_change(fill_method = None)
    df_pct_change = df_pct_change.iloc[1:, :]
    return df_pct_change

In [103]:
raw_data_file = 'raw_perms_series_1960_2024.csv'
aux_folder = 'raw_perms_midpoint_1960_2024/'
result_filename = 'x13_perms_1960_2024.parquet'
pct_change_filename = 'x13_perms_pct_change_1960_2024.parquet'
pct_change_filename_stata = 'x13_perms_pct_change_1960_2024.dta'
make_x13_files_from_csv(raw_data_file, aux_folder)
os.system(f'cp run_seasonal_adjustment.sh {aux_folder}')
os.system(f'cp x13as_html {aux_folder}')
os.chmod(f'{aux_folder}x13as_html', os.stat(f'{aux_folder}x13as_html').st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
os.chmod(f'{aux_folder}run_seasonal_adjustment.sh', os.stat(f'{aux_folder}run_seasonal_adjustment.sh').st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
os.system(f'./{aux_folder}run_seasonal_adjustment.sh')

./raw_perms_midpoint_1960_2024/run_seasonal_adjustment.sh: line 10: ./x13as_html: Permission denied


32256

In [104]:
df_x13 = join_x13_results(aux_folder)
df_pct_change = gen_pct_change_df(df_x13)
df_x13.to_parquet(result_filename)
df_pct_change.to_parquet(pct_change_filename)
df_pct_change.columns = (
    df_pct_change.columns
    .str.replace(' - ', '__')
    .str.replace(' ', '_p_')
)
df_pct_change.to_stata(pct_change_filename_stata, convert_dates={'date': 'tm'})

In [105]:
df_pct_change

Unnamed: 0,date,Vermont__SFH,Iowa__TOT,Rhode_p_Island__SFH,Montana__SFH,Kansas__SFH,Maryland__MFH,New_p_Jersey__S_MFH,Montana__S_MFH,Indiana__SFH,...,Idaho__L_MFH,Idaho__SFH,Mississippi__TOT,Minnesota__TOT,Arizona__TOT,South_p_Carolina__SFH,Mississippi__SFH,Colorado__SFH,South_p_Carolina__S_MFH,Georgia__TOT
1,1960-06-01,-0.017656,0.215610,0.188384,-0.296700,0.139291,-0.876304,0.081671,-0.102999,0.034264,...,,0.123113,-0.162798,-0.047554,0.170971,-0.070885,0.099563,-0.047595,-0.711482,-0.132717
2,1960-07-01,-0.314615,-0.062453,-0.026399,-0.259795,0.211817,3.198187,-0.012530,0.004087,-0.123427,...,,-0.017078,-0.309802,0.021455,-0.179068,-0.055774,-0.201466,0.092541,1.580114,-0.056060
3,1960-08-01,0.460259,0.016957,0.100683,0.836353,0.028417,-0.216051,-0.380292,0.433766,-0.119669,...,-0.290511,-0.061649,0.164331,0.083669,0.072358,-0.021608,0.045685,0.029303,0.286459,-0.068824
4,1960-09-01,-0.156524,-0.166387,-0.078999,0.118635,0.420707,-0.106194,0.515861,-0.790924,0.173900,...,-0.291848,0.286428,0.544342,0.163324,0.209339,0.088243,0.042507,0.026318,-0.419555,0.121843
5,1960-10-01,0.093000,-0.126608,-0.105863,-0.054357,-0.534705,-0.732504,-0.244861,3.103498,-0.165027,...,-0.560441,-0.290176,-0.382919,-0.083883,-0.249625,-0.060494,0.003535,-0.119556,0.312709,-0.059327
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
767,2024-04-01,0.262230,-0.109576,0.062097,0.038309,0.112585,0.718164,1.463223,-0.407037,0.148502,...,-0.296011,0.231866,0.523055,0.097783,0.059994,0.156842,0.522288,0.213620,1.156431,0.107923
768,2024-05-01,-0.075408,0.137390,0.101677,0.111042,-0.066382,-0.414132,-0.075750,1.270626,-0.203283,...,-0.001761,0.053165,-0.188918,-0.008548,-0.109458,-0.039482,-0.177947,-0.069030,0.080594,-0.061218
769,2024-06-01,-0.113715,0.019673,0.106472,0.493107,0.011006,-0.321315,-0.540674,-0.433081,0.195652,...,-0.534401,-0.102811,0.013942,0.043661,0.054809,-0.140556,0.021983,-0.110447,0.446194,-0.062483
770,2024-07-01,0.258490,0.522774,-0.076399,0.022696,0.031848,0.403938,0.544831,0.715735,-0.004164,...,-0.505169,0.165519,0.163193,0.109193,-0.049009,0.153643,0.137050,0.075290,-0.106495,-0.079264
