In [14]:
import pandas as pd

In [15]:
# Location of the age-group personal-finances CSV, relative to the notebook
file_path = './umich.edu_Personal Finances_age_groups.csv'

# Parse the CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows of the table exactly as loaded
data.head(n=5)

Unnamed: 0,yyyymm,pago_r_a1834,pago_r_a3554,pago_r_a5597,pagorn_ny_a1834,pagorn_ny_a3554,pagorn_ny_a5597,pexp_r_a1834,pexp_r_a3554,pexp_r_a5597,...,rinc_r_a5597,pinc2_mean_a1834,pinc2_mean_a3554,pinc2_mean_a5597,pinc_mean_a1834,pinc_mean_a3554,pinc_mean_a5597,pjob_mean_a1834,pjob_mean_a3554,pjob_mean_a5597
0,197801,130,105,89,39,19,-5,144,117,94,...,59,,,,,,,,,
1,197802,133,111,86,35,19,-1,143,106,92,...,64,,,,,,,,,
2,197803,132,89,82,36,12,-5,139,101,84,...,52,,,,,,,,,
3,197804,126,99,87,30,17,3,144,108,84,...,53,,,,,,,,,
4,197805,139,97,81,38,16,-7,140,112,84,...,49,,,,,,,,,


In [16]:
# Rename the 'yyyymm' period column to 'Date'.
# `rename` returns a new frame; rebinding `data` to it (instead of inplace=True)
# avoids mutating the object that the load cell displayed, and downstream cells
# still find the renamed table under the same name.
data = data.rename(columns={'yyyymm': 'Date'})

In [17]:
# Melt the wide table into long format: one row per (Date, original column) pair,
# with the original column name in 'Metric_Age' and its cell in 'Value'.
#
# Only the demographic-specific metric columns are melted. Columns that pandas
# auto-named 'Unnamed: N' (here 'Unnamed: 0', which appears to be a row counter
# saved into the CSV) are not metrics, so they are excluded explicitly instead of
# being melted and silently discarded later in the pipeline.
metric_columns = [
    col for col in data.columns
    if col != 'Date' and not str(col).startswith('Unnamed:')
]

data_long = pd.melt(
    data,
    id_vars=['Date'],
    value_vars=metric_columns,
    var_name='Metric_Age',
    value_name='Value'
)

In [18]:
# Break each 'Metric_Age' label at its final underscore: everything before it
# becomes 'Metric', the underscore-free tail becomes 'Group'.
# Labels that do not match the pattern get NaN in both new columns.
metric_and_group = data_long['Metric_Age'].str.extract(r'^(.*)_([^_]+)$', expand=True)
data_long[['Metric', 'Group']] = metric_and_group

In [19]:
# Metric columns, listed in the order they should appear in the output table
desired_columns = [
    'pago_r',
    'pagorn_ny',
    'pexp_r',
    'ptrd_r',
    'pago5_r',
    'pexp5_r',
    'ptrd5_r',
    'inex_med',
    'inex_r',
    'rinc_r',
    'pinc2_mean',
    'pinc_mean',
    'pjob_mean',
]

In [20]:
# Spread the long table back out: one row per (Date, Group), one column per Metric.
# No aggfunc is given, so pivot_table uses its default (mean) if a
# (Date, Group, Metric) combination occurs more than once.
metric_table = data_long.pivot_table(
    values='Value',
    index=['Date', 'Group'],
    columns='Metric'
)

# Keep exactly the desired metrics, in the desired order
metric_table = metric_table.reindex(columns=desired_columns, fill_value=None)

# Turn the Date/Group index levels back into ordinary columns
final_data = metric_table.reset_index()

In [21]:
# Demographic placeholder: 'Age' starts out as a copy of the 'Group' labels
final_data = final_data.assign(Age=final_data['Group'])

In [24]:
# Put the columns in their final order: 'Date' first, then the metrics in the
# order given by `desired_columns`, then the demographic columns.
# Reusing `desired_columns` (rather than repeating the 13 names) keeps this
# ordering in sync with the list used when the pivoted table was reindexed.
final_data = final_data[['Date', *desired_columns, 'Group', 'Age']]

In [25]:
# Show the restructured table; as the cell's last expression it is rendered as the cell output
final_data 

Metric,Date,pago_r,pagorn_ny,pexp_r,ptrd_r,pago5_r,pexp5_r,ptrd5_r,inex_med,inex_r,rinc_r,pinc2_mean,pinc_mean,pjob_mean,Group,Age
0,197801,130.0,39.0,144.0,146.0,,,,,,93.0,,,,a1834,a1834
1,197801,105.0,19.0,117.0,113.0,,,,,,60.0,,,,a3554,a3554
2,197801,89.0,-5.0,94.0,89.0,,,,,,59.0,,,,a5597,a5597
3,197802,133.0,35.0,143.0,149.0,,,,9.0,164.0,97.0,,,,a1834,a1834
4,197802,111.0,19.0,106.0,114.0,,,,5.0,148.0,77.0,,,,a3554,a3554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1684,202410,77.0,-2.0,124.0,97.0,97.0,143.0,122.0,2.0,153.0,61.0,50.0,34.2,23.2,a3554,a3554
1685,202410,74.0,-6.0,96.0,83.0,82.0,97.0,89.0,0.5,135.0,48.0,34.8,21.0,12.6,a5597,a5597
1686,202411,74.0,-4.0,131.0,100.0,129.0,160.0,148.0,3.9,168.0,87.0,61.2,38.9,26.5,a1834,a1834
1687,202411,78.0,2.0,118.0,97.0,110.0,135.0,124.0,1.7,145.0,63.0,51.0,31.1,25.7,a3554,a3554


In [26]:
# Write the restructured table to CSV, leaving the DataFrame index out of the file
output_path = './Personal_Finances_AgeGroups_restructured_data.csv'
final_data.to_csv(path_or_buf=output_path, index=False)

# Report where the file was written
print(f"Restructured data saved to: {output_path}")

Restructured data saved to: ./Personal_Finances_AgeGroups_restructured_data.csv
