In [None]:
import pandas as pd
import glob
import os

def aggregate_wage_data(main_dir: str) -> pd.DataFrame:
    """
    Aggregate every CSV file found under ``main_dir`` into one DataFrame.

    The directory tree is searched recursively for ``*.csv`` files. Each file
    name is split on ``'_'``: the second part is used as the state, the third
    as the year, and the fourth (up to its first ``'.'``) as the quarter, e.g.
    ``<prefix>_<state>_<year>_<quarter>.csv``. The year and quarter are
    combined into a ``yr_qtr`` label of the form ``'<year>_Q<quarter>'``.

    Args:
    - main_dir (str): The path to the main directory containing year folders.

    Returns:
    - pd.DataFrame: One row per input row, with the columns 'state',
      'yr_qtr', 'count', 'sector2' and 'sector3'. Files are processed in
      sorted path order so the row order is reproducible. If no CSV file is
      found, an empty DataFrame with those five columns is returned.

    Raises:
    - IndexError: If a file name has fewer than four '_'-separated parts.
    - KeyError: If a CSV file lacks a 'count', 'sector2' or 'sector3' column.
    """
    output_columns = ['state', 'yr_qtr', 'count', 'sector2', 'sector3']

    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the order of the concatenated rows deterministic across runs.
    all_files = sorted(
        glob.glob(os.path.join(main_dir, '**', '*.csv'), recursive=True)
    )

    data_list = []
    for file in all_files:
        df = pd.read_csv(file)

        # Derive the state and year/quarter label from the file name
        file_name = os.path.basename(file)
        parts = file_name.split('_')
        state = parts[1]
        year = parts[2]
        quarter = parts[3].split('.')[0]

        # Tag every row with the file-level metadata
        df['state'] = state
        df['yr_qtr'] = f'{year}_Q{quarter}'

        # Keep only the columns of interest, in a fixed order
        data_list.append(df[output_columns])

    # pd.concat raises ValueError on an empty list, so handle "no files found"
    # explicitly and still return a frame with the expected schema.
    if not data_list:
        return pd.DataFrame(columns=output_columns)

    return pd.concat(data_list, ignore_index=True)

# Usage example: aggregate every CSV found under the given directory.
# NOTE(review): 'path_to_your_main_directory/NM' looks like a placeholder —
# replace it with the real data directory before running.
main_directory = 'path_to_your_main_directory/NM'
aggregated_data = aggregate_wage_data(main_directory)

# Print the aggregated data
print(aggregated_data)

# Save the final DataFrame to a CSV file (without the index column).
# The path is relative, so the file lands in the current working directory;
# NOTE(review): if that directory is inside main_directory, a later run's
# recursive *.csv search would also match this output file — confirm.
aggregated_data.to_csv('aggregated_wage_data.csv', index=False)
