In [32]:
import pandas as pd

In [33]:
# Load the summary-indices CSV into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
file_path = './Summary_Indices_ics_icc_ice_082024.csv'
umich_data = pd.read_csv(file_path)

In [34]:
# Preview the first rows to inspect the wide layout: one 'yyyymm' column plus
# one column per index/group combination (ics_*, icc_*, ...).
umich_data.head()

Unnamed: 0,yyyymm,ics_all,ics_a1834,ics_a3554,ics_a5597,ics_w,ics_nc,ics_ne,ics_s,ics_m,...,icc_ne,icc_s,icc_m,icc_f,icc_y13,icc_y23,icc_y33,icc_ehs,icc_esc,icc_ecd
0,197801,83.7,93.7,86.7,75.0,83.6,85.5,84.1,81.3,90.2,...,95.4,92.0,101.9,91.9,,,,93.3,107.6,100.0
1,197802,84.3,99.7,82.3,77.1,94.2,81.9,83.1,81.6,90.2,...,91.0,92.8,101.2,91.1,,,,92.7,101.2,102.4
2,197803,78.8,91.7,76.8,68.8,79.6,76.7,78.7,79.7,86.6,...,89.1,97.6,100.0,88.8,,,,88.7,100.0,98.8
3,197804,81.6,91.6,79.7,73.9,81.6,80.9,82.5,81.6,86.3,...,96.8,101.1,102.7,94.7,,,,94.3,100.6,106.2
4,197805,82.9,95.6,78.9,74.4,86.4,81.5,79.7,84.5,90.1,...,96.5,100.7,103.4,94.9,,,,94.6,99.5,107.9


In [35]:
# Lookup from the short CSV column codes to descriptive names.
# Every code is '<index prefix>_<group suffix>', so the mapping is generated from
# those two ingredients instead of being spelled out entry by entry.
column_rename_map = {
    f'{index_code}_{group_code}': (
        # The overall ('all') series is named 'Index_<label>' and carries no group suffix
        f'Index_{index_label}' if group_code == 'all' else f'{index_label}_{group_label}'
    )
    for group_code, group_label in {
        'all': None,
        'a1834': 'AGE_18-34',
        'a3554': 'AGE_35-54',
        'a5597': 'AGE_55-97',
        'm': 'genderM',
        'f': 'genderW',
        'nc': 'region_NorthCentral',
        'ne': 'region_Northeast',
        's': 'region_South',
        'w': 'region_West',
        'y13': 'income_LowestThird',
        'y23': 'income_MiddleThird',
        'y33': 'income_HighestThird',
        'ehs': 'education_HS_orLess',
        'esc': 'education_SomeCollege',
        'ecd': 'education_CollegeDegreeOrHigher',
    }.items()
    for index_code, index_label in {
        'ics': 'con_sentiment',
        'ice': 'con_expectation',
        'icc': 'econ_conditions',
    }.items()
}

# Apply the mapping. rename() is called without inplace, so it returns a new frame
# and umich_data keeps its original short column codes.
# Columns that are not keys of column_rename_map (e.g. 'yyyymm') are left unchanged.
umich_data_renamed = umich_data.rename(columns=column_rename_map)


In [36]:
# Group label -> its three renamed columns, always ordered sentiment, expectation,
# conditions. Key order is kept as written because the transform loop stacks the
# groups in dictionary order.
Demo_group_mappings = {
    # The overall series uses the 'Index_' prefix rather than a group suffix
    'All': ['Index_con_sentiment', 'Index_con_expectation', 'Index_econ_conditions'],
    **{
        group_label: [
            f'{measure}_{column_suffix}'
            for measure in ('con_sentiment', 'con_expectation', 'econ_conditions')
        ]
        for group_label, column_suffix in (
            ('Age 18-34', 'AGE_18-34'),
            ('Age 35-54', 'AGE_35-54'),
            ('Age 55-97', 'AGE_55-97'),
            ('Men', 'genderM'),
            ('Women', 'genderW'),
            ('North Central', 'region_NorthCentral'),
            ('Northeast', 'region_Northeast'),
            ('South', 'region_South'),
            ('West', 'region_West'),
            ('Lowest Third Income', 'income_LowestThird'),
            ('Middle Third Income', 'income_MiddleThird'),
            ('Highest Third Income', 'income_HighestThird'),
            ('High School or Less', 'education_HS_orLess'),
            ('Some College', 'education_SomeCollege'),
            ('College Degree or Higher', 'education_CollegeDegreeOrHigher'),
        )
    },
}


In [37]:
# Start from an empty DataFrame; this name holds the long-format result
# (one row per date and group) produced by the transform cell further down.
structured_data = pd.DataFrame()


In [46]:
# NOTE(review): this preview carries a later execution count than the transform
# loop that follows it; on a top-to-bottom run structured_data is still empty at
# this point, so the populated output shown here will not reproduce.
structured_data.head()

Unnamed: 0,Date,ICS Score,ICE Score,ICC Score,Group
0,197801,83.7,75.7,96.2,All
1,197802,84.3,77.2,95.4,All
2,197803,78.8,69.5,93.2,All
3,197804,81.6,71.1,98.0,All
4,197805,82.9,73.0,98.2,All


In [38]:
# Reshape wide -> long: for every group, pull the date column plus that group's
# three index columns, give them common names, and tag the rows with the group label.
group_frames = []
for group, columns in Demo_group_mappings.items():
    temp_df = umich_data_renamed[['yyyymm'] + columns].copy()  # Using the renamed DataFrame
    temp_df.columns = ['Date', 'ICS Score', 'ICE Score', 'ICC Score']  # Standardize column names
    temp_df['Group'] = group  # Add group information
    group_frames.append(temp_df)

# Concatenate once instead of inside the loop: this avoids re-copying the growing
# frame on every iteration, and because structured_data is rebuilt from scratch the
# cell can be re-run without appending duplicate rows.
structured_data = pd.concat(group_frames, ignore_index=True)

In [41]:
# Work on a copy so that structured_data itself is left untouched
structured_data_with_demo = structured_data.copy()

# Add one placeholder column per demographic dimension, filled with None
for demographic_column in ('Age', 'Region', 'Gender', 'Income', 'Education'):
    structured_data_with_demo[demographic_column] = None

Step-by-Step Explanation:
Identify Rows with Specific Keywords:

Each line uses .str.contains() to search for a keyword in the Group column.
For example, structured_data_with_demo['Group'].str.contains('Age') checks if the string "Age" is present in each value of the Group column.
Select Rows with the Keyword:

The .loc[] function is used to select the rows where the keyword was found.
structured_data_with_demo.loc[...] allows us to specify a condition and then select or modify specific rows that meet that condition.
Populate the Corresponding Demographic Column:

For the rows that contain the keyword, the value in the Group column is copied to the appropriate demographic column.
For instance, if the Group value contains "Age," the value from Group is placed in the Age column for that row.
This operation is repeated for each demographic type (Age, Region, Gender, Income, Education).

Result:
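Each demographic column (Age, Region, etc.) is meant to be filled from the Group column, creating easy-to-filter fields for analysis. Note that .str.contains() is case-sensitive and searches the Group label itself, so a keyword only works when it literally appears in the label: "Age" matches "Age 18-34", but labels such as "Men", "South", "Lowest Third Income" or "Some College" do not contain "gender", "region", "income" or "education". Those groups need an explicit label-to-category mapping rather than a keyword search.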

In [45]:
# Populate the demographic columns from an explicit Group -> dimension lookup.
# Keyword matching with .str.contains() is case-sensitive and inspects the label text
# itself, so searching for 'region', 'gender', 'income' or 'education' never matched
# labels such as 'South', 'Men', 'Lowest Third Income' or 'Some College', and those
# four columns stayed entirely empty.
group_to_dimension = {
    'All': 'All',
    'Age 18-34': 'Age', 'Age 35-54': 'Age', 'Age 55-97': 'Age',
    'Men': 'Gender', 'Women': 'Gender',
    'North Central': 'Region', 'Northeast': 'Region', 'South': 'Region', 'West': 'Region',
    'Lowest Third Income': 'Income', 'Middle Third Income': 'Income', 'Highest Third Income': 'Income',
    'High School or Less': 'Education', 'Some College': 'Education', 'College Degree or Higher': 'Education',
}
group_labels = structured_data_with_demo['Group']
row_dimension = group_labels.map(group_to_dimension)

# Fail loudly if a Group label is missing from the lookup, rather than silently
# leaving its rows without any demographic value.
unmapped_groups = group_labels[row_dimension.isna()].unique()
if len(unmapped_groups) > 0:
    raise ValueError(f"Group labels without a demographic dimension: {list(unmapped_groups)}")

# Copy the Group label into the column of the dimension it belongs to; all other
# demographic columns stay empty for that row. 'All' is created here on first
# assignment, so it ends up as the last column.
for dimension in ['All', 'Age', 'Region', 'Gender', 'Income', 'Education']:
    structured_data_with_demo.loc[row_dimension == dimension, dimension] = group_labels


structured_data_with_demo.head()


Unnamed: 0,Date,ICS Score,ICE Score,ICC Score,Group,Age,Region,Gender,Income,Education,All
0,197801,83.7,75.7,96.2,All,,,,,,All
1,197802,84.3,77.2,95.4,All,,,,,,All
2,197803,78.8,69.5,93.2,All,,,,,,All
3,197804,81.6,71.1,98.0,All,,,,,,All
4,197805,82.9,73.0,98.2,All,,,,,,All


In [43]:
# NOTE(review): this cell was executed (In [43]) before the cell that adds the 'All'
# column (In [45]), so the recorded shape is probably one column short of the final
# frame — re-run to confirm.
structured_data_with_demo.shape

(8960, 10)

In [50]:
# Write the restructured long-format table to CSV in the working directory;
# index=False keeps the DataFrame's row index out of the file.
structured_data_with_demo.to_csv('umichigan_ics_icc_ice_restructured.csv', index=False)