In [59]:
import pandas as pd

# Read growth_zscores_labeled.csv into a DataFrame.
df = pd.read_csv("growth_zscores_labeled.csv")

# Show every column header as one Python list.
print(list(df.columns))


['Date', 'Industrial Production (Mom %)', ' Capacity Utilization Index Mom (%)', ' Nonfarm Payrolls Index Mom (%)', ' Nonfarm Payrolls Index Mom (%).1', ' Advance Retail Sales Index Mom (%)', 'Manufacturers New Orders Index Mom (%)', 'Real GDP QoQ (%)', 'Real Personal Income MoM (%)', 'Industrial Production (Mom %)_Label', ' Capacity Utilization Index Mom (%)_Label', ' Nonfarm Payrolls Index Mom (%)_Label', ' Nonfarm Payrolls Index Mom (%).1_Label', ' Advance Retail Sales Index Mom (%)_Label', 'Manufacturers New Orders Index Mom (%)_Label', 'Real GDP QoQ (%)_Label', 'Real Personal Income MoM (%)_Label']


In [61]:
import pandas as pd

# Rebind df to the contents of growth_data.csv.
df = pd.read_csv("growth_data.csv")

# Print one header per line, wrapped in single quotes so that any leading
# or trailing spaces in a name are easy to spot.
for col in df:
    print("'{}'".format(col))


'Date'
'Industrial Production (Mom %)'
' Capacity Utilization Index Mom (%)'
' Nonfarm Payrolls Index Mom (%)'
' Nonfarm Payrolls Index Mom (%).1'
' Advance Retail Sales Index Mom (%)'
'Manufacturers New Orders Index Mom (%)'
'Real GDP QoQ (%)'
'Real Personal Income MoM (%)'


In [63]:
import pandas as pd

# Load the data
df = pd.read_csv("growth_data.csv")

# Step 1: Strip leading/trailing whitespace from every header
# (several headers in this file start with a space).
df.columns = df.columns.str.strip()

# Step 2: Rename all columns to clean, code-friendly names.
# The '.1' suffix is what pandas appends when the CSV repeats a header.
# NOTE(review): the repeated payrolls header is relabelled as an unemployment
# rate, and every column receives a _Z suffix even though this cell reads
# growth_data.csv rather than growth_zscores_labeled.csv - confirm both
# against the data source.
#
# errors="raise" turns a changed or misspelled header into an immediate
# KeyError instead of a silently skipped rename, and assigning the result
# (rather than inplace=True) keeps the cell safe to re-run.
df = df.rename(
    columns={
        'Industrial Production (Mom %)': 'Industrial_Production_Z',
        'Capacity Utilization Index Mom (%)': 'Capacity_Utilization_Z',
        'Nonfarm Payrolls Index Mom (%)': 'Nonfarm_Payrolls_Z',
        'Nonfarm Payrolls Index Mom (%).1': 'Unemployment_Rate_Z',
        'Advance Retail Sales Index Mom (%)': 'Retail_Sales_Z',
        'Manufacturers New Orders Index Mom (%)': 'Durable_Goods_Orders_Z',
        'Real GDP QoQ (%)': 'Real_GDP_Z',
        'Real Personal Income MoM (%)': 'Real_Income_Z',
    },
    errors="raise",
)

# Check that it worked
print(df.columns.tolist())



['Date', 'Industrial_Production_Z', 'Capacity_Utilization_Z', 'Nonfarm_Payrolls_Z', 'Unemployment_Rate_Z', 'Retail_Sales_Z', 'Durable_Goods_Orders_Z', 'Real_GDP_Z', 'Real_Income_Z']


In [65]:
# Production sub-theme: weight assigned to each z-score column.
weights_production = dict(
    Industrial_Production_Z=0.6,
    Capacity_Utilization_Z=0.4,
)

# Define your scoring function
def normalized_contribution(z, weight, cap=2.0):
    """Convert a z-score into a weighted contribution on a 0-1 scale.

    The z-score is clipped to the interval [-cap, +cap], rescaled linearly so
    that -cap maps to 0.0 and +cap maps to 1.0, and the result is multiplied
    by ``weight``.

    Args:
        z: The z-score to convert. A NaN input yields a NaN result.
        weight: Multiplier applied to the rescaled value.
        cap: Positive clipping bound for ``z``; defaults to 2.0.

    Returns:
        ``weight`` times the rescaled z-score, i.e. a value between 0 and
        ``weight`` for non-negative weights.

    Raises:
        ValueError: If ``cap`` is not strictly positive. A zero cap would
            divide by zero and a negative cap would invert the clipping range.
    """
    if not cap > 0:
        raise ValueError(f"cap must be a positive number, got {cap!r}")

    # Explicit comparisons instead of nested min/max: every comparison with
    # NaN is False, so a NaN z-score falls through to the else branch and
    # propagates to the result unchanged.
    if z > cap:
        clipped = cap
    elif z < -cap:
        clipped = -cap
    else:
        clipped = z

    scaled = (clipped + cap) / (2 * cap)     # Scale to 0-1
    return scaled * weight                   # Apply weight

# Calculate the Production Sub-Theme Score from weights_production, so that
# editing the weights dict actually changes the score (the weights used to be
# repeated here as literals and the dict was ignored).
production_weighted = sum(
    df[z_col].apply(lambda z, w=weight: normalized_contribution(z, w))
    for z_col, weight in weights_production.items()
)

# Final score out of 100, rounded to two decimals for display
df['Production_SubTheme_Score'] = (production_weighted * 100).round(2)

# Preview the result
print(df[['Date', 'Production_SubTheme_Score']].head())


         Date  Production_SubTheme_Score
0  1992-04-01                      69.25
1  1992-05-01                      58.25
2  1992-06-01                      51.50
3  1992-07-01                      72.75
4  1992-08-01                      36.00


In [67]:
# Sub-theme definitions: each dict maps a z-score column to the weight it
# carries inside that sub-theme.
weights_demand = {
    'Retail_Sales_Z': 0.6,
    'Real_Income_Z': 0.4
}

# NOTE(review): Unemployment_Rate_Z raises the labor score as its z-score
# rises - confirm the series is already sign-adjusted, otherwise higher
# unemployment would increase the score.
weights_labor = {
    'Nonfarm_Payrolls_Z': 0.7,
    'Unemployment_Rate_Z': 0.3
}

# Single-indicator sub-themes carry the full weight on one column.
weights_investment = {'Durable_Goods_Orders_Z': 1.0}
weights_gdp = {'Real_GDP_Z': 1.0}

# Output column -> weights, listed in the order the score columns are created.
subtheme_weights = {
    'Demand_SubTheme_Score': weights_demand,
    'Labor_SubTheme_Score': weights_labor,
    'Investment_SubTheme_Score': weights_investment,
    'GDP_SubTheme_Score': weights_gdp,
}

# Score every sub-theme exactly once, reading the weights from the dicts
# above. (Previously the Demand score was recomputed three times with
# identical code and the weight dicts were defined but never used.)
for score_col, z_weights in subtheme_weights.items():
    weighted_total = sum(
        df[z_col].apply(lambda z, w=weight: normalized_contribution(z, w))
        for z_col, weight in z_weights.items()
    )
    # Final score out of 100, rounded to two decimals.
    df[score_col] = (weighted_total * 100).round(2)




In [69]:
# Share of the overall growth score assigned to each sub-theme column.
growth_theme_weights = dict(
    Production_SubTheme_Score=0.20,
    Labor_SubTheme_Score=0.30,
    Demand_SubTheme_Score=0.25,
    Investment_SubTheme_Score=0.15,
    GDP_SubTheme_Score=0.10,
)

# Scale each sub-theme column by its share, add the pieces together and keep
# two decimals.
df['Growth_Theme_Score'] = sum(
    [df[name] * share for name, share in growth_theme_weights.items()]
).round(2)


In [71]:
# List every column now present on df, including the derived score columns.
print(list(df.columns))


['Date', 'Industrial_Production_Z', 'Capacity_Utilization_Z', 'Nonfarm_Payrolls_Z', 'Unemployment_Rate_Z', 'Retail_Sales_Z', 'Durable_Goods_Orders_Z', 'Real_GDP_Z', 'Real_Income_Z', 'Production_SubTheme_Score', 'Demand_SubTheme_Score', 'Labor_SubTheme_Score', 'Investment_SubTheme_Score', 'GDP_SubTheme_Score', 'Growth_Theme_Score']


In [73]:
# Sub-theme score column paired with its share of the growth theme.
growth_theme_weights = dict([
    ('Production_SubTheme_Score', 0.20),
    ('Labor_SubTheme_Score', 0.30),
    ('Demand_SubTheme_Score', 0.25),
    ('Investment_SubTheme_Score', 0.15),
    ('GDP_SubTheme_Score', 0.10),
])

# Weighted sum across the sub-theme columns, rounded to two decimals.
df['Growth_Theme_Score'] = sum(
    share * df[name] for name, share in growth_theme_weights.items()
).round(2)


In [75]:
# First five rows of the date and overall growth score (swap .head() for
# .tail() to see the latest rows instead).
print(df.head()[['Date', 'Growth_Theme_Score']])


         Date  Growth_Theme_Score
0  1992-04-01               68.80
1  1992-05-01               70.34
2  1992-06-01               59.36
3  1992-07-01               62.28
4  1992-08-01               55.93


In [77]:
# Take the first 10 rows, then keep only the date and the score columns.
print(df.head(10)[[
    'Date',
    'Production_SubTheme_Score',
    'Labor_SubTheme_Score',
    'Demand_SubTheme_Score',
    'Investment_SubTheme_Score',
    'GDP_SubTheme_Score',
    'Growth_Theme_Score',
]])


         Date  Production_SubTheme_Score  Labor_SubTheme_Score  \
0  1992-04-01                      69.25                 67.45   
1  1992-05-01                      58.25                 67.10   
2  1992-06-01                      51.50                 66.05   
3  1992-07-01                      72.75                 66.40   
4  1992-08-01                      36.00                 66.92   
5  1992-09-01                      58.00                 65.52   
6  1992-10-01                      67.00                 67.97   
7  1992-11-01                      59.50                 67.27   
8  1992-12-01                      53.75                 68.50   
9  1993-01-01                      59.75                 69.55   

   Demand_SubTheme_Score  Investment_SubTheme_Score  GDP_SubTheme_Score  \
0                  69.05                      65.00               77.00   
1                  85.20                      63.75               77.00   
2                  52.00                      57

In [79]:
# Write the date and all score columns to growth_theme_output.csv, leaving
# out the row index.
df.to_csv(
    "growth_theme_output.csv",
    columns=[
        'Date',
        'Production_SubTheme_Score',
        'Labor_SubTheme_Score',
        'Demand_SubTheme_Score',
        'Investment_SubTheme_Score',
        'GDP_SubTheme_Score',
        'Growth_Theme_Score',
    ],
    index=False,
)
