In [15]:
#How does financial literacy vary across age groups and genders in the 2024 NFCS data?
#How does financial literacy vary across regions and divisions in the 2024 NFCS data?

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# === 1) Load original dataset ===
cleaned_df = pd.read_csv("2018 Cleaned.csv", na_values=["", " ", "NA", "N/A", "null", ".", "na"])
original_df = pd.read_csv("NFCS 2018 Investor Data 191107.csv", na_values=["", " ", "NA", "N/A", "null", ".", "na"])

# --- Reusable scales -------------------------------------------------

YES_NO_DK_PREF = {1: "Yes", 2: "No", 98: "Don't know", 99: "Prefer not to say"}  # :contentReference[oaicite:0]{index=0}
DK_PREF = {98: "Don't know", 99: "Prefer not to say"}

ONE_TO_TEN_CONF = {
    1: "1 - Not at all confident", 2: "2", 3: "3", 4: "4", 5: "5",
    6: "6", 7: "7", 8: "8", 9: "9", 10: "10 - Extremely confident", **DK_PREF
}  # :contentReference[oaicite:1]{index=1}

AGREE_7 = {
    1: "1 - Strongly disagree", 2: "2", 3: "3",
    4: "4 - Neither agree nor disagree", 5: "5", 6: "6",
    7: "7 - Strongly agree", **DK_PREF
}  # :contentReference[oaicite:2]{index=2}

FREQUENCY_3 = {1: "Never", 2: "Sometimes", 3: "Frequently", **DK_PREF}  # :contentReference[oaicite:3]{index=3}

# --- Per-question dictionaries --------------------------------------

LABELS_BY_COLUMN = {
    # A-section (investing status & retirement)
    "A1": {
        1: "Primary decision-maker",
        2: "Share decision-making",
        3: "Do not participate",
        **DK_PREF
    },  # :contentReference[oaicite:4]{index=4}

    "A2": YES_NO_DK_PREF,  # :contentReference[oaicite:5]{index=5}
    "A3": YES_NO_DK_PREF,  # :contentReference[oaicite:6]{index=6}

    # B2_* ownership in non-retirement accounts
    "B2_1": YES_NO_DK_PREF,  # Individual stocks :contentReference[oaicite:7]{index=7}
    "B2_2": YES_NO_DK_PREF,  # Individual bonds :contentReference[oaicite:8]{index=8}
    "B2_3": YES_NO_DK_PREF,  # Mutual funds :contentReference[oaicite:9]{index=9}
    "B2_4": YES_NO_DK_PREF,  # ETFs :contentReference[oaicite:10]{index=10}
    "B2_5": YES_NO_DK_PREF,  # Annuities :contentReference[oaicite:11]{index=11}
    "B2_7": YES_NO_DK_PREF,  # Commodities/futures :contentReference[oaicite:12]{index=12}
    "B2_20": YES_NO_DK_PREF, # Whole life insurance :contentReference[oaicite:13]{index=13}
    "B2_21": YES_NO_DK_PREF, # REITs :contentReference[oaicite:14]{index=14}
    "B2_22": YES_NO_DK_PREF, # Options :contentReference[oaicite:15]{index=15}
    "B2_23": YES_NO_DK_PREF, # Microcap/penny stocks :contentReference[oaicite:16]{index=16}
    "B2_24": YES_NO_DK_PREF, # Structured notes :contentReference[oaicite:17]{index=17}
    "B2_25": YES_NO_DK_PREF, # Private placements :contentReference[oaicite:18]{index=18}

    # Trading frequency & balances
    "B3": {
        1: "None",
        2: "1 to 3 times",
        3: "4 to 10 times",
        4: "11 times or more",
        **DK_PREF
    },  # :contentReference[oaicite:19]{index=19}

    "B4": {
        1: "Less than $2,000",
        2: "$2,000 to < $5,000",
        3: "$5,000 to < $10,000",
        4: "$10,000 to < $25,000",
        5: "$25,000 to < $50,000",
        6: "$50,000 to < $100,000",
        7: "$100,000 to < $250,000",
        8: "$250,000 to < $500,000",
        9: "$500,000 to < $1,000,000",
        10: "$1,000,000 or more",
        **DK_PREF
    },  # :contentReference[oaicite:20]{index=20}

    # Margin, lines of credit
    "B5": YES_NO_DK_PREF,   # margin allowed :contentReference[oaicite:21]{index=21}
    "B6": YES_NO_DK_PREF,   # purchased on margin :contentReference[oaicite:22]{index=22}
    "B20": YES_NO_DK_PREF,  # margin call ever :contentReference[oaicite:23]{index=23}
    "B21": YES_NO_DK_PREF,  # loan/LOC vs investments :contentReference[oaicite:24]{index=24}

    # Risk tolerance & stock share
    "B10": {
        1: "Take substantial risks for substantial returns",
        2: "Above average risks for above average returns",
        3: "Average risks for average returns",
        4: "Not willing to take risks",
        **DK_PREF
    },  # :contentReference[oaicite:25]{index=25}

    "B11": {
        1: "More than half",
        2: "Less than half",
        3: "None",
        **DK_PREF
    },  # :contentReference[oaicite:26]{index=26}

    # Crypto awareness, risk, investing, intent
    "B23": YES_NO_DK_PREF,  # heard of crypto :contentReference[oaicite:27]{index=27}
    "B24": {
        1: "Not at all risky",
        2: "Slightly risky",
        3: "Moderately risky",
        4: "Very risky",
        5: "Extremely risky",
        **DK_PREF
    },  # :contentReference[oaicite:28]{index=28}
    "B25": YES_NO_DK_PREF,  # invested in crypto :contentReference[oaicite:29]{index=29}
    "B26": YES_NO_DK_PREF,  # considering crypto :contentReference[oaicite:30]{index=30}

    # How decisions are made (C20_*), approvals (C21)
    "C20_1": FREQUENCY_3,  # pro chooses for me :contentReference[oaicite:31]{index=31}
    "C20_2": FREQUENCY_3,  # discuss then decide :contentReference[oaicite:32]{index=32}
    "C20_3": FREQUENCY_3,  # own research then decide :contentReference[oaicite:33]{index=33}
    "C20_4": FREQUENCY_3,  # web-based robo tool :contentReference[oaicite:34]{index=34}
    "C20_5": FREQUENCY_3,  # mobile app chooses :contentReference[oaicite:35]{index=35}
    "C21": YES_NO_DK_PREF, # pro needs approval per trade :contentReference[oaicite:36]{index=36}

    # How you place trades (C22_*)
    "C22_1": FREQUENCY_3,  # contact advisor to place order :contentReference[oaicite:37]{index=37}
    "C22_2": FREQUENCY_3,  # call firm rep :contentReference[oaicite:38]{index=38}
    "C22_3": FREQUENCY_3,  # place orders on website :contentReference[oaicite:39]{index=39}
    "C22_4": FREQUENCY_3,  # place orders on mobile app :contentReference[oaicite:40]{index=40}

    # Fees paid (C23_*), annual fee % (C24), confidence in answer (C25), trusted contact (C26)
    "C23_1": YES_NO_DK_PREF,  # advice fees :contentReference[oaicite:41]{index=41}
    "C23_2": YES_NO_DK_PREF,  # trade commissions :contentReference[oaicite:42]{index=42}
    "C23_3": YES_NO_DK_PREF,  # mutual fund fees :contentReference[oaicite:43]{index=43}
    "C23_4": YES_NO_DK_PREF,  # account service fees :contentReference[oaicite:44]{index=44}
    "C24": {
        1: "Do not pay any fees",
        2: "Less than 0.5%",
        3: "0.5% to < 1%",
        4: "1% to < 2%",
        5: "2% to < 4%",
        6: "4% or more",
        **DK_PREF
    },  # :contentReference[oaicite:45]{index=45}
    "C25": ONE_TO_TEN_CONF,  # confidence in that % answer :contentReference[oaicite:46]{index=46}
    "C26": YES_NO_DK_PREF,   # authorized trusted contact :contentReference[oaicite:47]{index=47}

    # Regulator check
    "C7": YES_NO_DK_PREF,  # checked background with regulator :contentReference[oaicite:48]{index=48}

    # D-section: market confidence, expectations, reactions
    "D1_1": ONE_TO_TEN_CONF,  # long-term opportunities :contentReference[oaicite:49]{index=49}
    "D1_2": ONE_TO_TEN_CONF,  # market fairness :contentReference[oaicite:50]{index=50}
    "D2": {
        1: "< 0%", 2: "0% to 4.9%", 3: "5% to 9.9%", 4: "10% to 14.9%",
        5: "15% to 19.9%", 6: "20% or more", **DK_PREF
    },  # :contentReference[oaicite:51]{index=51}
    "D3": {
        1: "Worse than market",
        2: "About the same as market",
        3: "Better than market",
        **DK_PREF
    },  # :contentReference[oaicite:52]{index=52}
    "D20": {1: "Bought", 2: "Sold", 3: "Neither", **DK_PREF},  # :contentReference[oaicite:53]{index=53}
    "D21": {1: "Buy", 2: "Sell", 3: "Neither", **DK_PREF},      # :contentReference[oaicite:54]{index=54}
    "D4": AGREE_7,  # worry about fraud :contentReference[oaicite:55]{index=55}

    # E-section: regulation, disclosures
    "E1_1": ONE_TO_TEN_CONF,  # confidence in regulation :contentReference[oaicite:56]{index=56}
    "E20": YES_NO_DK_PREF,    # disclosures adequate :contentReference[oaicite:57]{index=57}
    "E5": {
        1: "Protecting investors", 2: "Protecting institutions",
        3: "Both", 4: "Neither", **DK_PREF
    },  # :contentReference[oaicite:58]{index=58}
    "E6": {
        1: "In-person meeting", 2: "Paper mail", 3: "Email delivery",
        4: "Access on Internet", 5: "None of the above", **DK_PREF
    },  # :contentReference[oaicite:59]{index=59}

    # F-section: information sources & tools
    "F1_1": YES_NO_DK_PREF, "F1_2": YES_NO_DK_PREF, "F1_3": YES_NO_DK_PREF,
    "F1_4": YES_NO_DK_PREF, "F1_5": YES_NO_DK_PREF, "F1_6": YES_NO_DK_PREF,
    "F1_7": YES_NO_DK_PREF, "F1_8": YES_NO_DK_PREF, "F1_9": YES_NO_DK_PREF,  # :contentReference[oaicite:60]{index=60}
    "F2_1": YES_NO_DK_PREF, "F2_2": YES_NO_DK_PREF, "F2_5": YES_NO_DK_PREF,
    "F2_6": YES_NO_DK_PREF, "F2_20": YES_NO_DK_PREF, "F2_21": YES_NO_DK_PREF,
    "F2_22": YES_NO_DK_PREF,  # :contentReference[oaicite:61]{index=61}
    "F3_1": YES_NO_DK_PREF, "F3_2": YES_NO_DK_PREF, "F3_3": YES_NO_DK_PREF,
    "F3_4": YES_NO_DK_PREF, "F3_5": YES_NO_DK_PREF, "F3_6": YES_NO_DK_PREF,
    "F3_7": YES_NO_DK_PREF, "F3_8": YES_NO_DK_PREF,  # :contentReference[oaicite:62]{index=62}
    "F4": YES_NO_DK_PREF, "F5": YES_NO_DK_PREF, "F20": YES_NO_DK_PREF,       # :contentReference[oaicite:63]{index=63}

    # G-section: comfort, knowledge, literacy
    "G1": ONE_TO_TEN_CONF,  # investing comfort :contentReference[oaicite:64]{index=64}
    "G2": {1: "1 - Very low", 2: "2", 3: "3", 4: "4", 5: "5", 6: "6", 7: "7 - Very high", **DK_PREF},  # :contentReference[oaicite:65]{index=65}
    "G20": AGREE_7,  # access to info :contentReference[oaicite:66]{index=66}
    "G4": {1: "Own a part of the company", 2: "Lent money", 3: "Liable for debts", 4: "Company returns original investment", **DK_PREF},  # :contentReference[oaicite:67]{index=67}
    "G5": {1: "Own a part of the company", 2: "Lent money to the company", 3: "Liable for debts", 4: "Can vote on shareholder resolutions", **DK_PREF},  # :contentReference[oaicite:68]{index=68}
    "G6": {1: "Preferred stock", 2: "Common stock", 3: "Bonds", **DK_PREF},  # :contentReference[oaicite:69]{index=69}
    "G7": {1: "True", 2: "False", **DK_PREF},                               # :contentReference[oaicite:70]{index=70}
    "G21": {1: "True", 2: "False", **DK_PREF},                              # :contentReference[oaicite:71]{index=71}
    "G8": {1: "Stocks", 2: "Bonds", 3: "CDs", 4: "Money market accounts", 5: "Precious metals", **DK_PREF},  # :contentReference[oaicite:72]{index=72}
    "G22": {1: "Less risky short term", 2: "Lower fees/expenses", 3: "Less likely to decline", **DK_PREF},  # :contentReference[oaicite:73]{index=73}
    "G11": {1: "Lower risk", 2: "Greater demand", 3: "Can be tax-free", **DK_PREF},  # :contentReference[oaicite:74]{index=74}
    "G12": {1: "$500", 2: "$250", 3: "$0", **DK_PREF},  # margin loss math :contentReference[oaicite:75]{index=75}
    "G13": {1: "Sell soon after buying", 2: "Sell before peak", 3: "Sell at a loss", 4: "Sell borrowed shares", **DK_PREF},  # :contentReference[oaicite:76]{index=76}

    # H-section: behaviors
    "H2": YES_NO_DK_PREF,  # read reviews before restaurants :contentReference[oaicite:77]{index=77}
    "H3": YES_NO_DK_PREF,  # purchased a used car :contentReference[oaicite:78]{index=78}

    # Weight & selected demographics (from State-by-State)
    "WGT1": "Scale weight by age and education",  # numeric weight :contentReference[oaicite:79]{index=79}
    "S_Gender": {1: "Male", 2: "Female"},         # :contentReference[oaicite:80]{index=80}
    "S_Age": {1: "18-34", 2: "35-54", 3: "55+"},  # :contentReference[oaicite:81]{index=81}
    "S_Ethnicity": {1: "White Alone (Non-Hispanic)", 2: "Non-White"},  # :contentReference[oaicite:82]{index=82}
    "S_Education": {1: "Some college or less (incl. Associate's)", 2: "College grad (Bachelor's) or more"},  # :contentReference[oaicite:83]{index=83}
    "S_Income": {1: "<$50K", 2: "$50-$100K", 3: "$100K+"},  # :contentReference[oaicite:84]{index=84}
}



# Financial Literacy

### Financial Literacy is defined by the following columns
"J5": "Emergency Funds",
"J6": "College Savings",
"J20": "Emergency Confidence",
"J32": "Credit Record",
"B1": "Checking Account",
"B2": "Savings Account",
"B4": "Overdraw",
"C1_2012": "Retirement Plan",
"C5_2012": "Other Retirement Accounts",
"B14A_1": "Other Investments",
"E15_2015": "Mortgage Payment",
"F1": "Credit Card",
"G20": "Past Due Payments",
"G35": "Late Student Loan",
"G38": "Debt Agency Contact",
"M1_1": "Financial Confidence",
"M4": "Financial Knowledge"

### To score a person on financial literacy we will be scoring them weighting their scores based on the format of the questions 
(To discuss with group)
Current Scheme:
J5: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
J6: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
J20: Certain could come up with full $2,000 (5), Probably could (4), Probably could not (3), certain could not (2), Don't know (0), Prefer not to say (0), No Response Given (0)
J32: Very Good (5), Good (4), About Average (3), Bad (2), Very Bad (1), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
B1: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
B2: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
B4: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
C1_2012: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0) 
C5_2012: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
B14A_1: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0) 
E15_2015: Never (?), Once (?), More than once (?), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
F1: 2 to 3 (?), 3 to 4 (?), 1 (?), 4 to 8 (?), 9 to 12 (?), 13 to 20 (?), More than 20, No credit cards (?), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
G20: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0) 
G35: Never, payments not due (?), Never (?), repaying on time each month (?), Once (?), More than once (?), Don't Know (0), Prefer Not to Say (0), No Response Given (0)
G38: Yes (3), No (2), Don't Know (0), Prefer Not to Say (0), No Response Given (0)


In [16]:
# --- Confidence and financial comfort ---
G1_w = {  # Comfort with investing
    1: 1, 2: 2, 3: 3, 4: 4, 5: 5,
    6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
    98: 0, 99: 0, "No Response Given": 0
}

G2_w = {  # Financial comfort (1=low, 7=high)
    1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7,
    98: 0, 99: 0, "No Response Given": 0
}

D1_1_w = {  # Confidence in long-term market opportunities
    1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
    98: 0, 99: 0, "No Response Given": 0
}

D1_2_w = {  # Confidence in market fairness
    1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
    98: 0, 99: 0, "No Response Given": 0
}

C25_w = {  # Confidence in knowing investment fees
    1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
    98: 0, 99: 0, "No Response Given": 0
}
B2_1_w = {1: 3, 2: 1, 98: 0, 99: 0}  # Individual stocks
B2_3_w = {1: 3, 2: 1, 98: 0, 99: 0}  # Mutual funds
B2_4_w = {1: 3, 2: 1, 98: 0, 99: 0}  # ETFs
B2_5_w = {1: 2, 2: 1, 98: 0, 99: 0}  # Annuities
B2_21_w = {1: 3, 2: 1, 98: 0, 99: 0} # REITs
A2_w = {1: 3, 2: 1, 98: 0, 99: 0}    # Retirement account ownership
A3_w = {1: 3, 2: 1, 98: 0, 99: 0}    # Contribute to retirement
B10_w = {  # Risk tolerance
    1: 5,  # Take substantial risks
    2: 4,
    3: 2,
    4: 1,
    98: 0, 99: 0, "No Response Given": 0
}

D3_w = {  # Self-assessed market performance
    3: 5,  # Better than market
    2: 3,  # About the same
    1: 1,  # Worse
    98: 0, 99: 0, "No Response Given": 0
}

C21_w = {  # Advisor needs approval before trading
    1: 5,  # Yes → cautious/aware
    2: 3,  # No → independent but possibly riskier
    98: 0, 99: 0, "No Response Given": 0
}
G4_w = {  # What does owning a stock mean?
    1: 2,  # Correct answer: own a part of the company
    2: 0, 3: 0, 4: 0, 98: 0, 99: 0
}

G6_w = {  # Stock vs bond identification
    2: 2,  # Correct answer: Common stock
    1: 0, 3: 0, 98: 0, 99: 0
}

G7_w = {1: 2, 2: 0, 98: 0, 99: 0}     # True/False financial knowledge item  
G8_w = {1: 2, 2: 0, 3: 0, 4: 0, 5: 0, 98: 0, 99: 0}  # Risk perception  

WEIGHTS = {
    # ----- Bucket 1: Critical Stability -----
    "Investing_Comfort": G1_w,
    "Financial_Comfort": G2_w,
    "Market_Confidence_LongTerm": D1_1_w,
    "Market_Fairness_Confidence": D1_2_w,
    "Fee_Confidence": C25_w,

    # ----- Bucket 2: Core Access & Assets -----
    "Own_Stocks": B2_1_w,
    "Own_MutualFunds": B2_3_w,
    "Own_ETFs": B2_4_w,
    "Own_Annuities": B2_5_w,
    "Own_REITs": B2_21_w,
    "Retirement_Account": A2_w,
    "Contribute_Retirement": A3_w,

    # ----- Bucket 3: Credit Health & Risk Behavior -----
    "Risk_Tolerance": B10_w,
    "Market_Performance": D3_w,
    "Advisor_Approval": C21_w,

    # ----- Bucket 4: Supplementary Planning & Knowledge -----
    "Stock_Ownership_Knowledge": G4_w,
    "Stock_vs_Bond_Knowledge": G6_w,
    "Financial_Knowledge_TrueFalse": G7_w,
    "Risk_Perception_Knowledge": G8_w,
}


In [17]:
# === 3) Sorting Variables ===

# Rename dictionary for predictors
predictor_rename_dict = {
    "S_Education": "Education",
    "S_Age": "Age Group",
    "S_Gender": "Gender",
    "S_Income": "Annual Income",
    "S_Ethnicity": "Ethnicity"
}

# ----- Financial Literacy Outcomes -----
finlit_rename_dict = {
    # --- Bucket 1: Critical Stability ---
    "G1": "Investing_Comfort",
    "G2": "Financial_Comfort",
    "D1_1": "Market_Confidence_LongTerm",
    "D1_2": "Market_Fairness_Confidence",
    "C25": "Fee_Confidence",

    # --- Bucket 2: Core Access & Assets ---
    "A2": "Retirement_Account",
    "A3": "Contribute_Retirement",
    "B2_1": "Own_Stocks",
    "B2_3": "Own_MutualFunds",
    "B2_4": "Own_ETFs",
    "B2_5": "Own_Annuities",
    "B2_21": "Own_REITs",

    # --- Bucket 3: Credit Health & Risk Behavior ---
    "B10": "Risk_Tolerance",
    "D3": "Market_Performance",
    "C21": "Advisor_Approval",

    # --- Bucket 4: Supplementary Planning & Knowledge ---
    "G4": "Stock_Ownership_Knowledge",
    "G6": "Stock_vs_Bond_Knowledge",
    "G7": "Financial_Knowledge_TrueFalse",
    "G8": "Risk_Perception_Knowledge"
}

# Combine both into one dictionary
rename_dict = {**predictor_rename_dict, **finlit_rename_dict}

# Rename columns in dataframe
original_df = original_df.rename(columns=rename_dict)

In [18]:
# Defining Financial Literacy
financial_literacy = original_df.copy()
print(financial_literacy.head(10).to_string)

for col, weight_dict in WEIGHTS.items():
    # financial_literacy[f"{col}_score"] = financial_literacy[col].map(weight_dict)
    original_df[f"{col}_score"] = original_df[col].map(weight_dict)



<bound method DataFrame.to_string of        NFCSID  A1  Retirement_Account  Contribute_Retirement  Own_Stocks  \
0  2018010042   2                   1                      1          98   
1  2018010047   1                   1                      1           1   
2  2018010050   2                   1                      1           1   
3  2018010051   1                   1                      1           2   
4  2018010053   1                   1                      1           1   
5  2018010054   2                   1                      1           1   
6  2018010066   1                   1                      1           2   
7  2018010078   1                   1                      1           2   
8  2018010082   1                   1                      1           1   
9  2018010094   2                   1                      1           2   

   B2_2  Own_MutualFunds  Own_ETFs  Own_Annuities  B2_7  ...  G12  G13  H2  \
0    98                1         1             9

In [19]:
original_df["Total_Score"] = original_df[[f"{col}_score" for col in WEIGHTS.keys()]].sum(axis=1)

conditions = [
    (original_df["Total_Score"] >= 85),                                 # top 10%
    (original_df["Total_Score"] >= 70) & (original_df["Total_Score"] < 85),
    (original_df["Total_Score"] >= 50) & (original_df["Total_Score"] < 70),
    (original_df["Total_Score"] >= 35) & (original_df["Total_Score"] < 50),
    (original_df["Total_Score"] >= 15) & (original_df["Total_Score"] < 35),
    (original_df["Total_Score"] > 0) & (original_df["Total_Score"] < 15),
    (original_df["Total_Score"] == 0)
]

choices = ["Perfect", "High", "Average", "Below Average", "Low", "Bad", "None"]

original_df["FinLit_Level"] = np.select(conditions, choices, default="No Response Given")



In [20]:

print("Pre-mapping", original_df.isna().sum().sum())

for col, mapping in LABELS_BY_COLUMN.items():
    if col in original_df.columns and isinstance(mapping, dict):
        original_df[col] = pd.Series.map(original_df[col], mapping)

print("Post-mapping", cleaned_df.isna().sum().sum())

original_df.fillna("No Response Given", inplace=True)

print("Post-fillna", cleaned_df.isna().sum().sum())


Pre-mapping 10249
Post-mapping 729
Post-fillna 729


  original_df.fillna("No Response Given", inplace=True)


In [21]:

original_df.to_csv("2018 Cleaned.csv", index=False)
# Define new lists with human-readable names
predictors = list(predictor_rename_dict.values())
financial_lit_cols = list(finlit_rename_dict.values())

print("Post-cleaning", original_df.isna().sum().sum())

Post-cleaning 0
