In [1]:
import pandas as pd
import numpy as np
import scipy as sp

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import mpl_toolkits as mplot3d

pd.set_option("display.max_columns", None)
pd.set_option('future.no_silent_downcasting', True)

from tqdm import tqdm
tqdm.pandas()

In [2]:
import_folder_path = rf"../../data_cleaned/india/dynata"

# DATA WRANGLING

In [3]:
df00 = pd.read_csv(rf"{import_folder_path}/Indian Risk Literacy DYNATA_280525.csv")[26:]
df00[["Q9b_11", "Q9b_5", "Q9b_6", "Q9b_12"]] = df00[["Q9b_11", "Q9b_5", "Q9b_6", "Q9b_12"]].astype(float)
df00["Q9b_11"] = df00["Q9b_5"] + df00["Q9b_11"]
df00["Q9b_12"] = df00["Q9b_6"] + df00["Q9b_12"]
df00.drop(["Q9b_5", "Q9b_6"], axis = 1, inplace = True)
# 
df00.rename({"ResponseId":"responseid"}, axis = 1, inplace = True)

# drop na in responses
df01 = df00.dropna(subset = df00.columns[19:77])
print("Post dropping NA:", len(df01["responseid"]))

# removing students
df01 = df01.loc[ ~(df01["Q4a"] == "Student")].copy()
print("Post removing students:", len(df01["responseid"]))

# removing straight liners
matrix = ["Q8_1", "Q8_2", "Q8_3", "Q8_4", "Q8_5", "Q8_6", "Q8_7", "Q8_8"]
conversion_dict = {"1 (strongly disagree)":1 , "2":2, "3":3, "4":4, "5":5, "6 (strongly agree)":6}
for ques in matrix:
    df01[ques] = df01[ques].astype(str).replace(conversion_dict)

df01[ques] = pd.to_numeric(df01[ques], errors='coerce')

df01["Q8_std"] = df01[matrix].std(axis = 1)
df01 = df01.loc[ df01["Q8_std"] != 0].copy()
print("Post removing straightliners:", len(df01["responseid"]))

# attention check
df01 = df01.loc[ df01["Attention Check"] == "Vase"].copy()
print("Post attention chaeck failures:", len(df01["responseid"]))

# <0.5 median time of completion filtered
df01["Duration (in seconds)"] = df01["Duration (in seconds)"].astype("Int64")
median = df01["Duration (in seconds)"].astype("Int64").median()
df01 = df01.loc[ df01["Duration (in seconds)"] >= median*0.5].copy()
print("Post removing <0.5 median time responses:", len(df01["responseid"]))

# # ONLY SALARIED 
# df01 = df01.loc[ df01["Q4a"] == "Salaried"].copy()
# print("Post removing non-salaried responses:", len(df01["responseid"]))

df0 = df01.iloc[:, 19:77].copy().reset_index(drop = True)
df0.insert(0, "responseid", range(0, len(df0)))

Post dropping NA: 963
Post removing students: 961
Post removing straightliners: 846
Post attention chaeck failures: 846
Post removing <0.5 median time responses: 699


In [4]:
df0["age"] = df0["Q2"].astype(float).copy()
df0["gender_text"] = df0["Q1"]

# Age groups/rec = [2,3,4]
# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o (75 y/o, i.e, within the scope of the data we have, it can mean 60 and above also)

df0["age_rec"] = np.where( (df0["age"] >= 18) & (df0["age"] <= 35), 2,
                          np.where( (df0["age"] >= 36) & (df0["age"] <= 55), 3,
                                   np.where( (df0["age"] >= 56), 4, 0
                                           )
                                  )
                         )

# Secondary upto 10 – ISCED 3
# Senior Secondary upto 12 - ISCED 3 
# Diploma and voca – ISCED 2
# UG – ISCED 2
# PG – ISCED 1
# PhD and higher – ISCED 1

df0["isced"] = np.where(df0["Q3"] == "Post-Graduate Program", 1,
                        np.where(df0["Q3"] == "Undergraduate Program", 2,
                                 np.where(df0["Q3"] == "Ph.D. and higher", 1,
                                          np.where(df0["Q3"] == "Diploma and Vocational Training", 2,
                                                   np.where(df0["Q3"] == "Secondary School (11th to 12th Std.)", 3,
                                                            np.where( df0["Q3"] == "Primary School (up to 10th Std.)", 3,
                                                                     np.where( df0["Q3"] == "M.Phil.", 1, 0
                                                                             )
                                                                    )
                                                           )
                                                  )
                                         )
                                )
                       )

df0["income"] = np.where(df0["Q15a"] == "< INR 500,000", 1,
                         np.where(df0["Q15a"] == "INR 500,001 – INR 15,00,000", 2,
                                  np.where(df0["Q15a"] == "INR 1500,001 – INR 30,00,000", 3,
                                           np.where(df0["Q15a"] == "INR 30,00,001 – INR 50,00,000", 4,
                                                    np.where(df0["Q15a"] == "INR 50,00,001 – INR 75,00,000", 5,
                                                             np.where( df0["Q15a"] == "> INR 75,00,000", 6, -1
                                                                     )
                                                            )
                                                   )
                                           )
                                  )
                         )

df0["wealth"] = df0["Q14b"].astype(float).copy()

In [5]:
def remove_indexname(series): # removes index name
    series.index.name = None
    series.name = None
    return series.to_string()

print("Total Observations:", df0["Q1"].count())

print(f'\nGender:\n{remove_indexname(df0["Q1"].value_counts())}')
print(f'\nOccupation:\n{remove_indexname(df0["Q4a"].value_counts())}')
print(f'\nEducation:\n{remove_indexname(df0["Q3"].value_counts())}')
print(f'\nISCED:\n{remove_indexname(df0["isced"].value_counts())}')

vc_income = df0["Q15a"].value_counts()
income = vc_income.iloc[[1, 0, 2, 3, 4, 5]]
print(f'\nIncome:\n{remove_indexname(income)}')

Total Observations: 699

Gender:
Male      361
Female    338

Occupation:
Salaried                       490
Entrepreneur/Business Owner    129
Currently not employed          68
Retired                         12

Education:
Post-Graduate Program                   225
Undergraduate Program                   206
Secondary School (11th to 12th Std.)    177
Diploma and Vocational Training          35
Primary School (up to 10th Std.)         29
Ph.D. and higher                         16
M.Phil.                                  11

ISCED:
1    252
2    241
3    206

Income:
< INR 500,000                    194
INR 500,001 – INR 15,00,000      221
INR 1500,001 – INR 30,00,000     114
INR 30,00,001 – INR 50,00,000     76
INR 50,00,001 – INR 75,00,000     52
> INR 75,00,000                   42


## Questions

In [6]:
df0.columns

Index(['responseid', 'Q1', 'Q2', 'Q3', 'Q4a', 'Q4.5_4', 'Q4.5_5', 'Q4.5_6',
       'Q5_1', 'Q6 - changed', 'Q8_1', 'Q8_2', 'Q8_3', 'Q8_4', 'Q8_5', 'Q8_6',
       'Q8_7', 'Q8_8', 'Q9b_1', 'Q9b_4', 'Q9b_10', 'Q9b_11', 'Q9b_12', 'Q9b_7',
       'Q9b_8', 'Q9b_13', 'Q9b_14', 'Q9b_9', 'Q14b', 'Q11a_1', 'Q11a_2',
       'Q11a_3', 'Q11b_1', 'Q11b_2', 'Q11b_3', 'Q11b_4', 'Q11b_5', 'Q11c',
       'Q11d', 'Q11h', 'Attention Check', 'Q11i', 'Q13a', 'Q13b', 'Q13c',
       'Q13d', 'Q12a', 'Q12b', 'Q12c', 'Q14a', 'Q14b.1', 'Q14c', 'Q54', 'Q57',
       'Q58', 'Q15a', 'psid', 'entrySigCheck', 'verification', 'age',
       'gender_text', 'age_rec', 'isced', 'income', 'wealth'],
      dtype='object')

In [7]:
df0["q8_2_1"] = np.where(df0["Q11b_1"] == "No", 1, 2)
df0["q8_2_2"] = np.where(df0["Q11b_2"] == "No", 1, 2)
df0["q8_2_3"] = np.where(df0["Q11b_3"] == "No", 1, 2)
df0["q8_2_4"] = np.where(df0["Q11b_4"] == "No", 1, 2)
df0["q8_2_5"] = np.where(df0["Q11b_5"] == "No", 1, 2)

df0["q8_3"] = np.where(df0["Q11c"] == "The lower the quality of the study, the more likely that future studies will change the risk estimate.", 1,
                          np.where(df0["Q11c"] == "The higher the quality of the study, the more likely that future studies will change the risk estimate.", 2,
                                  np.where(df0["Q11c"] == "Irrespective of the quality of the study, future studies will not change the risk estimate.", 3,
                                          np.where(df0["Q11c"] == "Irrespective of the quality of the study, future studies will change the risk estimate substantially anyway.", 4,0
                                                  )
                                          )
                                  )
                         )


df0["q8_4"] = np.where(df0["Q11d"] == "The growth rate will be 0.4% on average each year", 1,
                          np.where(df0["Q11d"] == "The growth rate over five years will be exactly 2%", 2,
                                  np.where(df0["Q11d"] == "The growth rate over five years will be between 1% and 3%", 3,
                                          np.where(df0["Q11d"] == "It is not possible to predict the growth rate with certainty", 4,0
                                                  )
                                          )
                                  )
                         )

In [8]:
df0["q8_5berlin_1"] = np.where(df0["Q11a_1"].astype(int) == 1, 1, 2)
df0["q8_5london_1"] = np.where(df0["Q11a_1"].astype(int) == 1, 1, 2)
df0["q8_5paris_1"] = np.where(df0["Q11a_1"].astype(int) == 1, 1, 2)

df0["q8_6"] = np.where(df0["Q11i"] == "The medication increases recovery by 100%", 1,
                          np.where(df0["Q11i"] == "The medication increases recovery by 50%", 2,
                                   np.where(df0["Q11i"] == "The medication increases recovery by 2%", 3,
                                            np.where(df0["Q11i"] == "None of the above is implied", 4, 0
                                                    )
                                           )
                                  )
                         )

df0["q8_7"] = np.where(df0["Q11h"] == "5 in 100 people prior to the intervention to 6 out of 100 people after the intervention", 1,
                          np.where(df0["Q11h"] == "100 in 10,000 people prior to the intervention to 120 out of 10,000 people after the intervention", 2,
                                   np.where(df0["Q11h"] == "70 in 100 people prior to the intervention to 90 out of 100 people after the intervention", 3,
                                            np.where(df0["Q11h"] == "It is not possible to determine which of the answers is correct given the information provided", 4, 0
                                                    )
                                           )
                                  )
                         )


df0["q9_1_1"] = df0["Q14a"].astype(float).copy()
df0["q9_2_1"] = df0["Q14b.1"].astype(float).copy()

df0["q9_3"] = np.where(df0["Q14c"] == "Crosicol", 1,
                          np.where(df0["Q14c"] == "Hertinol", 2,
                                   np.where(df0["Q14c"] == "They are equal", 3,
                                            np.where(df0["Q14c"] == "Can’t say", 4, 0
                                                    )
                                           )
                                  )
                         )

In [9]:
df0["q10_1_1"] = df0["Q13a"].astype(float).copy()
df0["q10_2_1"] = df0["Q13b"].astype(float).copy()
df0["q10_3_1"] = df0["Q13c"].astype(float).copy()

df0["q10_4"] = np.where(df0["Q13d"] == "9 out of 59", 1,
                          np.where(df0["Q13d"] == "9 out of 10", 2,
                                   np.where(df0["Q13d"] == "59 out of 1000", 3,
                                            np.where(df0["Q13d"] == "59 out of 100", 4, 0
                                                    )
                                           )
                                  )
                         )

In [10]:
# Dynata Only

further_analysis_cols = [
    "Q5_1", "Q6 - changed",
    "Q8_1", "Q8_2", "Q8_3", "Q8_4", "Q8_5", "Q8_6", "Q8_7", "Q8_8",
    "Q9b_1", "Q9b_4", "Q9b_10", "Q9b_11", "Q9b_12",
    "Q9b_7", "Q9b_8", "Q9b_13", "Q9b_14",
    "Q12a", "Q12b", "Q12c"
    ] # not used


df0["Q5_1"] = df0["Q5_1"].astype(str)
df0["Q5_score"] = np.where(df0["Q5_1"] == "0 (unwilling to take risk)", 0,
                           np.where(df0["Q5_1"] == "1", 1,
                                    np.where(df0["Q5_1"] == "2", 2,
                                             np.where(df0["Q5_1"] == "3", 3,
                                                  np.where(df0["Q5_1"] == "4", 4,
                                                           np.where(df0["Q5_1"] == "5", 5,
                                                                    np.where(df0["Q5_1"] == "6", 6,
                                                                             np.where(df0["Q5_1"] == "7 (willing to take risk)", 7, -1
                                                                                      )
                                                                             )
                                                                    )
                                                           )
                                                      )
                                             )
                                    )
                           )

df0["Q6 - changed"] = df0["Q6 - changed"].astype(str)
df0["Q6_score"] = np.where(df0["Q6 - changed"] == "Definitely no experience", 0,
                           np.where(df0["Q6 - changed"] == "Rather little experience", 1,
                                        np.where(df0["Q6 - changed"] == "Some experience", 2,
                                                 np.where(df0["Q6 - changed"] == "Rather much experience", 3,
                                                          np.where(df0["Q6 - changed"] == "Definitely very much experience", 4, -1
                                                                   )
                                                          )
                                                 )
                                        )
                           )

# financial planning 2
cols = ["Q8_1", "Q8_2", "Q8_3", "Q8_4", "Q8_5", "Q8_6", "Q8_7", "Q8_8"]
df0[cols] = df0[cols].astype(int)
df0["Q8_total"] = df0[cols].sum(axis = 1)

# financial planning 1
cols = ["Q8_1", "Q8_2", "Q8_3", "Q8_4", "Q8_5", "Q8_6"]
df0[cols] = df0[cols].astype(int)
df0["Q8a_total"] = df0[cols].sum(axis = 1)

# safe assets
cols = ["Q9b_1", "Q9b_4", "Q9b_10", "Q9b_11", "Q9b_12"]
df0[cols] = df0[cols].astype(float)
df0["Q9_safe_total"] = df0[cols].sum(axis = 1)

# risky assets
cols = ["Q9b_7", "Q9b_8", "Q9b_13", "Q9b_14", "Q9b_9"]
df0[cols] = df0[cols].astype(float)
df0["Q9_risky_total"] = df0[cols].sum(axis = 1)

# Financial Literacy
cols = ["Q12a", "Q12b", "Q12c"]
df0[cols] = df0[cols].astype("string")

df0["Q12a_score"] = np.where(df0["Q12a"] == "More than INR 102", 1, 0)
df0["Q12b_score"] = np.where(df0["Q12b"] == "Less than today with the money in this account", 1, 0)
df0["Q12c_score"] = np.where(df0["Q12c"] == "False", 1, 0)

cols = ["Q12a_score", "Q12b_score", "Q12c_score"]
df0["Q12_total"] = df0[cols].sum(axis = 1)

# CRT
cols = ["Q54", "Q57", "Q58"]
df0[cols] = df0[cols].astype(float)

df0["Q54_score"] = np.where((df0["Q54"] == 5) | (df0["Q54"] == 0.05), 1, 0)
df0["Q57_score"] = np.where(df0["Q57"] == 5, 1, 0)
df0["Q58_score"] = np.where(df0["Q58"] == 47, 1, 0)

cols = ["Q54_score", "Q57_score", "Q58_score"]
df0["crt_total"] = df0[cols].sum(axis = 1)

# Financial Planning
cols = ["Q8_1", "Q8_2", "Q8_3", "Q8_4", "Q8_5", "Q8_6", "Q8_7", "Q8_8"]
for i, col in enumerate(cols):
    if i == 6:
        df0.rename({col:f"financialplanning_emergency"}, axis = 1, inplace = True)
    elif i == 7:
        df0.rename({col:f"financialplanning_retirement"}, axis = 1, inplace = True)

    else:
        df0.rename({col:f"financialplanning_{i+1}"}, axis = 1, inplace = True)

# asset classes
asset_classes_dict = {"Q9b_1":"savings_bank", "Q9b_4":"fixed_deposits",
                    "Q9b_10":"recurring_deposits", "Q9b_11":"gov_bonds",
                    "Q9b_12":"corp_bonds", "Q9b_7":"mutual_funds",
                    "Q9b_8":"stocks", "Q9b_13":"real_estate",
                    "Q9b_14":"commodities", "Q9b_9":"other_invest"}

df0.rename(asset_classes_dict, axis = 1, inplace = True)


cols = ["savings_bank", "gov_bonds", "mutual_funds", "stocks"]
for col in cols:
    df0[f"{col}_dummy"] = np.where(df0[col] > 0, 1, 0)
    if col == "savings_bank":
        savings_col = ["savings_bank", "fixed_deposits", "recurring_deposits", "gov_bonds",
                       "corp_bonds", "mutual_funds", "stocks"]
        df0[f"{col}_dummy"] = np.where(df0[savings_col].sum(axis = 1) > 0, 1, 0)

df0["investment_behaviour"] = df0[[col + "_dummy" for col in cols]].sum(axis = 1)
df0["investments"] = df0[["gov_bonds", "corp_bonds", "mutual_funds",
                          "stocks", "real_estate", "commodities", "other_invest"]].sum(axis = 1) 

df0["risky_investments"] = df0[["mutual_funds", "stocks", "real_estate", "commodities", "other_invest"]].sum(axis = 1) 

## SCORING

In [11]:
focal_col = ["q8_2_1", "q8_2_2", "q8_2_3", "q8_2_4", "q8_2_5",
             "q8_3", "q8_4",
             "q8_5berlin_1", "q8_5london_1", "q8_5paris_1", "q8_6", "q8_7",
             "q9_1_1", "q9_2_1", "q9_3",
             "q10_1_1", "q10_2_1", "q10_3_1", "q10_4"]

for col in focal_col:
    df0[col] = df0[col].astype(float)

scoreColumns = ["certainty1", "certainty2", "certainty3", "certainty4", "certainty5",
"uncertainty1", "uncertainty2",
"numeracy1", "numeracy2", "numeracy3", # "numeracy4", "numeracy5",
"graph1", "graph2", "graph3",
"riskcomprehension1", "riskcomprehension2", "riskcomprehension3", "riskcomprehension4", "riskcomprehension5",
"bayesian1"
]

# Dynata Only
req_cols = [
    "riskappetite", "investmentxp", "financialplanning1", "financialplanning2",
    "riskyassets_%", "safeassets_%",
    "financialliteracy", "crtscore"]

df0 = df0.copy()
df0[scoreColumns + req_cols] = 0.0
df0 = df0.copy()

In [12]:
# Assigning scores

def scoring1(surveyFacet):
    surveyFacet.loc[ surveyFacet["q8_2_1"] == 2, "certainty1"] = 1
    surveyFacet.loc[ surveyFacet["q8_2_2"] == 2, "certainty2"] = 1
    surveyFacet.loc[ surveyFacet["q8_2_3"] == 2, "certainty3"] = 1
    surveyFacet.loc[ surveyFacet["q8_2_4"] == 2, "certainty4"] = 1
    surveyFacet.loc[ surveyFacet["q8_2_5"] == 2, "certainty5"] = 1

    surveyFacet.loc[ surveyFacet["q8_3"] == 1, "uncertainty1"] = 1
    surveyFacet.loc[ surveyFacet["q8_4"] == 4, "uncertainty2"] = 1

    surveyFacet.loc[ surveyFacet["q8_5berlin_1"] == 2, "riskcomprehension1"] = 1
    surveyFacet.loc[ surveyFacet["q8_5london_1"] == 1, "riskcomprehension2"] = 1
    surveyFacet.loc[ surveyFacet["q8_5paris_1"] == 1, "riskcomprehension3"] = 1
    
    surveyFacet.loc[ surveyFacet["q8_6"] == 1, "riskcomprehension4"] = 1
    surveyFacet.loc[ surveyFacet["q8_7"] == 4, "riskcomprehension5"] = 1

    surveyFacet.loc[ surveyFacet["q9_1_1"] == 25, "graph1"] = 1
    surveyFacet.loc[ surveyFacet["q9_2_1"] == 20, "graph2"] = 1
    surveyFacet.loc[ surveyFacet["q9_3"] == 3, "graph3"] = 1

    surveyFacet.loc[ surveyFacet["q10_1_1"] == 500, "numeracy1"] = 1
    surveyFacet.loc[ surveyFacet["q10_2_1"] == 10, "numeracy2"] = 1
    surveyFacet.loc[ (surveyFacet["q10_3_1"] == 0.1) | (surveyFacet["q10_3_1"] == ".1") | (surveyFacet["q10_3_1"] == ",1") , "numeracy3"] = 1
    surveyFacet.loc[ surveyFacet["q10_4"] == 1, "bayesian1"] = 1

    # Dynata Only
    surveyFacet.loc[:, "riskappetite"] = surveyFacet["Q5_score"]
    surveyFacet.loc[:, "investmentxp"] = surveyFacet["Q6_score"]
    
    surveyFacet.loc[:, "financialplanning2"] = surveyFacet["Q8_total"]
    surveyFacet.loc[:, "financialplanning1"] = surveyFacet["Q8a_total"]

    surveyFacet.loc[:, "riskyassets_%"] = surveyFacet["Q9_risky_total"]
    surveyFacet.loc[:, "safeassets_%"] = surveyFacet["Q9_safe_total"]
    
    surveyFacet.loc[:, "financialliteracy"] = surveyFacet["Q12_total"]

    surveyFacet.loc[:, "crtscore"] = surveyFacet["crt_total"]

    

    
    # Assigning total scores
    # surveyFacet["Certainty score_5"] = surveyFacet["certainty1"] + surveyFacet["certainty2"] + surveyFacet["certainty3"] + surveyFacet["certainty4"] + surveyFacet["certainty5"]
    #surveyFacet["Uncertainty score_5"] = surveyFacet["uncertainty1"] + surveyFacet["uncertainty2"]

    #surveyFacet["Number Comprehension score_5"] = surveyFacet["numeracy1"] + surveyFacet["numeracy2"] + surveyFacet["numeracy3"] + surveyFacet["numeracy4"] + surveyFacet["numeracy5"]
    #surveyFacet["Graph Comprehension score_5"] = surveyFacet["graph1"] + surveyFacet["graph2"] + surveyFacet["graph3"]

    #surveyFacet["Calculation score_4"] = surveyFacet["riskcalculation1"] + surveyFacet["riskcalculation2"] + surveyFacet["riskcalculation3"] + surveyFacet["riskcalculation4"]

    #surveyFacet["Total Score_19"] = surveyFacet["Certainty score_5"] + surveyFacet["Uncertainty score_5"] + surveyFacet["Number Comprehension score_5"] + surveyFacet["Graph Comprehension score_5"] + surveyFacet["Calculation score_4"]

    return surveyFacet

In [13]:
df1 = df0.groupby(["responseid"]).progress_apply(scoring1)
df1 = df1.reset_index(drop = True).copy()

  return getattr(df, df_function)(wrapper, **kwargs)
100%|██████████| 699/699 [00:04<00:00, 144.34it/s]


In [14]:
def scoring2(survey):
    
    survey["Certainty_5"] = survey["certainty1"] + survey["certainty2"] + survey["certainty3"] + survey["certainty4"] + survey["certainty5"]
    
    survey["Uncertainty_2"] = survey["uncertainty1"] + survey["uncertainty2"]
    
    survey["RiskComprehension_5"] = survey["riskcomprehension1"] + survey["riskcomprehension2"] + survey["riskcomprehension3"] + survey["riskcomprehension4"] + survey["riskcomprehension5"]
    
    survey["GraphLiteracy_3"] = survey["graph1"] + survey["graph2"] + survey["graph3"]
    
    survey["Numeracy_3"] = survey["numeracy1"] + survey["numeracy2"] + survey["numeracy3"]
    
    survey["Bayesianreasoning_1"] = survey["bayesian1"]
    
    survey["TotalScore_19"] = survey["Certainty_5"] + survey["Uncertainty_2"] + survey["RiskComprehension_5"] + survey["GraphLiteracy_3"] + survey["Numeracy_3"] + survey["Bayesianreasoning_1"]
    
    survey["Certainty_%"] = survey["Certainty_5"] / 5 * 100
    survey["Uncertainty_%"] = survey["Uncertainty_2"] / 2 * 100
    survey["RiskComprehension_%"] = survey["RiskComprehension_5"] / 5 * 100
    survey["GraphLiteracy_%"] = survey["GraphLiteracy_3"] / 3 * 100
    survey["Numeracy_%"] = survey["Numeracy_3"] / 3 * 100
    survey["Bayesianreasoning_%"] = survey["Bayesianreasoning_1"] / 1 * 100
    
    survey["TotalScore_%"] = survey["TotalScore_19"] / 19 * 100

    #Dynata Only
    survey["RiskAppetite_7"] = survey["riskappetite"]
    survey["InvestmentXP_4"] = survey["investmentxp"]
    
    survey["FinancialPlanning1_6"] = survey["financialplanning1"] / 6
    survey["FinancialPlanning2_6"] = survey["financialplanning2"] / 8

    survey["RiskAssets_%"] = survey["riskyassets_%"]
    survey["RiskAlignment"] = survey["RiskAppetite_7"] * 100 / 7
    survey["RiskAlignment_residual"] = survey["RiskAssets_%"] - survey["RiskAlignment"]
    survey["RiskAlignment_felix"] = survey["RiskAssets_%"] * 7 / 100

    survey["SafeAssets_%"] = survey["safeassets_%"]
    
    survey["FinancialLiteracy_3"] = survey["financialliteracy"]

    survey["CRTScore_3"] = survey["crtscore"]

    survey["RiskAppetite1_%"] = survey["RiskAppetite_7"] / 7 * 100
    survey["RiskAppetite2_%"] = survey["InvestmentXP_4"] / 4 * 100

    survey["FinancialPlanning1_%"] = survey["FinancialPlanning1_6"] / 6 * 100
    survey["FinancialPlanning2_%"] = survey["FinancialPlanning2_6"] / 6 * 100

    survey["FinancialLiteracy_%"] = survey["FinancialLiteracy_3"] / 3 * 100
    survey["CRTScore_%"] = survey["CRTScore_3"] / 3 * 100

    survey["country_text"] = "India"
    survey["country"] = 0
    
     
    colReq1 = [
        "responseid", "country_text", "country", "age", "gender_text", "age_rec", "isced", "income", "wealth", "Certainty_5", "Uncertainty_2", "RiskComprehension_5", "GraphLiteracy_3",
        "Numeracy_3", "Bayesianreasoning_1", "Certainty_%", "Uncertainty_%", "RiskComprehension_%", "GraphLiteracy_%", "Numeracy_%", "Bayesianreasoning_%",
        "TotalScore_19", "TotalScore_%",
        #Dynata only
        "RiskAssets_%", "SafeAssets_%", "RiskAlignment", "RiskAlignment_residual", "RiskAlignment_felix",
        "RiskAppetite1_%", "RiskAppetite_7", "RiskAppetite2_%", "InvestmentXP_4",
        "FinancialPlanning1_6", "FinancialPlanning2_6", "FinancialPlanning1_%", "FinancialPlanning2_%",
        "FinancialLiteracy_%", "FinancialLiteracy_3",
        "CRTScore_3", "CRTScore_%"
    ]

    colReq2 = [
    "financialplanning_1", "financialplanning_2", "financialplanning_3", "financialplanning_4",
    "financialplanning_5", "financialplanning_6", "financialplanning_emergency", "financialplanning_retirement", 

    "savings_bank", "fixed_deposits", "recurring_deposits", "gov_bonds", "corp_bonds",
    "mutual_funds", "stocks", "real_estate", "commodities", "other_invest",

    "savings_bank_dummy", "gov_bonds_dummy", "mutual_funds_dummy", "stocks_dummy",
    "investment_behaviour", "risky_investments", "investments"
    ]
    
    survey1 = survey[colReq1 + scoreColumns + colReq2].copy()
    return survey1

In [15]:
df2 = scoring2(df1)

In [16]:
main = df2.copy()

In [17]:
main.to_pickle("India_Dynata_dataset_scoredOnly.pkl")

In [18]:
aa

NameError: name 'aa' is not defined

In [None]:
# wealth_quartile = pd.qcut(main.loc[~ (main["wealth"]==-1)]["wealth"],q = 4, labels = False, duplicates = "raise" ) + 1
# main["wealth_quartile"] = wealth_quartile

# main["risklit_quartile_income"] = pd.qcut(main.loc[~ (main["income"]==-1)]["TotalScore_%"],q = 4, labels = False, duplicates = "raise" ) + 1
# main["risklit_quartile_wealth"] = pd.qcut(main.loc[~ (main["wealth"]==-1)]["TotalScore_%"],q = 4, labels = False, duplicates = "raise" ) + 1

--------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------------------------------------------------------------

In [None]:
#Global Vras:
facet_cols = ["Certainty_5", "Uncertainty_2", "RiskComprehension_5", "GraphLiteracy_3",
              "Numeracy_3", "Bayesianreasoning_1"]

facet_percent_cols = ["Certainty_%", "Uncertainty_%", "RiskComprehension_%", "GraphLiteracy_%",
                      "Numeracy_%", "Bayesianreasoning_%",]

# Study 2 - India Dynata

### V1: Without any control variables

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())


### V2: With control of education

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced", "age_rec"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V4: With control of education and age and financial literacy

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V5: With control of education, age , financial literacy, CRT

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

---

## Ordinal Logistic Regression: Income Class by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V2: with control of education

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced", "FinancialLiteracy_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income"] = pd.Categorical(data["income"], ordered=True)

model = OrderedModel(data["income"], data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

---

---

## Ordinal Logistic Regression: Income Terciles by Risk literacy

### V1: Without any control variables

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())


### V2: With control of education

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced", "age_rec"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V4: With control of education and age and financial literacy

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V5: With control of education, age, financial literacy, CRT

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

---

## Ordinal Logistic Regression: Income Tercile by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V2: with control of education

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced", "FinancialLiteracy_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V4: with control of education,  financial literacy, CRT

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_tercile"] = pd.Categorical(data["income_tercile"], ordered=True)

model = OrderedModel(data["income_tercile"], data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

---

---

## Ordinal Logistic Regression: Income Quartiles by Risk literacy

### V1: Without any control variables

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())


### V2: With control of education

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced", "age_rec"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V4: With control of education and age and financial literacy

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V5: With control of education, age, financial literacy, CRT

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = income_filtered.copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

---

## Ordinal Logistic Regression: Income Quartile by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V2: with control of education

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced", "FinancialLiteracy_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = income_filtered.loc[income_filtered["age_rec"] == 3].copy()

data["income_quartile"] = pd.Categorical(data["income_quartile"], ordered=True)

model = OrderedModel(data["income_quartile"], data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

---

---

## Ordinal Logistic Regression: Wealth Quartiles by Risk literacy

### V1: Without any control variables

In [None]:
main

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

# data = wealth_filtered.copy()
data = main.loc[ main["wealth"] > 100000].copy().reset_index()

wealth_quartile = pd.qcut(data["wealth"],q = 4, labels = False, duplicates = "raise") + 1
data["wealth_quartile"] = wealth_quartile

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

risklit_quartile = pd.qcut(data["TotalScore_19"],q = 4, labels = False ) + 1
data["risklit_quartile"] = risklit_quartile.copy()

model = OrderedModel(data["wealth_quartile"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())


### V2: With control of education

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = wealth_filtered.copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = wealth_filtered.copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced", "age_rec"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = wealth_filtered.copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = wealth_filtered.copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

---

## Ordinal Logistic Regression: Wealth Quartiles by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = wealth_filtered.loc[wealth_filtered["age_rec"] == 3].copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data["risklit_quartile"],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V2: with control of education

In [None]:
data = wealth_filtered.loc[wealth_filtered["age_rec"] == 3].copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = wealth_filtered.loc[wealth_filtered["age_rec"] == 3].copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced", "FinancialLiteracy_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = wealth_filtered.loc[wealth_filtered["age_rec"] == 3].copy()

data["wealth_quartile"] = pd.Categorical(data["wealth_quartile"], ordered=True)

model = OrderedModel(data["wealth_quartile"], data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

res = model.fit(method='bfgs')
print(res.summary())

---

## WITH CONSTANT

## Linear Regression: Financial Planning 1 by Risk literacy

In [None]:
finplan1_data = main.copy()
risklit_quartile = pd.qcut(finplan1_data["TotalScore_19"],q = 4, labels = False ) + 1
finplan1_data["risklit_quartile"] = risklit_quartile.copy()

### V1: Without any control variables

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: With control of education

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()


model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

## Linear Regression: Financial Planning 1 by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: with control of education

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%"]]))

res = model.fit()
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]]))

res = model.fit()
print(res.summary())

---

## Linear Regression: Financial Planning 2 by Risk literacy

In [None]:
finplan2_data = main.copy()
risklit_quartile = pd.qcut(finplan2_data["TotalScore_19"],q = 4, labels = False ) + 1
finplan2_data["risklit_quartile"] = risklit_quartile.copy()

### V1: Without any control variables

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: With control of education

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()


model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

## Linear Regression: Financial Planning 2 by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: with control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%"]]))

res = model.fit()
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]]))

res = model.fit()
print(res.summary())

---

---

## WITHOUT CONSTANT

## Linear Regression: Financial Planning 1 by Risk literacy

In [None]:
finplan1_data = wealth_filtered.copy()
risklit_quartile = pd.qcut(finplan1_data["TotalScore_19"],q = 4, labels = False ) + 1
finplan1_data["risklit_quartile"] = risklit_quartile.copy()




In [None]:
cols = ["savings_bank", "gov_bonds", "mutual_funds", "stocks"]
[col + "_dummy" for col in cols]


In [None]:
finplan1_data

### V1: Without any control variables

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data["risklit_quartile"])

res = model.fit()
print(res.summary())

### V2: With control of education

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()


model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced"]])

res = model.fit()
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "age_rec"]])

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]])

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan1_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]])

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

## Linear Regression: Financial Planning 1 by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data["risklit_quartile"])

res = model.fit()
print(res.summary())

### V2: with control of education

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced"]])

res = model.fit()
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "FinancialLiteracy_%"]])

res = model.fit()
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = finplan1_data.loc[finplan1_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]])

res = model.fit()
print(res.summary())

---

## Linear Regression: Financial Planning 2 by Risk literacy

In [None]:
finplan2_data = main.copy()
risklit_quartile = pd.qcut(finplan2_data["TotalScore_19"],q = 4, labels = False ) + 1
finplan2_data["risklit_quartile"] = risklit_quartile.copy()

### V1: Without any control variables

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data["risklit_quartile"])

res = model.fit()
print(res.summary())

### V2: With control of education

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()


model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced"]])

res = model.fit()
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "age_rec"]])

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]])

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]])

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

## Linear Regression: Financial Planning 2 by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data["risklit_quartile"])

res = model.fit()
print(res.summary())

### V2: with control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced"]])

res = model.fit()
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "FinancialLiteracy_%"]])

res = model.fit()
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["FinancialPlanning1_%"], data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]])

res = model.fit()
print(res.summary())

---

---

## Linear Regression: Risk Alignment Residuals by Risk literacy

In [None]:
finplan2_data = main.copy()
risklit_quartile = pd.qcut(finplan2_data["TotalScore_19"],q = 4, labels = False ) + 1
finplan2_data["risklit_quartile"] = risklit_quartile.copy()

### V1: Without any control variables

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: With control of education

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()


model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

## Linear Regression: Risk Alignment residuals by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: with control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%"]]))

res = model.fit()
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_residual"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]]))

res = model.fit()
print(res.summary())

---

---

## Linear Regression: Risk Alignment (Felix) by Risk literacy

In [None]:
finplan2_data = main.copy()
risklit_quartile = pd.qcut(finplan2_data["TotalScore_19"],q = 4, labels = False ) + 1
finplan2_data["risklit_quartile"] = risklit_quartile.copy()

### V1: Without any control variables

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: With control of education

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()


model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: With control of education and age

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V4: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V5: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

## Linear Regression: Risk Alignement (Felix) by Risk literacy FOR 36-55 years old

### V1: without control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data["risklit_quartile"]))

res = model.fit()
print(res.summary())

### V2: with control of education

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced"]]))

res = model.fit()
print(res.summary())

### V3: with control of education and financial literacy

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%"]]))

res = model.fit()
print(res.summary())

### V4: with control of education, financial literacy, CRT

In [None]:
data = finplan2_data.loc[finplan2_data["age_rec"] == 3].copy()

model = sm.OLS(data["RiskAlignment_felix"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "FinancialLiteracy_%", "CRTScore_%"]]))

res = model.fit()
print(res.summary())

---

---

## Ordinal Logistic Regression: Emergency funds dummy by Risk literacy

### V1: with control of education, age, financial literacy

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = finplan2_data.copy()

data["financialplanning_emergency"] = pd.Categorical(data["financialplanning_emergency"], ordered=True)

model = OrderedModel(data["financialplanning_emergency"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V2: with control of education, age, financial literacy, CRT

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = finplan2_data.copy()

data["financialplanning_emergency"] = pd.Categorical(data["financialplanning_emergency"], ordered=True)

model = OrderedModel(data["financialplanning_emergency"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

---

---

## Ordinal Logistic Regression: Retirement funds dummy by Risk literacy

### V1: with control of education, age, financial literacy

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = finplan2_data.copy()

data["financialplanning_retirement"] = pd.Categorical(data["financialplanning_retirement"], ordered=True)

model = OrderedModel(data["financialplanning_retirement"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

### V2: with control of education, age, financial literacy, CRT

In [None]:
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

data = finplan2_data.copy()

data["financialplanning_retirement"] = pd.Categorical(data["financialplanning_retirement"], ordered=True)

model = OrderedModel(data["financialplanning_retirement"], data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]],
                     distr='logit')

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit(method='bfgs')
print(res.summary())

---

---


## Linear Regression: Investment Behaviour by Risk literacy

### V1: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["investment_behaviour"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V2: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["investment_behaviour"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

---

## Linear Regression: Investments by Risk literacy

### V1: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["investments"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V2: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["investments"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

---

## Linear Regression: Riskier Investments by Risk literacy

### V1: With control of education, age and Financial Lit

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["risky_investments"].abs(), sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

### V2: With control of education, age, Financial Lit, CRT

In [None]:
import statsmodels.api as sm

data = finplan2_data.copy()

model = sm.OLS(data["risky_investments"], sm.add_constant(data[["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"]]))

# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 55 y/o
# Age group 4 = 56 to 75 y/o 

res = model.fit()
print(res.summary())

---

---

## Willingness to take risk vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = (main["RiskAppetite1_%"] + main["RiskAppetite2_%"])/2, scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='Risk Appetite', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## Financial Planning 1 vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["FinancialPlanning1_%"], scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='Financial Planning 1', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## Financial Planning 2 vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["FinancialPlanning2_%"], scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='Financial Planning 2', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## Risky Assets ownership vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["RiskAssets_%"], scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='Risky assets ownership', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## Safe Assets ownership vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["SafeAssets_%"], scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='Safe assets ownership', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## Financial Literacy vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["FinancialLiteracy_%"], scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='Financial Literacy', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## CRT vs risk literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["CRTScore_%"], scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='CRT score', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

## Risk Alignment residuals vs Risk Literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["RiskAlignment_residual"].abs(), scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='RiskAlignment_residual', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 110, step = 10))
plt.show()

In [None]:
print("Correlation coefficient between Risk Lit and", 
      "absolute value of risk alignement residuals: \n", 
      main["TotalScore_%"].corr(main["RiskAlignment_residual"].abs(), method = "pearson"))

## Risk Alignment (Felix) vs Risk Literacy

In [None]:
fig, axis = plt.subplots(1, 1, figsize = [6, 6])

sns.regplot( x = main["TotalScore_%"], y = main["RiskAlignment_felix"].abs(), scatter_kws={"color": "blue"}, line_kws={"color": "red"})
axis.set(ylabel='RiskAlignment_felix', xlabel='Total Score_%')

axis.set_xticks(np.arange(0, 110, step = 10))
axis.set_yticks(np.arange(0, 8, step = 1))
plt.show()

In [None]:
print("Correlation coefficient between Risk Lit and", 
      "absolute value of risk alignement residuals: \n", 
      main["TotalScore_%"].corr(main["RiskAlignment_felix"].abs(), method = "pearson"))

## Table Output

In [None]:

def regression_output(data: pd.DataFrame, regression_type: str, dependent_variable: str, independent_variable: str,
                      control_variables: list, depvar_absval: bool = False) -> pd.DataFrame :
    
    if depvar_absval == True:
        data[dependent_variable] = data[dependent_variable].abs().copy()

    if regression_type == "OLS":

        model = sm.OLS(data[dependent_variable],
                       sm.add_constant(data[[independent_variable] + control_variables]))
        res = model.fit()
    
    elif regression_type == "OrderedModel":

        data[dependent_variable] = pd.Categorical(data[dependent_variable], ordered=True)

        model = OrderedModel(data[dependent_variable],
                             data[[independent_variable] + control_variables], distr='logit')
        res = model.fit(method='bfgs')

    else:
        raise ValueError("Unsupported regression_type. Use 'OLS' or 'OrderedModel'.")
    
    def format_coefficient(coef, pval):
        if pval < 0.01:
            stars = "***"
        elif pval < 0.05:
            stars = "**"
        elif pval < 0.10:
            stars = "*"
        else:
            stars = ""
        return f"{coef:.3f}{stars}"
    
    def format_p_value(pval):
        return f'="({pval:.3f})"'
    
    # output = pd.DatFrame(index = ["risklit_quartile", "isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"])

    rows = []
    for var, coeff, pval in zip(res.params.index, res.params, res.pvalues):
        rows.append([var, str(format_coefficient(coeff, pval))])
        rows.append(["", str(format_p_value(pval))])
    
    output = pd.DataFrame(data = rows, columns = [regression_type, dependent_variable])
    return output


In [None]:
data = finplan2_data.copy()
import csv
pd.concat([ regression_output(data = data, regression_type = "OLS", dependent_variable = "risky_investments",
                              independent_variable = "risklit_quartile", control_variables = ["isced", "age_rec", "FinancialLiteracy_%", "CRTScore_%"],
                              depvar_absval = False),
            regression_output(data = data, regression_type = "OLS", dependent_variable = "risky_investments",
                              independent_variable = "risklit_quartile", control_variables = ["isced", "age_rec", "FinancialLiteracy_%"],
                              depvar_absval = False)],
                              axis = 1).replace({pd.NA:""})

