# Feature Creation 

We first import useful libraries:

In [56]:
import pandas as pd
import warnings
import numpy as np
import math

In [57]:
# Load the cleaned data; the first CSV column is used as the row index.
df = pd.read_csv('clean_data.csv', index_col=0)

We first start by creating the new features and initialising them to zero:

In [58]:
# Placeholder columns for the engineered features.
# The weighted scores take fractional values (weights such as 0.25 or 0.5), so
# they start as floats: writing a float into an integer column relies on a
# silent int -> float upcast. fin_knowledge_score stays an integer because it
# is later replaced wholesale by a sum of integer answers.
df['fin_knowledge_score'] = 0  # the higher, the more knowledgeable the respondent is in the financial field
df['Debtor'] = 0.0  # the higher, the more probable it is that the respondent is in debt
df['Perc_Fin_Lit'] = 0.0  # the higher, the more financially confident the respondent feels
df['Saver_Spender'] = 0.0  # high values are associated with savers, low values with spenders
df['Planner_Easygoer'] = 0.0  # high values are associated with planners, low values with more easy-going respondents
df['Fin_Products_Experience'] = 0.0  # represents experience with financial products
df['Fin_Indipendence'] = 0.0  # represents one's ability to provide financial solutions independently
df['Approach_Digitalised'] = 0.0  # the higher, the more digitalised the respondent's approach to financial matters is

## Financial Knowledge Score

The idea here is very simple: since each question is either correct, incorrect or missing/no reply, we can treat this as an exam with +1 points for each correct answer, -1 points for each incorrect answer and 0 if the answer was "blank" (no reply or no data). Note that the choice of points is established such that random replies would lead to an expected value of zero.

In [59]:
# Recode qk3 as an exam mark: code 3 (the correct answer) becomes +1 and code
# -99 (no reply / no data) becomes 0. Values of -1 are left untouched (the
# former "-1 -> -1" step changed nothing).
# Both masks are computed from the column before anything is written, and the
# writes go through .loc so they land in `df` itself. The previous
# df['qk3'][i] = ... form is chained assignment: it only ran quietly because
# every warning was silenced, and it is not written back under pandas
# Copy-on-Write.
qk3_correct = df['qk3'] == 3
qk3_blank = df['qk3'] == -99
df.loc[qk3_correct, 'qk3'] = 1
df.loc[qk3_blank, 'qk3'] = 0

In [60]:
# Recode qk4 as an exam mark: code 0 (the correct answer) becomes +1 and code
# -99 (no reply / no data) becomes 0. Values of -1 are left untouched (the
# former "-1 -> -1" step changed nothing).
# The masks are taken from the column BEFORE any write: 0 is both an input
# code and an output mark here, so the order of evaluation matters.
# Writes go through .loc instead of chained assignment (df['qk4'][i] = ...),
# which needed the warnings silenced and is not written back under pandas
# Copy-on-Write.
qk4_correct = df['qk4'] == 0
qk4_blank = df['qk4'] == -99
df.loc[qk4_correct, 'qk4'] = 1
df.loc[qk4_blank, 'qk4'] = 0

In [61]:
# Recode qk5 as an exam mark: code 102 (the correct answer) becomes +1 and
# code -99 (no reply / no data) becomes 0. Values of -1 are left untouched
# (the former "-1 -> -1" step changed nothing).
# Masks are computed before any write, and the writes go through .loc instead
# of chained assignment (df['qk5'][i] = ...), which needed the warnings
# silenced and is not written back under pandas Copy-on-Write.
qk5_correct = df['qk5'] == 102
qk5_blank = df['qk5'] == -99
df.loc[qk5_correct, 'qk5'] = 1
df.loc[qk5_blank, 'qk5'] = 0

In [62]:
# Recode qk6: only the code -99 (no reply / no data) changes, becoming 0.
# Every other value, including -1, is kept as stored (the former "-1 -> -1"
# step changed nothing).
# The write goes through .loc instead of chained assignment
# (df['qk6'][i] = ...), which needed the warnings silenced and is not written
# back under pandas Copy-on-Write.
df.loc[df['qk6'] == -99, 'qk6'] = 0

In [63]:
# Recode the three qk7 items: only the code -99 (no reply / no data) changes,
# becoming 0. Every other value, including -1, is kept as stored (the former
# "-1 -> -1" steps changed nothing).
# The writes go through .loc instead of chained assignment
# (df['qk7_1'][i] = ...), which needed the warnings silenced and is not
# written back under pandas Copy-on-Write.
for qk7_column in ('qk7_1', 'qk7_2', 'qk7_3'):
    df.loc[df[qk7_column] == -99, qk7_column] = 0

In [64]:
# Exam total per respondent: row-wise sum of the seven recoded question columns.
df['fin_knowledge_score'] = df[
    ['qk3', 'qk4', 'qk5', 'qk6', 'qk7_1', 'qk7_2', 'qk7_3']
].sum(axis='columns')

In [65]:
# Highest financial knowledge score present in the dataset.
max(df['fin_knowledge_score'])

7

In [66]:
# Lowest financial knowledge score present in the dataset.
min(df['fin_knowledge_score'])

-7

In [67]:
# Remove the individual question columns now that they are summed into
# fin_knowledge_score.
df.drop(
    columns=['qk3', 'qk4', 'qk5', 'qk6', 'qk7_1', 'qk7_2', 'qk7_3'],
    inplace=True,
)

## Debtor or not

We can define a function that, given a row in our df, will compute the value of the debtor variable for that given row: higher meaning "more debt" and lower meaning "debt averse". Weights for all variables are encoded within the function.

In [68]:
def create_debtor(df, row):
    """Return the Debtor score of the respondent stored under label ``row``.

    Higher values mean "more debt", lower values mean "debt averse". The
    mortgage and loan indicators always count; each remaining answer is
    skipped when it equals -99. All weights are hard-coded below.

    Parameters
    ----------
    df : DataFrame containing the survey columns read in the body.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers.
    """
    def answer(column):
        # Single place for the per-respondent lookup.
        return df[column][row]

    # mortgage (qprod1c_3) at full weight, loan (qprod1c_5) at half weight
    score = answer('qprod1c_3') + 0.5 * answer('qprod1c_5')

    family_debt = answer('qf12_3_e')  # debt towards family and friends (TTM)
    if family_debt != -99:
        score += 1 * family_debt

    salary_advance = answer('qf12_3_f')  # salary advance (TTM)
    if salary_advance != -99:
        score += 1 * salary_advance

    late_bills = answer('qf12_6_q')  # missing payments (bills late, TTM)
    if late_bills != -99:
        score += 0.5 * late_bills

    credit_use = answer('qf12_4k_5m_5o_6p')  # credit cards, loan, unsecured loan, overdraft
    if credit_use != -99:
        score += credit_use

    bills_on_time = answer('qf10_4')  # pay bills on time (5 disagree, 1 agree)
    if bills_on_time != -99:
        score += 0.25 * (-3 + bills_on_time)

    too_much_debt = answer('qf10_11')  # too much debt right now (5 disagree, 1 agree)
    if too_much_debt != -99:
        score += 2 * (3 - too_much_debt)

    situation_limits = answer('qf10_9')  # financial situation limits important things (5 disagree, 1 agree)
    if situation_limits != -99:
        score += (3 - situation_limits) / 4

    living_cost_worry = answer('qf10_10')  # worry about normal living expenses (5 disagree, 1 agree)
    if living_cost_worry != -99:
        score += (3 - living_cost_worry) / 4

    return score

Now we create and fill the variable:

In [69]:
# Score every respondent and assign the whole column in one step.
# The previous df['Debtor'][i] = ... loop was chained assignment: it only ran
# quietly because every warning was silenced, it wrote floats into a column
# created as integers, and it is not written back under pandas Copy-on-Write.
# Iterating over df.index (instead of range(len(df))) passes the real row
# labels to create_debtor, so the cell also works when labels are not 0..n-1.
df['Debtor'] = pd.Series(
    [create_debtor(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

To better understand our variable, we can check its maximum and minimum values, both theoretical and as observed in the dataset:

In [70]:
# Highest Debtor score present in the dataset.
max(df['Debtor'])

9.0

In [71]:
# Lowest Debtor score present in the dataset.
min(df['Debtor'])

-5.5

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [72]:
# Respondent(s) with the highest Debtor score, next to the answers feeding it.
df.loc[
    df['Debtor'] == max(df['Debtor']),
    ['qprod1c_3', 'qprod1c_5', 'qf10_4', 'qf10_9', 'qf10_11', 'qf10_10',
     'qf12_3_e', 'qf12_4k_5m_5o_6p', 'qf12_6_q', 'qf12_3_f', 'Debtor'],
]

Unnamed: 0,qprod1c_3,qprod1c_5,qf10_4,qf10_9,qf10_11,qf10_10,qf12_3_e,qf12_4k_5m_5o_6p,qf12_6_q,qf12_3_f,Debtor
1425,1,1,3,1,1,1,1.0,1.0,1.0,0.0,9.0


In [73]:
# Respondent(s) with the lowest Debtor score, next to the answers feeding it.
df.loc[
    df['Debtor'] == min(df['Debtor']),
    ['qprod1c_3', 'qprod1c_5', 'qf10_4', 'qf10_9', 'qf10_11', 'qf10_10',
     'qf12_3_e', 'qf12_4k_5m_5o_6p', 'qf12_6_q', 'qf12_3_f', 'Debtor'],
]

Unnamed: 0,qprod1c_3,qprod1c_5,qf10_4,qf10_9,qf10_11,qf10_10,qf12_3_e,qf12_4k_5m_5o_6p,qf12_6_q,qf12_3_f,Debtor
26,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
36,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
37,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
62,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
77,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
...,...,...,...,...,...,...,...,...,...,...,...
1842,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
1863,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
1879,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5
1939,0,0,1,5,5,5,0.0,0.0,0.0,0.0,-5.5


If we compute the theoretical maximum and minimum values, we get:

theoretical max: 10.5


theoretical min: -5.5

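The observed minimum coincides with the theoretical minimum and is shared by many respondents, which indicates that many of them were debt averse.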

## Perceived Financial Literacy

This variable tells us how confident the respondent felt in their financial knowledge: the higher the value, the higher the perceived knowledge. As before, after creating it we compute its observed maximum and minimum, as well as its theoretical maximum and minimum.

In [74]:
def create_perceived_fin_lit(df, row):
    """Return the perceived financial literacy score of respondent ``row``.

    The higher the score, the higher the respondent's perceived knowledge.
    Every answer is skipped when it equals -99; qf8 is additionally skipped
    for the codes -97 and 6, and qprod2 for the code 4.

    Parameters
    ----------
    df : DataFrame containing the survey columns read in the body.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers (0 if all are skipped).
    """
    def answer(column):
        # Single place for the per-respondent lookup.
        return df[column][row]

    score = 0

    plan_confidence = answer('qf8')  # confidence in making good financial plans (5 disagree, 1 agree)
    if plan_confidence not in (-99, -97, 6):
        score += 3 - plan_confidence

    own_experience = answer('qprod3_17')  # own experience was the main driver of financial decisions
    if own_experience != -99:
        score += 0.5 * own_experience

    product_choice = answer('qprod2')  # how investment products were chosen (multiple choice)
    if product_choice not in (-99, 4):
        score += 2 - product_choice

    close_eye = answer('qf10_6')  # I keep a close eye on my financial affairs (5 disagree, 1 agree)
    if close_eye != -99:
        score += (3 - close_eye) / 4

    satisfaction = answer('qf10_12')  # satisfied with my current financial situation (5 disagree, 1 agree)
    if satisfaction != -99:
        score += (3 - satisfaction) / 4

    self_rating = answer('qk1')  # overall perceived knowledge about financial matters
    if self_rating != -99:
        score += 3 - self_rating

    return score

In [75]:
# Score every respondent and assign the whole column in one step.
# The previous df['Perc_Fin_Lit'][i] = ... loop was chained assignment: it
# only ran quietly because every warning was silenced, it wrote floats into a
# column created as integers, and it is not written back under pandas
# Copy-on-Write. Iterating over df.index (instead of range(len(df))) passes
# the real row labels, so the cell also works when labels are not 0..n-1.
df['Perc_Fin_Lit'] = pd.Series(
    [create_perceived_fin_lit(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

In [76]:
# Highest perceived financial literacy score present in the dataset.
max(df['Perc_Fin_Lit'])

6.0

In [77]:
# Lowest perceived financial literacy score present in the dataset.
min(df['Perc_Fin_Lit'])

-5.25

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [78]:
# Respondent(s) with the highest Perc_Fin_Lit score, next to the answers feeding it.
df.loc[
    df['Perc_Fin_Lit'] == max(df['Perc_Fin_Lit']),
    ['qf8', 'qprod3_17', 'qprod2', 'qf10_6', 'qf10_12', 'qk1', 'Perc_Fin_Lit'],
]

Unnamed: 0,qf8,qprod3_17,qprod2,qf10_6,qf10_12,qk1,Perc_Fin_Lit
947,1,0.0,1.0,1,1,1,6.0


In [79]:
# Respondent(s) with the lowest Perc_Fin_Lit score, next to the answers feeding it.
df.loc[
    df['Perc_Fin_Lit'] == min(df['Perc_Fin_Lit']),
    ['qf8', 'qprod3_17', 'qprod2', 'qf10_6', 'qf10_12', 'qk1', 'Perc_Fin_Lit'],
]

Unnamed: 0,qf8,qprod3_17,qprod2,qf10_6,qf10_12,qk1,Perc_Fin_Lit
41,5,0.0,3.0,2,5,5,-5.25


If we compute the theoretical maximum and minimum values, we get:

Theoretical maximum: 6.5

Theoretical minimum: -6.0

## Saver or not

This variable tells us how much the respondent is a saver or someone that tends to spend: the higher, the more the respondent saves. As before, after creation we compute maximum, minimum and theoretical maximum and minimum.

In [80]:
def saver_spender(df, row):
    """Return the saver/spender score of respondent ``row``.

    High values are associated with savers, low values with spenders. The
    saving-behaviour and product indicators always count; each remaining
    answer is skipped when it equals -99 (qf4 also when it equals 2).

    Parameters
    ----------
    df : DataFrame containing the survey columns read in the body.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers.
    """
    def answer(column):
        # Single place for the per-respondent lookup.
        return df[column][row]

    # Always-counted indicators, in order:
    #   qf3_1 saved cash, qf3_3 saving account, qf3_4 savings given to a
    #   family member, qf3_8 not actively saving (counts against),
    #   qprod1c_8 opened saving account, qprod1c_7 opened checking account,
    #   qf3_6 saved by buying investment products,
    #   qf3_7 saved by buying livestock or other.
    score = (2 * answer('qf3_1') + 2 * answer('qf3_3') + 2 * answer('qf3_4')
             - 2 * answer('qf3_8') + answer('qprod1c_8') + answer('qprod1c_7')
             + answer('qf3_6') + answer('qf3_7'))

    income_shortfall = answer('qf11')
    if income_shortfall != -99:
        # any non-zero answer: TTM income not sufficient to cover living costs
        score += 0.1 if income_shortfall == 0 else -2

    used_savings = answer('qf12_1_a')  # used savings to cover expenses (TTM)
    if used_savings != -99:
        score += 0.5 * used_savings

    cut_spending = answer('qf12_1_b')  # reduced spending (TTM)
    if cut_spending != -99:
        score += cut_spending

    runway = answer('qf13')  # how long expenses can be covered without borrowing
    if runway != -99:
        score += runway * 0.25

    can_i_buy = answer('qf10_1')  # can I buy this? (5 disagree, 1 agree)
    if can_i_buy != -99:
        score += (3 - can_i_buy) / 2

    enjoys_spending = answer('qf10_3')  # I enjoy spending more than saving (5 disagree, 1 agree)
    if enjoys_spending != -99:
        score += (-3 + enjoys_spending) / 2

    money_to_spend = answer('qf10_8')  # money is there to be spent (5 disagree, 1 agree)
    if money_to_spend != -99:
        score += (-3 + money_to_spend) / 2

    risk_savings = answer('qf10_5')  # ready to risk some savings, implies savings exist (5 disagree, 1 agree)
    if risk_savings != -99:
        score += (3 - risk_savings) / 2

    cover_unaided = answer('qf4')  # could you cover living expenses without borrowing?
    if cover_unaided not in (-99, 2):
        score += 0.5 * cover_unaided

    return score

In [81]:
# Score every respondent and assign the whole column in one step.
# The previous df['Saver_Spender'][i] = ... loop was chained assignment: it
# only ran quietly because every warning was silenced, it wrote floats into a
# column created as integers, and it is not written back under pandas
# Copy-on-Write. Iterating over df.index (instead of range(len(df))) passes
# the real row labels, so the cell also works when labels are not 0..n-1.
df['Saver_Spender'] = pd.Series(
    [saver_spender(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

In [82]:
# Highest Saver_Spender score present in the dataset.
max(df['Saver_Spender'])

10.85

In [83]:
# Lowest Saver_Spender score present in the dataset.
min(df['Saver_Spender'])

-6.25

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [84]:
# Respondent(s) with the lowest Saver_Spender score, next to the answers feeding it.
df.loc[
    df['Saver_Spender'] == min(df['Saver_Spender']),
    ['qf3_1', 'qf3_3', 'qf3_4', 'qf3_8', 'qprod1c_8', 'qprod1c_7', 'qf3_6',
     'qf3_7', 'qf11', 'qf12_1_a', 'qf12_1_b', 'qf13', 'qf10_1', 'qf10_3',
     'qf10_8', 'qf10_5', 'qf4', 'Saver_Spender'],
]

Unnamed: 0,qf3_1,qf3_3,qf3_4,qf3_8,qprod1c_8,qprod1c_7,qf3_6,qf3_7,qf11,qf12_1_a,qf12_1_b,qf13,qf10_1,qf10_3,qf10_8,qf10_5,qf4,Saver_Spender
1396,0,0,0,1,0,0,0,0,1,0.0,0.0,3,5,1,1,3,0,-6.25


In [85]:
# Respondent(s) with the highest Saver_Spender score, next to the answers feeding it.
df.loc[
    df['Saver_Spender'] == max(df['Saver_Spender']),
    ['qf3_1', 'qf3_3', 'qf3_4', 'qf3_8', 'qprod1c_8', 'qprod1c_7', 'qf3_6',
     'qf3_7', 'qf11', 'qf12_1_a', 'qf12_1_b', 'qf13', 'qf10_1', 'qf10_3',
     'qf10_8', 'qf10_5', 'qf4', 'Saver_Spender'],
]

Unnamed: 0,qf3_1,qf3_3,qf3_4,qf3_8,qprod1c_8,qprod1c_7,qf3_6,qf3_7,qf11,qf12_1_a,qf12_1_b,qf13,qf10_1,qf10_3,qf10_8,qf10_5,qf4,Saver_Spender
317,0,1,0,0,1,1,1,1,0,0.0,0.0,5,1,5,5,3,1,10.85


If we compute the theoretical maximum and minimum values, we get:

Theoretical maximum: 15.85

Theoretical minimum: -7.75

## Planner or not

This variable tells us whether the respondent is a planner or someone who tends to be more easy-going: the higher the value, the more the respondent usually plans. As before, after creating it we compute its observed maximum and minimum, as well as its theoretical maximum and minimum.

In [86]:
def planner_not(df, row):
    """Return the planner/easy-goer score of respondent ``row``.

    High values are associated with planners, low values with more
    easy-going respondents. Each attitude answer is skipped when it equals
    -99 (qf4 also when it equals 2). The retirement-funding indicators are
    then walked in a fixed order and adding stops right after the first one
    that is non-zero, so together they contribute a single answer at most.

    Parameters
    ----------
    df : DataFrame containing the survey columns read in the body.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers.
    """
    def answer(column):
        # Single place for the per-respondent lookup.
        return df[column][row]

    score = 0

    responsibility = answer('qf1')  # responsibility for money within family (higher = less responsible)
    if responsibility != -99:
        score += 2 - responsibility

    has_budget = answer('qf2')  # household budget or not
    if has_budget != -99:
        score += 0.5 * has_budget

    cover_unaided = answer('qf4')  # could you cover living expenses without borrowing?
    if cover_unaided not in (-99, 2):
        score += cover_unaided

    live_for_today = answer('qf10_2')  # live for today, let tomorrow take care of itself (5 disagree, 1 agree)
    if live_for_today != -99:
        score += -3 + live_for_today

    risk_savings = answer('qf10_5')  # ready to risk some savings in investment (5 disagree, 1 agree)
    if risk_savings != -99:
        score += 3 - risk_savings

    close_eye = answer('qf10_6')  # keep a close eye on financial affairs (5 disagree, 1 agree)
    if close_eye != -99:
        score += (3 - close_eye) / 2

    long_term_goals = answer('qf10_7')  # set long-term financial goals (5 disagree, 1 agree)
    if long_term_goals != -99:
        score += 3 - long_term_goals

    runway = answer('qf13')  # how long expenses can be covered without borrowing (higher = longer)
    if runway != -99:
        score += runway * 0.25

    retirement_sources = (
        'qf9_1_9',  # pensions to cover retirement spending
        'qf9_2_3',  # pension fund and PIP to cover retirement
        'qf9_4',    # selling financial assets to cover retirement
        'qf9_5',    # selling your own possessions to cover retirement
        'qf9_6',    # using financial and non-financial asset income to cover retirement
    )
    for column in retirement_sources:
        source = answer(column)
        score += source
        if source != 0:
            # First non-zero indicator ends the walk; later ones are ignored.
            break

    return score

In [87]:
# Score every respondent and assign the whole column in one step.
# The previous df['Planner_Easygoer'][i] = ... loop was chained assignment: it
# only ran quietly because every warning was silenced, it wrote floats into a
# column created as integers, and it is not written back under pandas
# Copy-on-Write. Iterating over df.index (instead of range(len(df))) passes
# the real row labels, so the cell also works when labels are not 0..n-1.
df['Planner_Easygoer'] = pd.Series(
    [planner_not(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

In [88]:
# Highest Planner_Easygoer score present in the dataset.
max(df['Planner_Easygoer'])

10.75

In [89]:
# Lowest Planner_Easygoer score present in the dataset.
min(df['Planner_Easygoer'])

-8.0

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [90]:
# Respondent(s) with the highest Planner_Easygoer score, next to the answers feeding it.
df.loc[
    df['Planner_Easygoer'] == max(df['Planner_Easygoer']),
    ['qf1', 'qf2', 'qf4', 'qf10_2', 'qf10_5', 'qf10_6', 'qf10_7', 'qf13',
     'qf9_1_9', 'qf9_2_3', 'qf9_4', 'qf9_5', 'qf9_6', 'Planner_Easygoer'],
]

Unnamed: 0,qf1,qf2,qf4,qf10_2,qf10_5,qf10_6,qf10_7,qf13,qf9_1_9,qf9_2_3,qf9_4,qf9_5,qf9_6,Planner_Easygoer
1042,1,1,1,5,2,1,1,5,1,0,1,0,1,10.75
1095,1,1,1,5,2,1,1,5,1,1,0,0,0,10.75
1145,2,1,1,5,1,1,1,5,1,0,0,0,0,10.75
1460,1,1,1,5,2,1,1,5,1,0,1,0,0,10.75


In [91]:
# Respondent(s) with the lowest Planner_Easygoer score, next to the answers feeding it.
df.loc[
    df['Planner_Easygoer'] == min(df['Planner_Easygoer']),
    ['qf1', 'qf2', 'qf4', 'qf10_2', 'qf10_5', 'qf10_6', 'qf10_7', 'qf13',
     'qf9_1_9', 'qf9_2_3', 'qf9_4', 'qf9_5', 'qf9_6', 'Planner_Easygoer'],
]

Unnamed: 0,qf1,qf2,qf4,qf10_2,qf10_5,qf10_6,qf10_7,qf13,qf9_1_9,qf9_2_3,qf9_4,qf9_5,qf9_6,Planner_Easygoer
1918,3,0,2,1,5,5,5,-99,0,0,0,0,0,-8.0


If we compute the theoretical maximum and minimum values, we get:

Theoretical maximum: 11.75

Theoretical minimum: -8.0

## Experience with Financial products

This variable tells us how much the respondent has experience with financial products: the higher, the more the experience. As before, after creation we compute maximum, minimum and theoretical maximum and minimum.

In [92]:
def finexp(df, row):
    """Return the financial-products experience score of respondent ``row``.

    The higher the score, the more experience with financial products. The
    score is a plain weighted sum: no answer is skipped.

    Parameters
    ----------
    df : DataFrame containing the survey columns listed in the table below.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers.
    """
    # (column, weight) pairs; the order fixes the order of the additions.
    weighted_columns = (
        ('qprod1c_1', 0.6),   # any pension product
        ('qprod1c_2', 0.9),   # common investment fund
        ('qprod1c_6', 1),     # credit cards
        ('qprod1c_3', 0.4),   # mortgages
        ('qprod1c_7', 0.3),   # checking account
        ('qprod1c_8', 0.6),   # savings account
        ('qprod1c_10', 0.1),  # insurance
        ('qprod1c_11', 1),    # stocks
        ('qprod1c_12', 1),    # bonds
        ('qprod1c_14', 0.4),  # prepaid/debit card
        ('qf9_4', 1),         # implies having financial assets
        ('qf3_7', 1),         # jewels, property, other
        ('qf3_6', 1.5),       # buying investment products but NOT pension funds
        ('qf3_4', -0.5),      # giving savings to a family member
        ('qf3_3', 0.5),       # using saving or checking account
        ('qf3_1', 0.3),       # saving cash at home / in wallet
    )
    return sum(weight * df[column][row] for column, weight in weighted_columns)

In [93]:
# Score every respondent and assign the whole column in one step.
# The previous df['Fin_Products_Experience'][i] = ... loop was chained
# assignment: it only ran quietly because every warning was silenced, it wrote
# floats into a column created as integers, and it is not written back under
# pandas Copy-on-Write. Iterating over df.index (instead of range(len(df)))
# passes the real row labels, so the cell also works when labels are not 0..n-1.
df['Fin_Products_Experience'] = pd.Series(
    [finexp(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

In [94]:
# Highest Fin_Products_Experience score present in the dataset.
max(df['Fin_Products_Experience'])

8.5

In [95]:
# Lowest Fin_Products_Experience score present in the dataset.
min(df['Fin_Products_Experience'])

-0.5

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [96]:
# Respondent(s) with the highest Fin_Products_Experience score, next to the answers feeding it.
df.loc[
    df['Fin_Products_Experience'] == max(df['Fin_Products_Experience']),
    ['qprod1c_1', 'qprod1c_2', 'qprod1c_6', 'qprod1c_3', 'qprod1c_7',
     'qprod1c_8', 'qprod1c_10', 'qprod1c_11', 'qprod1c_12', 'qprod1c_14',
     'qf9_4', 'qf3_7', 'qf3_6', 'qf3_4', 'qf3_3', 'qf3_1',
     'Fin_Products_Experience'],
]

Unnamed: 0,qprod1c_1,qprod1c_2,qprod1c_6,qprod1c_3,qprod1c_7,qprod1c_8,qprod1c_10,qprod1c_11,qprod1c_12,qprod1c_14,qf9_4,qf3_7,qf3_6,qf3_4,qf3_3,qf3_1,Fin_Products_Experience
315,1,0,1,0,1,1,0,1,1,0,1,1,1,0,1,0,8.5


In [97]:
# Respondent(s) with the lowest Fin_Products_Experience score, next to the answers feeding it.
df.loc[
    df['Fin_Products_Experience'] == min(df['Fin_Products_Experience']),
    ['qprod1c_1', 'qprod1c_2', 'qprod1c_6', 'qprod1c_3', 'qprod1c_7',
     'qprod1c_8', 'qprod1c_10', 'qprod1c_11', 'qprod1c_12', 'qprod1c_14',
     'qf9_4', 'qf3_7', 'qf3_6', 'qf3_4', 'qf3_3', 'qf3_1',
     'Fin_Products_Experience'],
]

Unnamed: 0,qprod1c_1,qprod1c_2,qprod1c_6,qprod1c_3,qprod1c_7,qprod1c_8,qprod1c_10,qprod1c_11,qprod1c_12,qprod1c_14,qf9_4,qf3_7,qf3_6,qf3_4,qf3_3,qf3_1,Fin_Products_Experience
84,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
205,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
269,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
323,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
414,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
478,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
542,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
562,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
574,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5
601,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-0.5


If we compute the theoretical maximum and minimum values, we get:

Theoretical maximum: 10.6

Theoretical minimum: -0.5

## Digital Approach or not

This variable tells us how much the respondent has a tendency for a digital approach: the higher, the more the approach is digitalised. As before, after creation we compute maximum, minimum and theoretical maximum and minimum.

In [98]:
def digapproach(df, row):
    """Return the digital-approach score of respondent ``row``.

    The higher the score, the more digitalised the respondent's approach.
    The two saving-habit indicators always count (against); each
    information-source answer is skipped when it equals -99.

    Parameters
    ----------
    df : DataFrame containing the survey columns read in the body.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers.
    """
    def answer(column):
        # Single place for the per-respondent lookup.
        return df[column][row]

    # qf3_1 - saving cash at home and in wallet; qf3_4 - giving savings to a family member
    score = -0.5 * answer('qf3_1') - 0.5 * answer('qf3_4')

    unsolicited_post = answer('qprod3_1_16')  # unsolicited info through post or other (physical) ads
    if unsolicited_post != -99:
        score -= 0.3 * unsolicited_post

    branch_info = answer('qprod3_2')  # info picked up in a branch
    if branch_info != -99:
        score -= 0.3 * branch_info

    internet_info = answer('qprod3_3_6')  # info about products found on the Internet
    if internet_info != -99:
        score += internet_info

    seller_info = answer('qprod3_4')  # info given by someone selling the product
    if seller_info != -99:
        score -= 0.2 * seller_info

    print_media = answer('qprod3_5_7_12_14')  # comparative table, newspapers, journals, daily newspapers
    if print_media != -99:
        score -= print_media

    broadcast = answer('qprod3_13_15')  # TV or radio programs
    if broadcast != -99:
        score -= 0.1 * broadcast

    adviser = answer('qprod3_8')  # products recommended by an independent financial adviser
    if adviser != -99:
        score -= 0.1 * adviser

    return score

In [99]:
# Score every respondent and assign the whole column in one step.
# The previous df['Approach_Digitalised'][i] = ... loop was chained
# assignment: it only ran quietly because every warning was silenced, it wrote
# floats into a column created as integers, and it is not written back under
# pandas Copy-on-Write. Iterating over df.index (instead of range(len(df)))
# passes the real row labels, so the cell also works when labels are not 0..n-1.
df['Approach_Digitalised'] = pd.Series(
    [digapproach(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

In [100]:
# Highest Approach_Digitalised score present in the dataset.
max(df['Approach_Digitalised'])

1.0

In [101]:
# Lowest Approach_Digitalised score present in the dataset.
min(df['Approach_Digitalised'])

-2.2

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [102]:
# Respondent(s) with the highest Approach_Digitalised score, next to the answers feeding it.
df.loc[
    df['Approach_Digitalised'] == max(df['Approach_Digitalised']),
    ['qf3_1', 'qf3_4', 'qprod3_1_16', 'qprod3_2', 'qprod3_3_6', 'qprod3_4',
     'qprod3_5_7_12_14', 'qprod3_13_15', 'qprod3_8', 'Approach_Digitalised'],
]

Unnamed: 0,qf3_1,qf3_4,qprod3_1_16,qprod3_2,qprod3_3_6,qprod3_4,qprod3_5_7_12_14,qprod3_13_15,qprod3_8,Approach_Digitalised
32,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
119,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
120,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
244,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
275,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
290,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
295,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
303,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
311,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
315,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [103]:
# Respondent(s) with the lowest Approach_Digitalised score, next to the answers feeding it.
df.loc[
    df['Approach_Digitalised'] == min(df['Approach_Digitalised']),
    ['qf3_1', 'qf3_4', 'qprod3_1_16', 'qprod3_2', 'qprod3_3_6', 'qprod3_4',
     'qprod3_5_7_12_14', 'qprod3_13_15', 'qprod3_8', 'Approach_Digitalised'],
]

Unnamed: 0,qf3_1,qf3_4,qprod3_1_16,qprod3_2,qprod3_3_6,qprod3_4,qprod3_5_7_12_14,qprod3_13_15,qprod3_8,Approach_Digitalised
1979,0,1,1.0,1.0,0.0,0.0,1.0,1.0,0.0,-2.2


If we compute the theoretical maximum and minimum values, we get:

Theoretical maximum: 1.0

Theoretical minimum: -3.0

## Independence tendency in financial matters

This variable tells us how much the respondent has a tendency in being independent around financial matters: the higher, the more the approach is independent. As before, after creation we compute maximum, minimum and theoretical maximum and minimum.

In [104]:
def indip(df, row):
    """Return the financial-independence score of respondent ``row``.

    The higher the score, the more independent the respondent's approach.
    The saving and retirement indicators always count; each answer about
    influences on product choice is skipped when it equals -99. Two groups of
    "making ends meet" answers are each walked in a fixed order, ignoring
    -99 entries and stopping right after the first non-zero one: the first
    group is added to the score, the second is subtracted from it.

    Parameters
    ----------
    df : DataFrame containing the survey columns read in the body.
    row : index label of the respondent (looked up as ``df[column][row]``).

    Returns
    -------
    The weighted sum of the respondent's answers.
    """
    def answer(column):
        # Single place for the per-respondent lookup.
        return df[column][row]

    # In order: qf3_1 - saving cash at home and in wallet; qf3_4 - giving
    # savings to a family member; qf9_7_8 - relying on family for retirement;
    # qf9_4 - sell your financial assets; qf9_5 - sell your non-financial
    # assets; qf9_6 - use financial or non-financial assets for retirement.
    score = (0.5 * answer('qf3_1') - answer('qf3_4') - answer('qf9_7_8')
             + 0.8 * answer('qf9_4') + answer('qf9_5') + answer('qf9_6'))

    responsibility = answer('qf1')  # responsibility for money within family (higher = less responsible)
    if responsibility != -99:
        score += 2 - responsibility

    own_experience = answer('qprod3_17')  # own experience influences choice of financial assets
    if own_experience != -99:
        score += 0.7 * own_experience

    sales_staff = answer('qprod3_4')  # choice influenced by sales staff
    if sales_staff != -99:
        score -= 0.5 * sales_staff

    adviser = answer('qprod3_8')  # choice influenced by independent financial adviser
    if adviser != -99:
        score -= adviser

    lay_friends = answer('qprod3_9')  # advice of friends/relatives (not in financial space)
    if lay_friends != -99:
        score -= lay_friends

    expert_friends = answer('qprod3_10')  # advice of friends/relatives (in financial space)
    if expert_friends != -99:
        score -= expert_friends

    employer = answer('qprod3_11')  # advice of employer
    if employer != -99:
        score -= 0.5 * employer

    cover_unaided = answer('qf4')  # could you cover living expenses without borrowing?
    if cover_unaided == 1:
        score += cover_unaided
    elif cover_unaided == 0:
        score -= 1

    self_reliant_actions = (
        'qf12_1_b',    # reduced expenses to make up for living costs
        'qf12_1c_3g',  # sell something you own to make up for living costs
        'qf12_2_d',    # work overtime to earn more to make up for living costs
        'qf12_4_l',    # use credit card to get some money in advance
    )
    for column in self_reliant_actions:
        action = answer(column)
        if action != -99:
            score += action
            if action != 0:
                # First non-zero answer ends the walk.
                break

    borrowing_actions = (
        'qf12_3_f',          # borrow from employer (salary advance)
        'qf12_3_e',          # borrow from family/friends
        'qf12_4k_5m_5o_6p',  # credit cards, loan, unsecured loan, overdraft
    )
    for column in borrowing_actions:
        action = answer(column)
        if action != -99:
            score -= action
            if action != 0:
                # First non-zero answer ends the walk.
                break

    return score

In [105]:
# Score every respondent and assign the whole column in one step.
# The previous df['Fin_Indipendence'][i] = ... loop was chained assignment: it
# only ran quietly because every warning was silenced, it wrote floats into a
# column created as integers, and it is not written back under pandas
# Copy-on-Write. Iterating over df.index (instead of range(len(df))) passes
# the real row labels, so the cell also works when labels are not 0..n-1.
df['Fin_Indipendence'] = pd.Series(
    [indip(df, row_label) for row_label in df.index],
    index=df.index,
    dtype=float,
)

In [106]:
# Highest Fin_Indipendence score present in the dataset.
max(df['Fin_Indipendence'])

5.5

In [107]:
# Lowest Fin_Indipendence score present in the dataset.
min(df['Fin_Indipendence'])

-4.5

We can also "see" who had the lowest value and highest value within the dataset, and their replies:

In [108]:
# Respondent(s) with the highest Fin_Indipendence score, next to the answers feeding it.
df.loc[
    df['Fin_Indipendence'] == max(df['Fin_Indipendence']),
    ['qf3_1', 'qf3_4', 'qf9_7_8', 'qf9_4', 'qf9_5', 'qf9_6', 'qf1',
     'qprod3_17', 'qprod3_4', 'qprod3_8', 'qprod3_9', 'qprod3_10',
     'qprod3_11', 'qf4', 'qf12_1_b', 'qf12_1c_3g', 'qf12_2_d', 'qf12_4_l',
     'qf12_3_f', 'qf12_3_e', 'qf12_4k_5m_5o_6p', 'Fin_Indipendence'],
]

Unnamed: 0,qf3_1,qf3_4,qf9_7_8,qf9_4,qf9_5,qf9_6,qf1,qprod3_17,qprod3_4,qprod3_8,...,qprod3_11,qf4,qf12_1_b,qf12_1c_3g,qf12_2_d,qf12_4_l,qf12_3_f,qf12_3_e,qf12_4k_5m_5o_6p,Fin_Indipendence
691,0,0,0,1,1,1,1,1.0,0.0,0.0,...,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.5


In [109]:
# Respondent(s) with the lowest Fin_Indipendence score, next to the answers feeding it.
df.loc[
    df['Fin_Indipendence'] == min(df['Fin_Indipendence']),
    ['qf3_1', 'qf3_4', 'qf9_7_8', 'qf9_4', 'qf9_5', 'qf9_6', 'qf1',
     'qprod3_17', 'qprod3_4', 'qprod3_8', 'qprod3_9', 'qprod3_10',
     'qprod3_11', 'qf4', 'qf12_1_b', 'qf12_1c_3g', 'qf12_2_d', 'qf12_4_l',
     'qf12_3_f', 'qf12_3_e', 'qf12_4k_5m_5o_6p', 'Fin_Indipendence'],
]

Unnamed: 0,qf3_1,qf3_4,qf9_7_8,qf9_4,qf9_5,qf9_6,qf1,qprod3_17,qprod3_4,qprod3_8,...,qprod3_11,qf4,qf12_1_b,qf12_1c_3g,qf12_2_d,qf12_4_l,qf12_3_f,qf12_3_e,qf12_4k_5m_5o_6p,Fin_Indipendence
2127,0,0,1,0,1,0,3,0.0,1.0,1.0,...,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.5


If we compute the theoretical maximum and minimum values, we get:

Theoretical maximum: 7.0

Theoretical minimum: -9.0

Now we can go ahead and delete some features that were not adding relevant information and did not integrate well with the new ones:

In [55]:
# Remove the columns that are not used by any of the new features.
df.drop(
    columns=['qf9_10', 'qprod3_18', 'qf12_7_r'],
    inplace=True,
)

And now we can save our file:

In [56]:
# Write the frame, including its index, to the aggregated-data CSV.
df.to_csv(path_or_buf='aggr_data.csv')