# Functions

In [1]:
import pandas as pd 
import scipy
from scipy.stats import mannwhitneyu
import pandas as pd 

# Concatenation with test on NA
def new_concat(df1, df2):
    """Concatenate two frames, ignoring a frame that has an all-NA column.

    A frame is considered usable when every one of its columns holds at least
    one non-NA value (``DataFrame.count()`` is non-zero for all columns). A
    frame without any column passes this check vacuously; a frame with columns
    but no rows does not.

    Args:
        df1: frame accumulated so far.
        df2: frame to append.

    Returns:
        ``df1`` itself when ``df2`` is not usable, ``df2`` itself when only
        ``df1`` is not usable, otherwise ``pd.concat([df1, df2])`` (row labels
        are kept as they are, not renumbered).
    """
    # The check on df2 comes first: an unusable df2 always yields df1.
    if not (df2.count() != 0).all():
        return df1
    if not (df1.count() != 0).all():
        return df2
    return pd.concat([df1, df2])

# Returns the statistics in funcs on groups and variables columns
def des_stats(df, funcs, group_label, columns):
    """Aggregate each column per group and stack the results in long form.

    Args:
        df: source dataframe.
        funcs: aggregation functions/names handed to ``GroupBy.agg``.
        group_label: name of the column to group by.
        columns: names of the columns to summarise.

    Returns:
        A dataframe with one row per (column, group): the group label, one
        column per aggregation and a ``var`` column naming the summarised
        column. Per-column results are appended with ``new_concat``, so a
        result containing an all-NA column is left out.
    """
    stacked = pd.DataFrame()
    for name in columns:
        per_group = df.groupby(group_label)[name].agg(funcs)
        per_group = per_group.reset_index().assign(var=name)
        stacked = new_concat(stacked, per_group)

    return stacked

# Mode or most frequent result 
def mode(x):
    """Return the most frequent non-NA value of ``x``.

    Args:
        x: pandas Series (any object exposing ``dropna()``).

    Returns:
        The first element of the ``scipy.stats.mode`` result computed on the
        non-NA values, or NaN when there are no non-NA values.
    """
    observed = x.dropna()
    # A group without any answer has no mode. Return NaN directly instead of
    # handing an empty sample to SciPy (presumably one source of the
    # "sample arguments is too small" warnings seen when a question was only
    # asked in one survey).
    if len(observed) == 0:
        return float('nan')
    return scipy.stats.mode(observed)[0]

# Total number of records
def total(x):
    """Return the number of records in ``x`` (NA entries included)."""
    n_records = len(x)
    return n_records

# Count the number of NA
def count_na(x):
    """Return how many entries of ``x`` are NA."""
    is_missing = x.isna()
    return is_missing.sum()

# Percentage of NA
def percent_na(x):
    """Return the share of NA entries in ``x`` as a percentage (0-100)."""
    n_missing = x.isna().sum()
    n_records = len(x)
    return n_missing / n_records * 100

# Frequency table

def freq_table(df, column, start_index, category_labels):
    """Build a frequency table for a coded categorical column.

    Args:
        df: dataframe holding ``column``.
        column: name of the column whose values are integer category codes.
        start_index: code of the first entry of ``category_labels``; following
            labels are matched to consecutive codes.
        category_labels: display labels, in code order.

    Returns:
        A dataframe with columns 'Response', 'Frequency' and 'Percentage':
        one row per label, then an 'NA' row and a 'Total' row. Percentages are
        relative to the number of rows of ``df`` and rounded to 2 decimals.
        Rows are appended with ``new_concat``, so each keeps row label 0.
    """
    header = ['Response', 'Frequency', 'Percentage']
    table = pd.DataFrame(columns=header)

    def _row(label, frequency, percentage):
        # One-row frame shaped like the table being built.
        return pd.DataFrame([[label, frequency, percentage]], columns=header)

    for offset, label in enumerate(category_labels):
        code = offset + start_index
        frequency = len(df[df[column] == code])
        share = round(100 * frequency / len(df[column]), 2)
        table = new_concat(table, _row(label, frequency, share))

    # Count NA
    n_missing = df[column].isna().sum()
    table = new_concat(table, _row('NA', n_missing, round(100 * n_missing / len(df[column]), 2)))

    # Count total
    table = new_concat(table, _row('Total', len(df[column]), 100.00))
    return table
    

def mw_test(df,columns, condition_label, condition_values):
    """Run a two-sided Mann-Whitney U test per column between two groups.

    Args:
        df: source dataframe.
        columns: names of the columns to test.
        condition_label: name of the column that defines the groups.
        condition_values: group values; only the first two are compared.

    Returns:
        A dataframe with columns 'var', 'u_stat' and 'pvalue', one row per
        tested column. Rows are appended with ``new_concat``, which skips a
        row whose statistic or p-value is NaN, so such columns are absent.
    """
    result_columns = ['var', 'u_stat', 'pvalue']
    outcome = pd.DataFrame()
    for name in columns:
        # Keep only rows where both the variable and the group are known.
        complete = df.dropna(subset=[name, condition_label])

        samples = [
            complete[complete[condition_label] == value].loc[:, name].array
            for value in condition_values
        ]

        u_stat, p_value = mannwhitneyu(samples[0], samples[1], alternative='two-sided')
        outcome = new_concat(outcome, pd.DataFrame([[name, u_stat, p_value]], columns=result_columns))

    return outcome

def des_stats_table(df, funcs, group_label, columns):
    """Side-by-side descriptive statistics of two groups plus a Mann-Whitney p-value.

    Args:
        df: source dataframe.
        funcs: aggregation functions/names forwarded to ``des_stats``.
        group_label: name of the column that defines the groups; its first two
            distinct non-missing values are the groups being compared.
        columns: names of the columns to summarise and test.

    Returns:
        A dataframe with one row per variable: 'var', the statistics of the
        first group (suffixed ``_<value>``), those of the second group and the
        'pvalue' from ``mw_test``. Both merges are inner joins on 'var', so a
        variable missing from either side (for instance one for which
        ``mw_test`` produced no row) does not appear.

    Raises:
        IndexError: if ``group_label`` has fewer than two distinct non-missing
            values.
    """
    stats = des_stats(df, funcs, group_label, columns)
    # Position 'var' column in first place
    stats = stats.reindex(columns=['var'] + [name for name in stats.columns if name != 'var'])

    # Series.unique() keeps NaN as a value, and comparing against NaN never
    # matches; drop missing labels so NaN cannot be taken as one of the groups.
    values = df[group_label].dropna().unique()
    first, second = values[0], values[1]

    df_results = stats[stats[group_label] == first].merge(
        stats[stats[group_label] == second],
        left_on='var',
        right_on='var',
        suffixes=('_' + str(first), '_' + str(second)),
    )
    # Merge with the Mann-Whitney test results
    p_values = mw_test(df, columns, group_label, values).loc[:, ['var', 'pvalue']]
    df_results = df_results.merge(p_values, left_on='var', right_on='var')

    return df_results

# Build the dataframe for multiple choice questions

def mcq_trans(df, col):
    """Expand a comma-separated multiple-choice column to one row per choice.

    Args:
        df: source dataframe (left untouched; a copy is transformed).
        col: name of the column holding comma-separated choice codes as text.

    Returns:
        A copy of ``df`` in which every row is repeated once per selected
        choice (original row labels are kept) and ``col`` holds the choice
        converted with ``int``; NaN entries stay NaN.
    """
    def _to_code(token):
        # NaN is the only value that differs from itself: leave it as is.
        if token != token:
            return token
        return int(token)

    exploded = df.copy()
    exploded[col] = exploded[col].str.split(',')
    exploded = exploded.explode(col, ignore_index=False)
    exploded[col] = exploded[col].apply(_to_code)
    return exploded

# Constants

In [2]:
######################### SURVEY DATA #########################

# Survey question catalogue: column key -> [display label, category labels].
# - The category labels are either one flat list, or a pair of lists when the
#   entry is marked "Not same labels" (presumably [BA labels, CI labels];
#   confirm against the questionnaires).
# - 'ba_group_name' and 'inv_companies_last2y' carry a display label only, so
#   COLS[key][1] does not exist for them.
# - The commented-out lists are the full-length wordings of the short labels
#   that follow them.
COLS={
    'inv_time': ['Investor Experience', ['<1 year','2-3 years', '4-6 years', '7-9 years', '> 10 years'],],  
    'inv_motivation': ['Motivation', [
        # ['Support innovative startups or entrepreneurs', 'Potential high returns on investments', 'To diversify my investment portfolio', 'Recommendation from friends', 'Benefits from tax breaks', 'My financial capacity', 'My family background', 'Other' ],
        # ['Desire to support innovative startups or entrepreneurs', 'Potential high returns on investments', 'Diversifying my investment portfolio', 'Recommendation from friends or media influence', 'Benefits from tax breaks', 'Business reasons (you could be a supplier, etc.)', 'Other' ],
        ['Support entrepreneurs', 'High returns', 'Diversify portfolio', 'Friend recommendation', 'Tax breaks', 'Financial capacity', 'Family background', 'Other' ],
        ['Support entrepreneurs', 'High returns', 'Diversify portfolio', 'Friend recommendation', 'Tax breaks', 'Business reasons', 'Other' ],
    ],], # Not same labels
    'ba_group': ['Part of BA Network', ['Yes', 'No'],], # Boolean Only BA
    'ba_group_name': ['BA Network'], # Only BA
    'ba_group_reason': ['Part of BA Network Reason',
        # ['To gain access to a larger number of investment opportunities', 'To invest in mutual funds set up by the network', 'To leverage the experience of other network members', 'To benefit from educational or training sessions', 'To expand my personal and professional network', 'To coinvest with other investors', 'Other',],
        ['More opportunities', 'Network funds', "Members' experience", 'Training', 'Expand network', 'Coinvest', 'Other',],
    ], # Only BA  
    'inv_num': ['Number of Investments', ['None', '1-2', '3-5', '6-10', '11-20', '>20'],],
    'inv_companies_last2y': ['Last 2Y Investments'], # Text
    'inv_amount_average': ['Average Invested Amount',[
        ['<25K€', '25k€-50k€', '50k€-100k€', '100k€-500k€', '>500k€', ],
        ['<0.5K€', '0.5k€-1k€', '1k€-5k€', '5k€-10k€', '10k€-50k€', '>50k€'],
    ],], # Not same labels
    'inv_capital_stake': ['Typical Stake', ['0-5%', '6-10%', '11-20%', '21-30%', '31-50%', '>51%'],], # Only for BA
    'inv_dd_duration': ['Analysis Duration',  [
        ['Less than 1 month', '1-6 months', '7-12 months', '13-18 months', 'More than 18 months'],
        ['Less than 1 hour', '1 to 2 hours', '3 to 5 hours', '6 to 10 hours', 'More than 10 hours'],
    ],], # Not same labels
    'inv_criteria': ['Investing Criteria',  [
        # ['Alignment with my experience and expertise', 'Business plan', 'Scalability/Market potential', 'Entrepreneurial team', 'Environmental/social impact', 'Exit strategy', 'Fair valuation/return', 'Other'],
        # ['Business plan', 'Crowd opinion', 'Entrepreneurial team', 'Environmental/social impact', 'Exit strategy', 'Fair valuation/return', 'Other'],
        ['Expertise', 'Business plan', 'Scalability', 'Team', 'ES impact', 'Exit', 'Return', 'Other'],
        ['Business plan', 'Crowd opinion', 'Team', 'ES impact', 'Exit', 'Return', 'Other'],
    ],], # Not same labels multiple
    'inv_sectors': ['Sectors', 
        # ['Information technology (software, internet, IT services, etc.)', 'Finance', 'Healthcare / Biotech', 'Manufacturing', 'Retail', 'Consulting', 'Energy', 'Transportation', 'Other'],
        ['IT', 'Finance', 'Healthcare', 'Manufacturing', 'Retail', 'Consulting', 'Energy', 'Transportation', 'Other'],
    ],
    'inv_process': ['Investing Process',
        # ['I invest alone (as an individual investor)', 'I invest as part of an angel syndicate or group', 'I invest through venture capital funds', 'I invest through crowdfunding platforms', 'Other'],
        ['Alone', 'Syndicate/group', 'Venture capital', 'Crowdfunding platforms', 'Other'],
    ], # Only BA
    'inv_visits': ['Number of Visits', ['0 per year', '1-2 per year', '3-6 per year', '7-12 per year', 'More than 12 per year'],], # Only BA
    'non_fin_contr': ['Non-Financial Contribution',
        # ['Strategic guidance', 'Networking opportunities', 'Financial and legal expertise', 'Technical or industrial knowledge', 'Monitoring and accountability', 'Other',],
        ['Strategy', 'Networking', 'Financial and legal expertise', 'Technical or industrial knowledge', 'Monitoring', 'Other',],
    ], # Only BA
    'child_bias': ['Children Effect', ['1 - Not at all', '2', '3', '4', '5 - Very much',],], # Only BA
    'inv_goal': ['Investment Goal',
        # ['Capital preservation (low risk, low return)', 'Income generation (moderate risk, moderate return)', 'Growth (high risk, high return)', 'Speculative (very high risk, very high return)'],
        ['Capital preservation', 'Income generation', 'Growth', 'Speculative'],
    ], 
    'inv_exit': ['Exit', [
        # ['No, I have not sold my investments yet', 'Yes, through a private transaction', 'Yes, through the company acquisition', 'Yes, through an initial public offering', 'Yes, through a company buyback program', 'Yes, through cessation of the company activity'], 
        # ['No, I have not sold my investments yet', "Yes, through the platform’s secondary market",  'Yes, through a private transaction', 'Yes, through the company acquisition', 'Yes, through an initial public offering', 'Yes, through a company buyback program', 'Yes, through cessation of the company activity'], 
        ['Not sold yet', 'Private transaction', 'Company acquisition', 'IPO', 'Company buyback', 'Cessation'], 
        ['Not sold yet', "Platform’s secondary market", 'Private transaction', 'Company acquisition', 'IPO', 'Company buyback', 'Cessation'], 
    ] ], # Not same labels
    'inv_return': ['Investment Returns',
        # ['I have no idea', 'Highly negative', 'Negative', 'Neutral', 'Positive', 'Highly positive'],
        ['No idea', 'Highly negative', 'Negative', 'Neutral', 'Positive', 'Highly positive'],
    ],
    'women_investing': ['Women Investing Knowledge', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']], 
    'women_involvement': ['Women Involvement', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']], 
    'women_influence': [ 'Women Influence', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']],
    'women_diff_eval_process': [ 'Women Evaluation Process', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']],
    'women_risk_averse': ['Women Risk Aversion', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']],
    'women_evolution_involvement': ['Women Evolution Involvement', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']],
    'women_evolution_neg_stereotype': ['Women Stereotype Evolution', ['Strongly disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly agree']],
    'mother_needs_consideration': ["Consideration of Mothers' Needs", ['1-Not at all', '2', '3', '4', '5-Completely',]],
    'women_reco': ['Recommendations for Attracting Women',[
        # ['Increase visibility of successful women business angels', 'Offer targeted networking events for women', 'Highlight women-led projects', 'Offer training or mentorship programs for aspiring women investors', 'Establish women-focused investment funds or syndicates', 'Other'],
        # ['Create events or online communities where women can network', 'Ensure the platform is user-friendly and inclusive', 'Highlight women-led projects', 'Offer training programs', 'Showcase successful women crowd-investors in the investment space', 'Other']
        ['Communication on success', 'Networking', 'Communication on projects', 'Training', 'Specific funds', 'Other'],
        ['Networking', 'User-Friendliness','Communication on projects', 'Training', 'Communication on success', 'Other'],
    ]],
    'women_led_inv': ['Number of Women-Led Investments', [
        ['None', '1-2', '3-5', '6-10', '11-20', '>20'],
        ['None', '1-2', '3-5', '6-10', '>10'],
    ]],
}

# Columns handled as ordinal scales. The statistics cells start from this list
# when choosing the columns passed to des_stats_table.
ORDINAL_COLS=[
    'inv_time',
    'inv_num',
    'inv_amount_average',
    'inv_capital_stake',
    'inv_dd_duration',
    'inv_visits',
    'child_bias',
    'inv_goal',
    'inv_return',
    'women_investing',
    'women_involvement',
    'women_influence',
    'women_diff_eval_process',
    'women_risk_averse',
    'women_evolution_involvement',
    'women_evolution_neg_stereotype',
    'mother_needs_consideration',
    'women_led_inv',
]

# Columns that radar_cat and dist_chart first pass through mcq_trans, i.e.
# whose answers are split on ',' into one row per choice.
# NOTE(review): 'inv_platform' has no entry in COLS in this cell - confirm it
# is defined elsewhere or intentionally absent.
MUL_COLS=[
    'inv_motivation',
    'ba_group',
    'ba_group_name',
    'ba_group_reason',
    'inv_criteria',
    'inv_sectors',
    'inv_process',
    'non_fin_contr',
    'inv_exit',
    'inv_platform',
    'women_reco',
]

# Columns whose category labels differ between the two surveys; dist_chart
# draws no consolidated or cross-group chart for them.
# NOTE(review): 'women_reco' and 'women_led_inv' also have two label lists in
# COLS but are not listed here - confirm whether that is intended.
DIFF_COLS=[
    'inv_motivation',
    'inv_amount_average',
    'inv_dd_duration',
    'inv_criteria',
    'inv_exit',
    'inv_platform',
]

# Columns marked "Only BA" in COLS; excluded from the consolidated charts.
BA_ONLY_COLS=[
    'ba_group',
    'ba_group_name',
    'ba_group_reason',
    'inv_capital_stake',
    'inv_process',
    'inv_visits',
    'non_fin_contr',
    'child_bias',
]

# Columns presumably asked in the CI survey only; excluded from the
# consolidated charts. 'inv_platform' has no entry in COLS in this cell.
CI_ONLY_COLS=[
    'inv_platform',
]

# Questions about women and investing; the statistics and chart cells of this
# notebook filter these columns out.
GENDER_COLS=[
    'women_investing',
    'women_involvement',
    'women_influence',
    'women_diff_eval_process',
    'women_risk_averse',
    'women_evolution_involvement',
    'women_evolution_neg_stereotype',
    'mother_needs_consideration',
    'women_reco',
    'women_led_inv',
]

######################### GROUPS #########################

# Grouping column -> list of groups, each described as
# [value in that column, display name, trace colour, trace opacity].
# The chart functions draw one figure per key and one trace per group.
GROUPS={
    'gender_male':[
        [1, 'Women', 'red', 0.5],
        [2, 'Men', 'blue', 0.5],
    ],
    'survey': [
        ['BA', 'Business Angel', 'green', 0.5],
        ['CI', 'Crowdfunding Investor', 'purple', 0.5],
    ],
}

######################### OTHERS #########################

# Aggregations that radar_num passes to des_stats: the notebook's own `mode`
# function plus aggregation names resolved by pandas.
FUNCS=[mode, 'median', 'mean', 'std', 'count',]


# Charts

In [30]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

######################################### RADAR #########################################

# Radar chart for ordinal or numerical variables 
def radar_num(df, cols, stat_func):
    """Show, for each grouping in GROUPS, a radar chart of one statistic.

    Args:
        df: survey dataframe containing the grouping columns named in GROUPS
            and the columns listed in ``cols``.
        cols: keys of COLS; each becomes one axis labelled COLS[key][0].
        stat_func: name of the statistic column of the ``des_stats`` output to
            plot (for example 'mean').

    Returns:
        None. Every figure is displayed with ``fig.show()``.
    """
    axis_labels = [COLS[key][0] for key in cols]
    grid_color = "rgba(108, 122, 137,1)"

    # One figure per grouping column, one trace per group value.
    for groups_key, group_specs in GROUPS.items():
        stats = des_stats(df, FUNCS, groups_key, cols)
        fig = go.Figure()
        for spec in group_specs:
            in_group = stats[groups_key] == spec[0]
            fig.add_trace(go.Scatterpolar(
                r=stats[in_group][stat_func],
                theta=axis_labels,
                fill='toself',
                line_shape='spline',
                name=spec[1],
                marker_color=spec[2],
                opacity=spec[3],
            ))

        # Formatting
        fig.update_layout(
            polar={
                'bgcolor': "rgba(0, 0, 0, 0)",
                'angularaxis': {
                    'linewidth': 2,
                    'showline': True,
                    'linecolor': grid_color,
                    'gridcolor': grid_color,
                },
                'radialaxis': {
                    'side': "counterclockwise",
                    'showline': False,
                    'linewidth': 1,
                    'gridcolor': grid_color,
                    'gridwidth': 1,
                    'dtick': 1,
                    # Fixed scale; NOTE(review): some COLS entries have six
                    # categories - confirm 1-5 covers the plotted statistic.
                    'range': [1, 5],
                },
            },
            legend={
                'yanchor': "bottom",
                'y': -0.4,
                'xanchor': "center",
                'x': 0.5,
                'bordercolor': grid_color,
                'borderwidth': 1,
            },
        )

        fig.show()
        print()

# Radar chart for categorical variables 
def radar_cat(df, col_key):
    """Show, for each grouping in GROUPS, a radar chart of category percentages.

    Prints the display label of the variable, then draws one figure per
    grouping column with one trace per group. Each trace plots the
    'Percentage' column of ``freq_table`` for the rows of that group that
    answered the question.

    Args:
        df: survey dataframe.
        col_key: key of COLS; COLS[col_key][1] supplies the category labels,
            matched to codes starting at 1. Columns listed in MUL_COLS are
            expanded with ``mcq_trans`` first.

    Returns:
        None. Every figure is displayed with ``fig.show()``.
    """
    print(COLS[col_key][0])
    category_order = COLS[col_key][1]
    answers = mcq_trans(df, col_key) if col_key in MUL_COLS else df.copy()
    first_code = 1
    grid_color = "rgba(108, 122, 137,1)"

    # One figure per grouping column, one trace per group value.
    for groups_key, group_specs in GROUPS.items():
        axis_tops = []  # candidate upper bounds for the radial axis
        fig = go.Figure()
        for spec in group_specs:
            answered = answers[answers[groups_key] == spec[0]].dropna(subset=[col_key])

            # Keep only the per-category rows (drops the 'NA' and 'Total' rows).
            table = freq_table(answered, col_key, first_code, category_order)
            percentages = table.iloc[:len(category_order), 2]
            # Next multiple of ten above the largest percentage.
            axis_tops.append(int(percentages.max() / 10) * 10 + 10)
            fig.add_trace(go.Scatterpolar(
                r=percentages,
                theta=category_order,
                fill='toself',
                line_shape='spline',
                name=spec[1],
                marker_color=spec[2],
                opacity=spec[3],
            ))

        # Formatting
        fig.update_layout(
            polar={
                'bgcolor': "rgba(0, 0, 0, 0)",
                'angularaxis': {
                    'linewidth': 2,
                    'showline': True,
                    'linecolor': grid_color,
                    'gridcolor': grid_color,
                },
                'radialaxis': {
                    'side': "counterclockwise",
                    'showline': False,
                    'linewidth': 1,
                    'gridcolor': grid_color,
                    'gridwidth': 1,
                    'dtick': 10,
                    'range': [0, max(axis_tops)],
                },
            },
            legend={
                'yanchor': "bottom",
                'y': -0.4,
                'xanchor': "center",
                'x': 0.5,
                'bordercolor': grid_color,
                'borderwidth': 1,
            },
        )

        fig.show()
        print()
    
######################################### HISTOGRAM #########################################

# Multiple trace histogram
def hist(traces, category_orders, x_title, y_title, y_tickformat, histnorm):
    """Draw and show one figure overlaying several histogram traces.

    Args:
        traces: iterable of dicts with keys 'x', 'name', 'marker_color' and
            'opacity', one per histogram trace.
        category_orders: category labels in the order they must appear on the
            x axis.
        x_title: x axis title.
        y_title: y axis title.
        y_tickformat: tick format string for the y axis.
        histnorm: normalisation mode forwarded to ``go.Histogram``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()
    for spec in traces:
        bars = go.Histogram(
            x=spec['x'],
            name=spec['name'],
            marker_color=spec['marker_color'],
            opacity=spec['opacity'],
            histnorm=histnorm,
        )
        fig.add_trace(bars)

    # Layout: both axes share the same thin black axis line.
    axis_line = dict(showline=True, linewidth=1, linecolor='black')
    fig.update_layout(
        xaxis=dict(
            title=x_title,
            categoryorder='array',
            categoryarray=category_orders,
            **axis_line,
        ),
        yaxis=dict(
            title=y_title,
            tickformat=y_tickformat,
            **axis_line,
        ),
        legend=dict(
            yanchor='bottom',
            y=-0.25,
            xanchor='center',
            x=0.5,
            orientation='h',
        ),
    )

    fig.show()

# Perform the distribution  chart
def dist_chart(df, cols, start_range, y_title, y_tickformat, histnorm):
    """Print titles and show distribution histograms for each requested column.

    For every column two kinds of chart are attempted:
    - "Consolidated": one figure per grouping in GROUPS with one trace per
      group. Skipped (with a printed message) for columns in DIFF_COLS,
      BA_ONLY_COLS or CI_ONLY_COLS.
    - "Cross Group": one figure with one trace per combination of the groups
      of the first two GROUPS entries. Skipped (with a printed message) for
      columns in DIFF_COLS.

    Args:
        df: survey dataframe.
        cols: keys of COLS to chart; columns in MUL_COLS are expanded with
            ``mcq_trans`` first.
        start_range: integer code of the first category label; following
            labels map to consecutive codes.
        y_title: y axis title forwarded to ``hist``.
        y_tickformat: y axis tick format forwarded to ``hist``.
        histnorm: histogram normalisation forwarded to ``hist``.

    Returns:
        None. Figures are displayed through ``hist``.
    """
    for col_key in cols:

        var_name = COLS[col_key][0]

        # Transform the dataframe if MCQ
        answers = mcq_trans(df, col_key) if col_key in MUL_COLS else df.copy()

        labels_differ = col_key in DIFF_COLS
        single_survey = (col_key in BA_ONLY_COLS) or (col_key in CI_ONLY_COLS)

        ##################### Consolidated #####################

        if labels_differ or single_survey:
            print('Consolidated chart not possible because {} has different labels.'.format(COLS[col_key][0]))
            print()
        else:
            category_orders = COLS[col_key][1]
            category_range = range(start_range, start_range + len(category_orders))

            for group_key in GROUPS:
                # One trace per group, with codes replaced by their labels.
                traces = [
                    {
                        'x': answers[answers[group_key] == spec[0]][col_key].replace(category_range, category_orders),
                        'name': spec[1],
                        'marker_color': spec[2],
                        'opacity': spec[3],
                    }
                    for spec in GROUPS[group_key].copy()
                ]

                # Display
                print('{} - Consolidated - {}'.format(var_name, 'Gender' if group_key=='gender_male' else 'Investor Type'))
                hist(traces, category_orders, '', y_title, y_tickformat, histnorm)
                print()

        ##################### Cross Group Analysis #####################

        if labels_differ:
            print('Cross group chart not possible because {} has different labels.'.format(COLS[col_key][0]))
            print()
            continue

        category_orders = COLS[col_key][1]
        category_range = range(start_range, start_range + len(category_orders))

        # (grouping column, groups) pairs; only the first two are combined.
        groupings = [(group_key, GROUPS[group_key].copy()) for group_key in GROUPS]

        traces = []
        for outer in groupings[0][1]:
            for inner in groupings[1][1]:
                both = answers[
                    (answers[groupings[0][0]] == outer[0])
                    & (answers[groupings[1][0]] == inner[0])
                ]
                traces.append({
                    'x': both[col_key].replace(category_range, category_orders),
                    'name': '{} & {}'.format(outer[1], inner[1]),
                    'marker_color': outer[2],
                    # Same colour as the outer group; 'BA' traces are drawn
                    # at half opacity to tell the two inner groups apart.
                    'opacity': outer[3]/2 if inner[0]=='BA' else outer[3],
                })

        # Display
        print('{} - Cross Group'.format(var_name))
        hist(traces, category_orders, '', y_title, y_tickformat, histnorm)
        print()

# Load dataframe

In [4]:
# Load data
import pandas as pd 

# Path of the consolidated survey workbook, relative to the notebook's working directory.
file_path='assets/datasets/clean/250604-consolidated_survey.xlsx'
# Read the workbook and discard the 'Unnamed: 0' column (presumably an index
# column written by an earlier export - confirm).
df_load=pd.read_excel(file_path).drop(columns=['Unnamed: 0'])

# Descriptive Statistics and Mann-Whitney U Test

In [31]:
######################################### GENDER #########################################
import pandas as pd

# Descriptive statistics and Mann-Whitney test, women vs men
df=df_load
# df=df_load[df_load['survey']=='BA'] # BA only
# df=df_load[df_load['survey']=='CI'] # CI only

group_label='gender_male'
funcs=[mode, 'median', 'mean', 'std', 'count',]

# Keep the ordinal columns that are neither multiple-choice nor part of the
# gender questions. Their labels can still differ between BAs and CIs.
columns=[]
for col in COLS:
    if col not in ORDINAL_COLS:
        continue
    if col in MUL_COLS or col in GENDER_COLS:
        continue
    columns.append(col)

df_results=des_stats_table(df, funcs, group_label, columns)
df_results

Unnamed: 0,var,gender_male_2.0,mode_2.0,median_2.0,mean_2.0,std_2.0,count_2.0,gender_male_1.0,mode_1.0,median_1.0,mean_1.0,std_1.0,count_1.0,pvalue
0,inv_time,2.0,5.0,3.0,3.372093,1.337658,86,1.0,3.0,3.0,3.064516,1.09348,31,0.261672
1,inv_num,2.0,3.0,3.0,3.423529,1.483051,85,1.0,3.0,3.0,3.375,1.539795,32,0.965182
2,inv_amount_average,2.0,1.0,2.0,2.5,1.295408,74,1.0,1.0,1.0,1.814815,1.177907,27,0.011706
3,inv_capital_stake,2.0,1.0,2.0,1.84127,0.987116,63,1.0,1.0,1.0,1.521739,0.897956,23,0.105298
4,inv_dd_duration,2.0,3.0,3.0,2.683544,0.725794,79,1.0,3.0,3.0,2.612903,0.667204,31,0.677881
5,inv_visits,2.0,2.0,2.0,2.569231,1.015031,65,1.0,2.0,2.0,2.26087,0.963771,23,0.219112
6,child_bias,2.0,1.0,1.0,1.893617,1.146527,47,1.0,1.0,1.0,2.166667,1.465285,18,0.561968
7,inv_goal,2.0,3.0,3.0,2.775,0.655551,80,1.0,3.0,3.0,2.483871,0.676805,31,0.056241
8,inv_return,2.0,5.0,4.0,4.0,1.433994,72,1.0,5.0,4.0,3.777778,1.476309,27,0.530877


In [32]:
######################################### INVESTOR TYPE #########################################
import pandas as pd

# Descriptive statistics and Mann-Whitney test, BA vs CI
df=df_load
# df=df_load[df_load['gender_male']==1] # Women only
# df=df_load[df_load['gender_male']==2] # Men only

group_label='survey'
funcs=[mode, 'median', 'mean', 'std', 'count',]

# Keep the ordinal columns that are neither multiple-choice nor part of the
# gender questions. Their labels can differ between BAs and CIs, and a column
# asked in one survey only has no values for the other group.
columns=[]
for col in COLS:
    if col not in ORDINAL_COLS:
        continue
    if col in MUL_COLS or col in GENDER_COLS:
        continue
    columns.append(col)

df_results=des_stats_table(df, funcs, group_label, columns)
df_results


One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.


One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.


One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.


One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.


One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.


One or more sample arguments is too small; all returned values will be NaN. See documentation for sample size requirements.



Unnamed: 0,var,survey_BA,mode_BA,median_BA,mean_BA,std_BA,count_BA,survey_CI,mode_CI,median_CI,mean_CI,std_CI,count_CI,pvalue
0,inv_time,BA,3.0,4.0,3.56383,1.187375,94,CI,2.0,2.0,2.173913,1.029217,23,5.037791e-06
1,inv_num,BA,3.0,4.0,3.76087,1.329046,92,CI,1.0,2.0,2.153846,1.347362,26,6.397449e-07
2,inv_amount_average,BA,1.0,2.0,2.306818,1.27178,88,CI,2.0,2.0,2.384615,1.502135,13,0.9495629
3,inv_dd_duration,BA,3.0,3.0,2.688889,0.611582,90,CI,3.0,3.0,2.55,1.050063,20,0.6004409
4,inv_goal,BA,3.0,3.0,2.766667,0.671239,90,CI,2.0,2.0,2.380952,0.589592,21,0.01749176
5,inv_return,BA,5.0,4.0,4.011628,1.409999,86,CI,5.0,3.0,3.461538,1.613246,13,0.3009928


# Radar Charts

In [33]:
######################################### RADAR ORDINAL VARIABLES #########################################

# Radar of the group means for a fixed set of ordinal variables.
# NOTE(review): 'inv_amount_average' and 'inv_dd_duration' are listed in
# DIFF_COLS (labels differ between surveys) - confirm that comparing their
# means across investor types is intended.
stat_func='mean'
cols=['inv_time', 'inv_num', 'inv_amount_average', 'inv_dd_duration', 'inv_goal', 'inv_return']
df=df_load.copy()

radar_num(df, cols, stat_func)







In [34]:
######################################### RADAR CHART - OTHER VARIABLES #########################################

df=df_load.copy()
for col_key in COLS:
    # Only chart questions shared by both surveys with identical labels;
    # 'inv_companies_last2y' is free text and has no categories.
    if (
        col_key in GENDER_COLS
        or col_key in DIFF_COLS
        or col_key in BA_ONLY_COLS
        or col_key in CI_ONLY_COLS
        or col_key=='inv_companies_last2y'
    ):
        continue
    try:
        radar_cat(df, col_key)
    except Exception as err:
        # Best effort: keep charting the remaining columns, but report why
        # this one failed. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit stop the loop.
        print('Error for {}: {!r}'.format(col_key, err))

Investor Experience






Number of Investments






Sectors






Investment Goal






Investment Returns














# Distribution Histogram

In [37]:
# Histogram settings: percentages on the y axis, category codes start at 1
start_range=1
histnorm='percent'
y_title='Percentage'
y_tickformat=''

df=df_load.copy()

# Chart the questions shared by both surveys with identical labels;
# 'inv_companies_last2y' is free text and has no categories.
cols=[]
for col_key in COLS:
    if col_key in GENDER_COLS or col_key in DIFF_COLS:
        continue
    if col_key in BA_ONLY_COLS or col_key in CI_ONLY_COLS:
        continue
    if col_key=='inv_companies_last2y':
        continue
    cols.append(col_key)

dist_chart(df, cols, start_range, y_title, y_tickformat, histnorm)


Investor Experience - Consolidated - Gender



Investor Experience - Consolidated - Investor Type



Investor Experience - Cross Group



Number of Investments - Consolidated - Gender



Number of Investments - Consolidated - Investor Type



Number of Investments - Cross Group



Sectors - Consolidated - Gender



Sectors - Consolidated - Investor Type



Sectors - Cross Group



Investment Goal - Consolidated - Gender



Investment Goal - Consolidated - Investor Type



Investment Goal - Cross Group



Investment Returns - Consolidated - Gender



Investment Returns - Consolidated - Investor Type



Investment Returns - Cross Group



