# Mozilla Survey

In [1]:
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, output_file, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, CustomJS

# Send Bokeh output to a standalone HTML file. To render plots inline in the
# notebook instead, call output_notebook() here in place of output_file().
# NOTE(review): every later show(p) call uses this same file name, so a later
# plot presumably overwrites an earlier one on disk — confirm that is intended.
output_file("mozilla_survey.html")

In [2]:
# Load and sanitize the raw survey export.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is what pandas recommends for columns that
# mix numbers and text.
# NOTE(review): the stray output left under this cell looks like the tail of a
# pandas mixed-dtype warning — confirm it disappears with this option.
data = pd.read_csv('SurveyExport.csv', encoding='unicode_escape', low_memory=False)


def _last_matching_column(columns, predicate, description):
    """Return the last column name for which ``predicate(name)`` is true.

    Raises:
        KeyError: if no column matches, so a changed export layout fails here
            with a clear message rather than with a NameError further down.
    """
    matches = [name for name in columns if predicate(name)]
    if not matches:
        raise KeyError("no survey column found {}".format(description))
    return matches[-1]


def _blank_to_numeric(series):
    """Drop blank answers from ``series`` and convert the rest to numbers.

    The pattern is anchored at both ends on purpose: when the replacement is
    not a string, pandas replaces every cell in which the regex matches
    anywhere, and an unanchored r'\\s*$' matches the end of every string, so
    it would turn every text answer into NaN, not just the blank ones.
    """
    without_blanks = series.replace(r'^\s*$', np.nan, regex=True).dropna()
    return pd.to_numeric(without_blanks)


# The question columns have long headers, so locate them by a distinctive
# fragment of the header text.
trust_col = _last_matching_column(data.columns, lambda name: 'trust' in name, "containing 'trust'")
privacy_col = _last_matching_column(data.columns, lambda name: name.startswith('Privacy'), "starting with 'Privacy'")
security_col = _last_matching_column(data.columns, lambda name: name.startswith('Security'), "starting with 'Security'")
safety_col = _last_matching_column(data.columns, lambda name: name.startswith('Safety'), "starting with 'Safety'")

# Column positions of the two multi-select questions in the export.
# NOTE(review): positional slices break silently if the export layout
# changes — verify them against the CSV header.
TERMS_COLUMNS = slice(34, 44)
DEVICES_COLUMNS = slice(8, 19)

# One column per option; encode "has a value" as 1 and "empty" as 0.
terms_df = data.iloc[:, TERMS_COLUMNS].notnull().astype('int')
devices_df = data.iloc[:, DEVICES_COLUMNS].notnull().astype('int')

# Required columns
country = data['Country'].replace(r'\s*$', '', regex=True)  # strip trailing whitespace
tech_savvy = data['I consider myself:']
trust = data[trust_col]
imp_privacy = _blank_to_numeric(data[privacy_col])
imp_security = _blank_to_numeric(data[security_col])
imp_safety = _blank_to_numeric(data[safety_col])
# Row-wise totals: how many options each respondent filled in.
terms = terms_df.sum(axis=1).rename('terms')
connected_devices = devices_df.sum(axis=1).rename('connected devices')

# Mean of the three ratings. The series are aligned on the row index, so a
# respondent missing any one of the three ratings gets NaN here.
imp = (imp_privacy + imp_security + imp_safety) / 3

frame = {
    'country': country,
    'tech_savvy': tech_savvy,
    'trust': trust,
    'imp_privacy': imp_privacy,
    'imp_security': imp_security,
    'imp_safety': imp_safety,
    'imp': imp,
    'terms': terms,
    'connected_devices': connected_devices,
}
df = pd.DataFrame(frame)

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Encode the self-reported tech-savviness labels as integer scores.
# Labels are ranked by order of first appearance in the data: the first label
# seen gets the highest score (the number of distinct labels), the last gets 1.
# NOTE(review): this only yields a meaningful scale if the first-seen order
# happens to run from most to least savvy — confirm, or replace it with an
# explicit label -> score dictionary.
tech_savvy_val = list(tech_savvy.dropna().unique())
tech_savvy_mapping = dict(zip(tech_savvy_val, range(len(tech_savvy_val), 0, -1)))
# Map from the untouched `tech_savvy` series rather than from df['tech_savvy'],
# so re-running this cell does not map already-encoded scores to NaN.
df['tech_savvy'] = tech_savvy.map(tech_savvy_mapping)

**Importance of Privacy** vs **Number of Online Connected Devices**  
  
Here we compare how important privacy, security and safety are to a user with the number of online connected devices they own.


In [4]:
# Per-country aggregates for the bubble chart. groupby() sorts its result by
# country name, and all four series below share that same index.
by_country = df.groupby(country)
imp = by_country['imp'].mean()
devices = by_country['connected_devices'].mean()
tech = by_country['tech_savvy'].mean()
_count = by_country['tech_savvy'].count()
# Take the labels from the aggregated index so every country name lines up
# with its own row. Building the list from df.country.unique() would give
# first-appearance order and pair names with the wrong values in the tooltip.
_country = list(imp.index)

source = ColumnDataSource(data={'x': devices,
                                'y': imp,
                                'z': tech,
                                'z_2': tech * 2,  # marker size: twice the savviness score
                                'country': _country,
                                'count': _count})

TIPS = [("Country", "@country"),
        ("# Responses", "@count"),
        ("Online connected devices", "@x"),
        ("Importance of Privacy, Security and Safety", "@y"),
        ("Technology Savviness score", "@z")]

p = figure(title="Importance of Privacy, Security, and Safety vs. Number of Connective Devices Owned",
           sizing_mode='stretch_both',
           match_aspect=True,
           tooltips=TIPS,
           toolbar_location='below')

# One circle per country: x = mean device count, y = mean importance rating.
p.circle('x', 'y', size='z_2', source=source)

p.title.align = 'center'
p.xaxis.axis_label = "Number of Connected Devices"
p.yaxis.axis_label = "Importance of Privacy, Security and Safety"

show(p)

In [None]:
from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar, Title, Label
from bokeh.layouts import row, column
from bokeh.palettes import Blues,Viridis256

# Heat map: mean tech-savviness score for every
# (number of connected devices, trusted party) combination.

# Non-null `country` entries per (devices, trust) pair.
_count = df.groupby(['connected_devices', 'trust']).count()['country']

# The x axis is categorical, so the device counts have to be strings. Convert
# them on a copy instead of overwriting df['connected_devices']: earlier cells
# take the mean of that column and would break on re-run if it held strings.
heat_df = df.assign(connected_devices=df['connected_devices'].astype(str))
_devices = sorted(heat_df['connected_devices'].unique(), key=int)  # numeric order, not lexical
_trust = list(heat_df['trust'].dropna().unique())

# Mean score and number of scored responses per cell. Both come from the same
# groupby, so their rows are in the same order.
savvy_by_cell = heat_df.groupby(['connected_devices', 'trust'])['tech_savvy']
_tech_savvy_matrix = savvy_by_cell.mean().reset_index()
_tech_savvy = savvy_by_cell.count()
_tech_savvy_count = _tech_savvy.reset_index()

source = ColumnDataSource(data={'x': _tech_savvy_matrix.connected_devices,
                                'y': _tech_savvy_matrix.trust,
                                'z': _tech_savvy_matrix.tech_savvy,
                                'count': _tech_savvy_count.tech_savvy})

TIPS = [('# Responses', '@count'), ("Technology Savviness score", "@z")]

# NOTE(review): high=4 is presumably the top tech-savviness score, which
# assumes four distinct labels in the data — TODO confirm.
mapper = LinearColorMapper(palette=Viridis256,
                           low=_tech_savvy_matrix.tech_savvy.min(),
                           high=4)

p = figure(title="Trust vs Knowledge",
           x_range=_devices,
           y_range=_trust,
           x_axis_location="below",
           sizing_mode='stretch_both',
           toolbar_location='below',
           tooltips=TIPS)

p.title.align = 'center'
p.xaxis.axis_label = 'Number of online connected devices'
p.yaxis.axis_label = 'Whom to trust?'
p.axis.major_label_text_font_size = "8pt"
p.axis.major_label_standoff = 0

# One unit-sized rectangle per (devices, trust) cell, coloured by mean score.
p.rect(x="x",
       y="y",
       width=1,
       height=1,
       source=source,
       fill_color={'field': 'z', 'transform': mapper},
       line_color=None)

# Created here and attached to the figure by the following cell.
color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="8pt", location=(0, 0))


In [None]:
# Put the colour bar and then its caption on the right-hand side of the heat
# map, and render the figure.
legend_caption = Title(text="Technology Savviness", align="center")
for annotation in (color_bar, legend_caption):
    p.add_layout(annotation, "right")

show(p)