In [2]:
#Cleaning and preprocessing done with Pandas, and plotting and interactivity done with bokeh
import pandas as pd
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import Range1d, HoverTool, LabelSet, Label
from bokeh.models.widgets import Select
from bokeh.io import output_file, show
from bokeh.layouts import gridplot


In [3]:
#Maximizes display
pd.options.display.max_columns = 999
pd.options.display.max_columns = 999
mozillaDF = pd.read_csv('20171013111831-SurveyExport.csv', encoding = "ISO-8859-1", low_memory= False)

# Rename columns about which terms you can explain
mozillaDF = mozillaDF.rename(columns={"WiFi Router:Check all the internet connected devices you currently own:":"WiFi Router",
                                      "Laptop computer:Check all the internet connected devices you currently own:":"Laptop",
                                      "Smart phone:Check all the internet connected devices you currently own:":"Smart Phone",
                                      "Smart TV:Check all the internet connected devices you currently own:":"Smart TV",
                                      "Activity Tracker (ex: Fitbit or Apple Watch):Check all the internet connected devices you currently own:":"Activity Tracker",
                                      "Smarthome Hub (ex. Amazon Echo, Google Alexa):Check all the internet connected devices you currently own:":"Smart Hub",
                                      "Car that connects to the internet:Check all the internet connected devices you currently own:":"Connected Car",
                                      "Smart Thermostat (ex: Nest):Check all the internet connected devices you currently own:":"Smart Thermostat",
                                      "Smart Appliance (ex. Coffeemaker, Refrigerator, Oven, Fridge):Check all the internet connected devices you currently own:":"Smart Appliance",
                                      "Smart Door Locks (ex. Door locks for your home you can open via bluetooth):Check all the internet connected devices you currently own:":"Smart Door",
                                      "Smart Lighting (ex. Connected lighting switches, dimmers, or bulbs):Check all the internet connected devices you currently own:":"Smart Lighting",
                                      "Thinking about a future in which so much of your world is connected to the internet leaves you feeling:":"I am...about the future:"})
                                      

In [4]:
#Quantify 'I consider myself' 
#1 = Luddite, 2 = Average, 3 = Savvy, 4 = Ultra Nerd
mozillaDF['I consider myself:'].replace(to_replace='Technically Savvy:   I know my way around a computer pretty well. When anyone in my family needs technical help, I\x89Ûªm the one they call.', value=3, inplace=True)
mozillaDF['I consider myself:'].replace(to_replace='Luddite:  Technology scares me! I only use it when I have to.', value=1, inplace=True)
mozillaDF['I consider myself:'].replace(to_replace='Ultra Nerd:  I build my own computers, run my own servers, code my own apps. I\x89Ûªm basically Mr. Robot.', value=4, inplace=True)
mozillaDF['I consider myself:'].replace(to_replace='Average User:   I know enough to get by.', value=2, inplace=True)

#Transformed user repsonse to boolean values, so they can be summed later
mozillaDF['WiFi Router'].replace(to_replace='WiFi Router', value=1, inplace=True)
mozillaDF['Laptop'].replace(to_replace='Laptop computer', value=1, inplace=True)
mozillaDF['Smart Phone'].replace(to_replace='Smart phone', value=1, inplace=True)
mozillaDF['Smart TV'].replace(to_replace='Smart TV', value=1, inplace=True)
mozillaDF['Activity Tracker'].replace(to_replace='Activity Tracker (ex: Fitbit or Apple Watch)', value=1, inplace=True)
mozillaDF['Smart Hub'].replace(to_replace='Smarthome Hub (ex. Amazon Echo, Google Alexa)', value=1, inplace=True)
mozillaDF['Connected Car'].replace(to_replace='Car that connects to the internet', value=1, inplace=True)
mozillaDF['Smart Thermostat'].replace(to_replace='Smart Thermostat (ex: Nest)', value=1, inplace=True)
mozillaDF['Smart Appliance'].replace(to_replace='Smart Appliance (ex. Coffeemaker, Refrigerator, Oven, Fridge)', value=1, inplace=True)
mozillaDF['Smart Door'].replace(to_replace='Smart Door Locks (ex. Door locks for your home you can open via bluetooth)', value=1, inplace=True)
mozillaDF['Smart Lighting'].replace(to_replace='Smart Lighting (ex. Connected lighting switches, dimmers, or bulbs)', value=1, inplace=True)

#Transformed repsonse text to ordinal values, so they can be used as a scale on an axis
mozillaDF['I am...about the future:'].replace(to_replace='Scared as hell. The future where everything is connected has me scared senseless. We\x89Ûªre all doomed!', value=1, inplace=True)
mozillaDF['I am...about the future:'].replace(to_replace='Cautiously optimistic. I\x89Ûªm hopeful we\x89Ûªre building a better world by becoming more connected in everything we do.', value=2, inplace=True)
mozillaDF['I am...about the future:'].replace(to_replace='A little wary. All this being connected to the internet in every part of our lives makes me a little nervous. What\x89Ûªs going to happen to our privacy?', value=3, inplace=True)
mozillaDF['I am...about the future:'].replace(to_replace='On the fence.  I\x89Ûªm not sure about all this. I think I\x89Ûªll wait and see.', value=4, inplace=True)
mozillaDF['I am...about the future:'].replace(to_replace='Super excited! I can\x89Ûªt wait for everything to be connected. My life will be so much better.', value=5, inplace=True)
mozillaDF['I am...about the future:'].replace(to_replace= 'NaN', value=0, inplace=True)
mozillaDF = mozillaDF.fillna(0)

#There is a row with country = 0, and I deleted it, so I could sort on the attribute 'Country'
mozillaDF = mozillaDF[mozillaDF['Country'] != 0]

#Sort by 'Country'
mozillaDF = mozillaDF.sort_values(by = ['Country'])




In [5]:
#Summed number of devices per user
mozillaDF['Total Devices'] = (mozillaDF['WiFi Router']+mozillaDF['Laptop']+mozillaDF['Smart Phone']+mozillaDF['Smart TV']+mozillaDF['Activity Tracker']+mozillaDF['Smart Hub']+mozillaDF['Connected Car']+mozillaDF['Smart Thermostat']+mozillaDF['Smart Appliance']+mozillaDF['Smart Door']+mozillaDF['Smart Lighting'])

#Calculated the mean of the all values by 'Country'
mozilla_mean = mozillaDF.groupby(['Country']).mean()

#Created a new variable that counts the total number of respondents from each country 
mozilla_count = mozillaDF.groupby(['Country']).count()

#Appended result from mozilla_count to mozilla_mean
mozilla_mean['Response Count'] = (mozilla_count['Status'])

'''
Eliminated all countries with less than 15 respondents. This dropped country count 
from 218 to 145, but many of those countries dropped had only 1 respondent
'''
mozilla_mean = mozilla_mean[mozilla_mean['Response Count'] > 15]

#Transformed relevant dataframe attributes to list for plotting 
x = mozilla_mean['Total Devices'].tolist()
y = mozilla_mean['I am...about the future:'].tolist()
z = mozilla_mean.index.tolist()



In [6]:
'''
source code for best fit line:
https://stackoverflow.com/questions/22239691/code-for-line-of-best-fit-of-a-scatter-plot-in-python

Best fit line highlights the trend of the final scatterplot visualization, and helps the viewer to investigate outliers. 
'''
def best_fit(x, y):

    xbar = sum(x)/len(x)
    ybar = sum(y)/len(y)
    n = len(x) # or len(Y)

    numer = sum([xi*yi for xi,yi in zip(x, y)]) - n * xbar * ybar
    denum = sum([xi**2 for xi in x]) - n * xbar**2

    b = numer / denum
    a = ybar - b * xbar
    return a, b


In [7]:
#Calls best_fit, and creates the line. 
a, b = best_fit(x, y)
yfit = [a + b * xi for xi in x]


In [11]:
'''
Bokeh code.
'''

#Creates html file called "feelings_devices.html". 
output_file("feelings_devices.html")

#creates an interactivity disctionary 
source = ColumnDataSource(data=dict(
    x = mozilla_mean['Total Devices'].tolist(),
    y = mozilla_mean['I am...about the future:'].tolist(),
    z = mozilla_mean.index.tolist(),
))

#initializes hover attributes
hover= HoverTool(tooltips=[
    ("Attitude", "$y"),
    ("# Devices", "$x"),
    ("Country", "@z"),
])

#scatter plot and best fit line 
p = figure(title = 'Feelings Toward a Connected Future compared to How Many Devices People Own',
           plot_width = 700, plot_height = 700, tools = [hover])
p.title.align = 'center'
p.title.text_color = '#3A3A3C'
p.title.text_font = "arial"
p.title.text_font_size = "12pt"
p.y_range = Range1d(0, 5)

r = p.scatter(x,y, color = '#D8721A', source = source)
p.xaxis.axis_label = 'Average Number of Connected Devices per Person by Country '
p.xaxis.axis_label_text_color = '#3A3A3C'
p.xaxis.axis_label_text_font = 'arial'
p.xaxis.axis_label_text_font_size = '10pt'

p.yaxis.axis_label = ('Average Attitude Toward Connected Future by Country')
p.yaxis.axis_label_text_color = '#3A3A3C'
p.yaxis.axis_label_text_font = 'arial'
p.yaxis.axis_label_text_font_size = '10pt'

p.line(x,yfit, line_width = 2, color = '#660065', line_alpha = 0.4)

#citation* essentially creates text boxes that are posiitonable on the screen
citation5 = Label(x=45, y=656, x_units='screen', y_units='screen',
                 text='Super Excited! ', text_color = '#3A3A3C',text_font_size = '10pt', render_mode='css',
                  text_font_style = 'italic',
                 border_line_color='#3A3A3C', border_line_alpha=0,
                 background_fill_color='white', background_fill_alpha=1.0)
citation4 = Label(x=45, y=535, x_units='screen', y_units='screen',
                 text='Cautiously optimistic. ', text_color = '#3A3A3C',text_font_size = '10pt', render_mode='css',
                  text_font_style = 'italic',
                 border_line_color='#3A3A3C', border_line_alpha=0,
                 background_fill_color='white', background_fill_alpha=1.0)
citation3 = Label(x=45, y=410, x_units='screen', y_units='screen',
                 text='On the fence.', text_color = '#3A3A3C', text_font_size = '10pt', render_mode='css',
                  text_font_style = 'italic',
                 border_line_color='#3A3A3C', border_line_alpha=0,
                 background_fill_color='white', background_fill_alpha=1.0)
citation2 = Label(x=45, y=285, x_units='screen', y_units='screen',
                 text='A little wary ', text_color = '#3A3A3C', text_font_size = '10pt', render_mode='css',
                  text_font_style = 'italic',
                 border_line_color='#3A3A3C', border_line_alpha=0,
                 background_fill_color='white', background_fill_alpha=1.0)

citation1 = Label(x=45, y=160, x_units='screen', y_units='screen',
                 text='Scared as hell. ', text_color = '#3A3A3C', text_font_size = '10pt', render_mode='css',
                  text_font_style = 'italic',
                 border_line_color='#3A3A3C', border_line_alpha=0,
                 background_fill_color='white', background_fill_alpha=1.0)




#stylizes data points
glyph = r.glyph
glyph.size = 12
glyph.line_color = '#660065'
glyph.fill_alpha = 0.4
glyph.line_width = 1.2

#Calls citation*
p.add_layout(citation5)
p.add_layout(citation4)
p.add_layout(citation3)
p.add_layout(citation2)
p.add_layout(citation1)


show(p)


Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
