#### Lesson 5 Filtering Data in Python with Boolean Indexes ####
Link to this lesson: https://community.modeanalytics.com/python/tutorial/python-filtering-with-boolean-indexes/

In [1]:
# Import packages
import pandas as pd
# Default notebook setup.
# Render floats in fixed-point notation with three decimal places, which also
# suppresses scientific notation in pandas output. The previous spec '{:3f}'
# set a minimum *width* of 3 (a no-op) rather than a precision.
pd.options.display.float_format = '{:.3f}'.format

In [2]:
# Load the Watsi page-view CSV and, in the same step, swap every missing value
# for an empty string so text processing is easier.
data = pd.read_csv('watsi_dataset_demo.csv').fillna('')
# Summarise the frame: column names, non-null counts and dtypes.
data.info()
# From here on the Watsi web traffic is available as the `data` DataFrame.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 9 columns):
referrer           5000 non-null object
timestamp          5000 non-null object
title              5000 non-null object
url                5000 non-null object
user_agent         5000 non-null object
user_id            5000 non-null object
referrer_domain    5000 non-null object
website_section    5000 non-null object
platform           5000 non-null object
dtypes: object(9)
memory usage: 351.6+ KB


In [3]:
# Preview the first five rows (five is also the default for head()).
data.head(5)

Unnamed: 0,referrer,timestamp,title,url,user_agent,user_id,referrer_domain,website_section,platform
0,https://www.google.com/,2016-02-05 00:48:23,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4...,CHAROLETTE S,google,,Desktop
1,https://themeteorchef.com/snippets/making-use-...,2016-02-24 23:12:10,Watsi | The Meteor Chef,https://watsi.org/team/the-meteor-chef,Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...,WARREN Q,themeteorchef.com,team,Desktop
2,https://watsi.org/,2015-12-25 17:59:35,Watsi | Give the gift of health with a Watsi G...,https://watsi.org/gift-cards,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1...,MITCHEL O,watsi.org,gift-cards,Desktop
3,,2016-02-05 21:19:30,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2...,MICHEL O,,,Desktop
4,https://watsi.org/fund-treatments,2016-02-14 19:30:08,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2...,ANDREE N,watsi.org,,Desktop


In [4]:
# Element-wise comparison: yields one True/False per row, True where the page
# title equals the title string below.
data['title'] == 'Watsi | Fund medical treatments for people around the world'

0        True
1       False
2       False
3        True
4        True
5       False
6       False
7       False
8       False
9        True
10      False
11      False
12      False
13      False
14      False
15      False
16      False
17      False
18      False
19       True
20       True
21      False
22      False
23      False
24      False
25      False
26      False
27      False
28      False
29       True
        ...  
4970    False
4971    False
4972    False
4973     True
4974     True
4975    False
4976    False
4977    False
4978    False
4979    False
4980    False
4981    False
4982    False
4983     True
4984     True
4985    False
4986    False
4987     True
4988    False
4989    False
4990    False
4991     True
4992    False
4993    False
4994    False
4995    False
4996     True
4997     True
4998    False
4999    False
Name: title, dtype: bool

In [5]:
# Keep the boolean mask under a name so it can be reused as a row filter.
homepage_index = data['title'].eq('Watsi | Fund medical treatments for people around the world')

In [6]:
# Select only the rows where the mask is True.
watsi_homepage = data.loc[homepage_index]

In [7]:
# The 15 most frequent referrer URLs among the selected rows
# (value_counts sorts by count, descending).
watsi_homepage['referrer'].value_counts().head(15)

                                                                                                     451
https://www.google.com/                                                                              153
https://www.google.co.in/                                                                             53
https://watsi.org/                                                                                    22
https://www.reddit.com/                                                                               21
https://watsi.org/fund-treatments                                                                     18
http://blog.watsi.org/                                                                                17
https://watsi.org/about                                                                               17
https://www.google.com.au/                                                                            16
https://www.google.ca/                                 

In [8]:
# The 10 most frequent referrer domains among the selected rows.
watsi_homepage['referrer_domain'].value_counts().iloc[:10]

                         452
google                   422
watsi.org                169
reddit.com                36
facebook.com              24
t.co                      22
vessel.com                11
forbes.com                10
adzerk.net                 9
thedoctorschannel.com      7
Name: referrer_domain, dtype: int64

In [9]:
# Rows whose referrer domain is reddit.com.
watsi_reddit_pageviews = data.loc[data['referrer_domain'].eq('reddit.com')]
# Page titles in those rows, most frequent first (at most 20 shown).
watsi_reddit_pageviews['title'].value_counts().head(20)

Watsi | Reddit Uplifting News                                  52
Watsi | Fund medical treatments for people around the world    36
The surprising role of Netflix in global health                 9
Watsi | Give the gift of health with a Watsi Gift Card          2
Name: title, dtype: int64

In [10]:
# Substring filter: keep rows whose referrer URL contains 'medical'.
medical_referrals = data.loc[data['referrer'].str.contains('medical')]
medical_referrals

Unnamed: 0,referrer,timestamp,title,url,user_agent,user_id,referrer_domain,website_section,platform
222,http://www.inc.com/abigail-tracy/new-crowdfund...,2015-12-07 23:15:52,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) A...,BERNADETTE W,inc.com,,iPad
568,http://www.google.pl/url?sa=t&source=web&cd=1&...,2016-03-19 10:03:28,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-...,KRISTIAN E,google,,Android
3115,https://www.reddit.com/r/Anarcho_Capitalism/co...,2015-12-19 00:33:04,Watsi | Give the gift of health with a Watsi G...,https://watsi.org/gift-cards,Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...,JAIMEE U,reddit.com,gift-cards,Desktop
3886,http://poormedicalcare.weebly.com/help-our-cau...,2016-03-19 18:30:13,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebK...,STEPANIE V,weebly.com,,Desktop
3980,http://my.parallaxsearch.com/web?qs=crowd+fund...,2016-03-11 05:22:01,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...,ELLIE U,parallaxsearch.com,,Desktop
4859,http://www.google.co.in/search?site=&oq=crowdf...,2016-03-14 18:59:07,Watsi | Fund medical treatments for people aro...,https://watsi.org/,Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; S...,MARRY D,google,,Android


In [11]:
# Show the full referrer URLs as a plain Python list (the table view truncates them).
list(medical_referrals['referrer'])

['http://www.inc.com/abigail-tracy/new-crowdfunding-site-for-medical-students.html',
 'http://www.google.pl/url?sa=t&source=web&cd=1&rct=j&q=medical%20treatment%20grants&ved=0ahUKEwjRo5DGv8zLAhUiS5oKHdOAC58QFggaMAA&url=https%3A%2F%2Fwatsi.org%2F&usg=AFQjCNEkfSK0vlcYRRpj89LfLVEvb8rV6A&sig2=dKNwFh-vKOj-UFZJ63zciw',
 'https://www.reddit.com/r/Anarcho_Capitalism/comments/3xddbx/watsi_a_cool_new_charity_that_crowdfunds_medical/',
 'http://poormedicalcare.weebly.com/help-our-cause.html',
 'http://my.parallaxsearch.com/web?qs=crowd+funding+for+medical+treatment',
 'http://www.google.co.in/search?site=&oq=crowdfunding+for+me&aqs=mobile-gws-lite.0.0l5&q=crowdfunding+for+medical+treatment']

In [12]:
# Boolean mask: True where the referrer URL contains 'crowdfund'.
crowdfund_index = data['referrer'].str.contains('crowdfund')
# Pick the matching rows and the referrer column in a single .loc lookup.
data.loc[crowdfund_index, 'referrer'].tolist()

['http://www.inc.com/abigail-tracy/new-crowdfunding-site-for-medical-students.html',
 'https://www.reddit.com/r/Anarcho_Capitalism/comments/3xddbx/watsi_a_cool_new_charity_that_crowdfunds_medical/',
 'http://www.google.co.in/search?site=&oq=crowdfunding+for+me&aqs=mobile-gws-lite.0.0l5&q=crowdfunding+for+medical+treatment']

In [13]:
# Same result in a single expression: build the mask inline and select the
# referrer column for the matching rows.
data.loc[data['referrer'].str.contains('crowdfund'), 'referrer'].tolist()

['http://www.inc.com/abigail-tracy/new-crowdfunding-site-for-medical-students.html',
 'https://www.reddit.com/r/Anarcho_Capitalism/comments/3xddbx/watsi_a_cool_new_charity_that_crowdfunds_medical/',
 'http://www.google.co.in/search?site=&oq=crowdfunding+for+me&aqs=mobile-gws-lite.0.0l5&q=crowdfunding+for+medical+treatment']

In [14]:
# Print the column names, non-null counts and dtypes of `data` again as a
# reminder of which columns are available to filter on.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 9 columns):
referrer           5000 non-null object
timestamp          5000 non-null object
title              5000 non-null object
url                5000 non-null object
user_agent         5000 non-null object
user_id            5000 non-null object
referrer_domain    5000 non-null object
website_section    5000 non-null object
platform           5000 non-null object
dtypes: object(9)
memory usage: 351.6+ KB


In [16]:
# Boolean mask: True where the user-agent string contains 'IEMobile'.
idx = data['user_agent'].str.contains('IEMobile')
# List the full user-agent strings of the matching rows.
data.loc[idx, 'user_agent'].tolist()

['Mozilla/5.0 (Mobile; Windows Phone 8.1; Android 4.0; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 635) like iPhone OS 7_0_3 Mac OS X AppleWebKit/537 (KHTML, like Gecko) Mobile Safari/537',
 'Mozilla/5.0 (Mobile; Windows Phone 8.1; Android 4.0; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; Microsoft; Lumia 535 Dual SIM) like iPhone OS 7_0_3 Mac OS X AppleWebKit/537 (KHTML, like Gecko) Mobile Safari/537',
 'Mozilla/5.0 (Mobile; Windows Phone 8.1; Android 4.0; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; Microsoft; Lumia 535 Dual SIM) like iPhone OS 7_0_3 Mac OS X AppleWebKit/537 (KHTML, like Gecko) Mobile Safari/537',
 'Mozilla/5.0 (Mobile; Windows Phone 8.1; Android 4.0; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; Microsoft; Lumia 532; Orange) like iPhone OS 7_0_3 Mac OS X AppleWebKit/537 (KHTML, like Gecko) Mobile Safari/537',
 'Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; Microsoft; Lumia 640 XL Dual SIM)