In [211]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, WebDriverException

import pickle
import pandas as pd
import numpy as np
import time
import os

# Record the library versions this notebook was run with, for reproducibility.
print('Pandas version', pd.__version__)
print('Numpy version', np.__version__)

Pandas version 0.20.3
Numpy version 1.14.0


In [212]:
# Locate the chromedriver binary (no browser is started in this cell).
# Set the CHROMEDRIVER_PATH environment variable to point at a different
# install without editing the notebook; the default is the original path.
chromedriver = os.environ.get("CHROMEDRIVER_PATH", "/Applications/chromedriver")
# NOTE(review): presumably informational only -- the path is passed to
# webdriver.Chrome() explicitly in the next cell. TODO confirm it is needed.
os.environ["webdriver.chrome.driver"] = chromedriver

In [213]:
# Launch a Chrome session through chromedriver. No page is loaded here; the
# fetch_* functions defined below use this session to visit each URL.
# NOTE(review): no visible cell calls driver.quit(), so the browser stays
# open until it is closed manually.
driver = webdriver.Chrome(chromedriver)

## Read in school data

In [214]:
# Load the school table from a local pickle file.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
schools = pd.read_pickle('../data/school_data.pkl')

In [215]:
# Pull out the two social-media URL columns that the scrapers iterate over.
fb, tw = (schools[column] for column in ('Facebook', 'Twitter'))

## Find Facebook Likes

In [218]:
def fetch_fb_likes(fb_urls, browser=None, page_wait=3.3, settle_wait=2.1):
    """
    Scrape the publicly visible "likes" count for each Facebook page URL.

    Parameters
    ----------
    fb_urls : iterable
        Facebook page URLs. Missing entries (None/NaN, as judged by
        ``pd.isnull``) are skipped without touching the browser.
    browser : WebDriver-like, optional
        Object providing ``get(url)`` and ``find_element_by_id(id)``.
        Defaults to the notebook-level ``driver``.
    page_wait : float, optional
        Seconds to pause after loading each page (default 3.3).
    settle_wait : float, optional
        Seconds to pause after a count was read successfully (default 2.1).

    Returns
    -------
    list
        One entry per input URL, in input order: the like count as an
        ``int``, or ``None`` when the URL is missing, the counter element is
        not on the page, or its text does not start with a plain integer.
    """
    list_likes = []

    for url in fb_urls:

        if pd.isnull(url):
            list_likes.append(None)
            continue

        if browser is None:
            # Resolved lazily so that inputs without any URL never need a
            # running Selenium session.
            browser = driver

        # Go to school facebook page
        browser.get(url)
        time.sleep(page_wait)

        try:
            # Find Page Likes: the first whitespace-separated token of the
            # counter text, with thousands separators removed.
            count = browser.find_element_by_id('PagesLikesCountDOMID')
            likes = int(count.text.split()[0].replace(',', ''))
        except (IndexError, ValueError):
            # Counter present but empty, or not a plain integer (e.g. "1.2K").
            # Record a gap instead of aborting the whole scrape.
            list_likes.append(None)
        except NoSuchElementException:
            # Page has no likes counter.
            list_likes.append(None)
        else:
            list_likes.append(likes)
            time.sleep(settle_wait)

    return list_likes
        

In [219]:
# Scrape like counts for every school. Slow: each non-null URL loads a page
# in the browser and sleeps between steps.
fb_likes = fetch_fb_likes(fb)

In [220]:
# Sanity check: expect one entry per row of `schools`.
len(fb_likes)

121

In [221]:
# Attach the like counts as a new column (modifies `schools` in place).
schools['fb_likes'] = fb_likes

In [230]:
# Spot-check a random sample of rows. The saved output comes from a later
# execution (In [230]), after the Twitter columns were added, so it already
# shows tw_followers and tw_tweets.
schools.sample(10)

Unnamed: 0,School_ID,Short_Name,Long_Name,School_Type,Zip,Facebook,Twitter,Student_Count_Total,Student_Count_Low_Income,School_Hours,Transportation_El,School_Latitude,School_Longitude,Average_ACT_School,Graduation_Rate_School,fb_likes,tw_followers,tw_tweets
177,609694,HANCOCK HS,John Hancock College Preparatory High School,Selective enrollment,60629,https://www.facebook.com/HancockCPS,https://twitter.com/cpshancock,972,855,8:15am - 3:30pm,Orange,41.791464,-87.72447,16.9,84.6,395.0,400.0,322.0
58,610529,OGDEN HS,Ogden International High School,Neighborhood,60642,http://www.facebook.com/OgdenInternational,https://twitter.com/OgdenIntl,880,481,8:00 AM - 3:15 PM,Blue,41.893548,-87.659405,19.7,83.6,1217.0,614.0,965.0
475,610392,WORLD LANGUAGE HS,World Language Academy High School,Small,60623,,,351,337,8:00 am-3:15 pm,Pink,41.834397,-87.735292,17.5,77.9,,,
372,609674,CHICAGO VOCATIONAL HS,Chicago Vocational Career Academy High School,Career academy,60617,,,885,842,8:00 AM - 3:15 PM,,41.737174,-87.573268,15.6,72.6,,,
520,400053,NOBLE - GOLDER HS,Noble - Golder College Prep,Charter,60642,https://www.facebook.com/GolderCollegePrep,,670,602,7:55 AM - 3:40 PM,Blue,41.895282,-87.664483,19.5,81.7,,,
88,400010,ACE TECH HS,ACE Technical Charter School,Charter,60609,https://www.facebook.com/pages/ACE-Technical-C...,https://twitter.com/ACETECHNICAL,314,302,7:40 AM-3:22 PM,"Brown, Purple, Red",41.796122,-87.625849,15.2,68.8,,227.0,614.0
191,609754,CHICAGO MILITARY HS,Chicago Military Academy High School,Military academy,60653,,,313,295,7:30 am-3:05 pm,"Green, Red",41.830555,-87.619216,16.9,85.4,,,
636,400059,NORTH LAWNDALE - COLLINS HS,North Lawndale College Prep - Collins,Charter,60623,https://www.facebook.com/nlcphs,,351,340,8:00 a.m. - 3:45 p.m.,Red,41.864146,-87.700681,14.5,69.7,1419.0,,
460,609760,CARVER MILITARY HS,George Washington Carver Military Academy HS,Military academy,60627,,,479,443,7:45 AM - 3:41 PM,Red,41.65629,-87.59062,18.0,91.0,,,
651,400097,NOBLE - BULLS HS,Noble - Chicago Bulls College Prep,Charter,60612,,,1146,1010,7:55 AM-3:40 PM,Blue,41.878809,-87.678195,21.7,71.5,,,


## Find Twitter Followers

In [223]:
def _parse_nav_count(elements, token_index):
    """Return the integer at ``token_index`` in the first element's text, else None."""
    try:
        token = elements[0].text.split()[token_index]
        return int(token.replace(',', ''))
    except (IndexError, ValueError):
        # No matching element, fewer tokens than expected, or a count that is
        # not a plain integer (e.g. "12.3K").
        return None


def fetch_tw_followers(tw_urls, browser=None, page_wait=3.3,
                       followers_wait=2.1, tweets_wait=1.2):
    """
    Scrape follower and tweet counts for each Twitter profile URL.

    Parameters
    ----------
    tw_urls : iterable
        Twitter profile URLs. Missing entries (None/NaN, as judged by
        ``pd.isnull``) are skipped without touching the browser.
    browser : WebDriver-like, optional
        Object providing ``get(url)`` and ``find_elements_by_xpath(xpath)``.
        Defaults to the notebook-level ``driver``.
    page_wait : float, optional
        Seconds to pause after loading each profile (default 3.3).
    followers_wait : float, optional
        Seconds to pause after locating the followers item (default 2.1).
    tweets_wait : float, optional
        Seconds to pause after locating the tweets item (default 1.2).

    Returns
    -------
    (list, list)
        ``(follower_count, tweet_count)``. Both lists have exactly one entry
        per input URL, in input order: an ``int``, or ``None`` when the URL is
        missing or the count could not be read as a plain integer.
    """
    follower_count = []
    tweet_count = []

    for url in tw_urls:

        # Skip nan values
        if pd.isnull(url):
            follower_count.append(None)
            tweet_count.append(None)
            continue

        if browser is None:
            # Resolved lazily so that inputs without any URL never need a
            # running Selenium session.
            browser = driver

        # Go to school twitter page
        browser.get(url)
        time.sleep(page_wait)

        try:
            # Find Twitter Followers and Tweet Counts
            followers = browser.find_elements_by_xpath(
                '//li[@class="ProfileNav-item ProfileNav-item--followers"]')
            time.sleep(followers_wait)

            tweets = browser.find_elements_by_xpath(
                '//li[@class="ProfileNav-item ProfileNav-item--tweets is-active"]')
            time.sleep(tweets_wait)

            # The number is the 3rd token of the followers item text and the
            # 5th token of the tweets item text.
            n_followers = _parse_nav_count(followers, 2)
            n_tweets = _parse_nav_count(tweets, 4)

        except NoSuchElementException:
            n_followers = n_tweets = None

        # Append to both lists together so they can never get out of step.
        follower_count.append(n_followers)
        tweet_count.append(n_tweets)

    return follower_count, tweet_count
        

In [224]:
# Scrape follower and tweet counts for every school. Slow: each non-null URL
# loads a page in the browser and sleeps between steps.
tw_fols, tw_twts = fetch_tw_followers(tw)

In [225]:
# Check that both lists have the expected length (one entry per school row)
print(len(tw_fols)) 
print(len(tw_twts)) 

121
121


In [226]:
# Attach the Twitter counts as new columns (modifies `schools` in place).
schools['tw_followers'] = tw_fols
schools['tw_tweets']= tw_twts

### Save school data with social media metrics (pickle and CSV)

In [227]:
# Save the enriched table (school data plus social-media columns) as a pickle.
schools.to_pickle('../data/schools_w_social.pkl')

In [228]:
# Also export a CSV copy; index=False leaves the row index out of the file.
schools.to_csv('../data/schools_w_social.csv', index=False)