In [1]:
import csv
import json
import math
import os
import time

import pandas as pd
import tweepy

# Following Analysis

In this notebook, we focus on the followers of the prolife and prochoice accounts. For each follower, we calculate the proportion of prolife groups (and of prochoice groups) among the listed prolife/prochoice groups that the follower is following.

First we need to get all the followings for each follower.

In [23]:
# Screen names treated as pro-choice accounts when classifying followings.
# Matching in get_proportion is an exact, case-sensitive string comparison,
# so the spelling and capitalisation here must match the stored names.
# NOTE(review): "Feminists4Life" is listed here as pro-choice -- please
# confirm that this classification is intended.
prochoice_users = [
    "freesafelegal", "AbortionFunds", "Abortion_Rights", "DCAbortionFund",
    "NatAbortionFed", "AbortionCare", "AbortionSupport", "PretermMAML",
    "NJAAF", "ChiAbortionFund", "AbortionRights", "NDWINFund",
    "needabortionire", "WholeWomans", "roisiningle", "StephHerold",
    "BaltimoreFund", "prochoiceNSW", "RBraceySherman", "Doctors4Choice",
    "robinmarty", "NCAbortionFund", "ShoutYrAbortion", "NYAAF",
    "BronxAbortion", "MSFC", "AbortionStories", "abortion_clinic",
    "SeaChangeProg", "Safe_Abortion", "Feminists4Life", "alranztweets",
    "nolaAbortionFnd", "TammiKromenaker", "LPJLeague", "Voice4ChoiceUK",
    "abortionpil", "RallyforChoice", "SisterSupporter", "AcpAbortion",
]

# Screen names treated as pro-life accounts when classifying followings.
# Matching in get_proportion is an exact, case-sensitive string comparison.
prolife_users = [
    "AbortionGroup",
    "prolifecampaign",
    "RosaryMovement",
    "recallabortion",
    "ExposeAbortion",
    "40daysforlife",
    "S2EAS_",
    "LifeNewsHQ",
    "AbolitionAHA",
    "AntiAbrtionGang",
    "FightAbortion",
    "ProLifeBlogs",
    "Bound4LIFE",
    "HumanCoalition",
    "LdnAntiAbortion",
    "PLAM_org",
    "ProLifeLSU",
    "NeverAbortion",
    "operationrescue",
    "KeepLifeLegal",
    "ProLifePolitics",
    "ProLifeYouth",
    "CA_ProLife",
    "SBAList",
    "ProLifeAction",
    "nrlc",
    "LilaGraceRose",
    "LiveAction",
    "spucprolife",
    "AUL",
    "frfrankpavone",
    "ProLifeMastery",
    "prolifenews",
    "LGBTQProLife",
    "prolifealliance",
    "SanDiegoProLife",
    "prolifepoppop",
    "usccbprolife",
    "AmerLifeLeague",
    "ProLifeDem",
]


In [32]:
def load_keys(path):
    """Loads your Twitter authentication keys from a file on disk.

    Args:
        path (str): The path to your key file.  The file should
          be in JSON format and look like this (but filled in):
            {
                "consumer_key": "<your Consumer Key here>",
                "consumer_secret":  "<your Consumer Secret here>",
                "access_token": "<your Access Token here>",
                "access_token_secret": "<your Access Token Secret here>"
            }

    Returns:
        dict: A dictionary mapping key names (like "consumer_key") to
          key values.  Only the four names listed above are returned;
          any other entries in the file are ignored.

    Raises:
        KeyError: If one of the four expected names is missing from the file.
    """
    # Read from the caller-supplied path rather than a fixed file name, so
    # the function honours its argument.
    with open(path) as f:
        keys = json.load(f)

    expected_names = (
        "consumer_key",
        "consumer_secret",
        "access_token",
        "access_token_secret",
    )
    # Indexing (not .get) keeps the KeyError on an incomplete key file.
    return {name: keys[name] for name in expected_names}

def get_followings(user, keys):
    """Return the screen names of the accounts that `user` is following.

    Args:
        user (str): Screen name whose followings ("friends") are fetched.
        keys (dict): Credentials with the entries "consumer_key",
          "consumer_secret", "access_token" and "access_token_secret".

    Returns:
        list: Screen names, in the order the ids were returned by the API.
    """
    handler = tweepy.OAuthHandler(keys["consumer_key"], keys["consumer_secret"])
    handler.set_access_token(keys["access_token"], keys["access_token_secret"])
    api = tweepy.API(handler)

    # Step 1: collect the ids of every followed account, page by page.
    friend_ids = []
    for id_page in tweepy.Cursor(api.friends_ids, screen_name=user).pages():
        friend_ids.extend(id_page)
        # Pause a minute after every page -- presumably to stay within the
        # API rate limit for this endpoint.
        time.sleep(60)

    # Step 2: resolve the ids to screen names, 100 ids per lookup request.
    screen_names = []
    for start in range(0, len(friend_ids), 100):
        profiles = api.lookup_users(user_ids=friend_ids[start:start + 100])
        screen_names.extend(profile.screen_name for profile in profiles)
    return screen_names

def get_proportion(followings):
    '''
    Classify the accounts a user follows against the module-level
    prolife_users and prochoice_users lists.

    An account found in prolife_users is counted as prolife; otherwise it
    is counted as prochoice if it is in prochoice_users; anything else is
    ignored.

    Returns a 5-tuple:
        (prolife proportion, prochoice proportion,
         prolife count, prochoice count, prolife + prochoice count)
    The proportions are relative to the matched accounts only.  When no
    followed account matches either list, the result is (0, 0, 0, 0, 0).
    '''
    n_prolife = 0
    n_prochoice = 0
    for account in followings:
        if account in prolife_users:
            n_prolife += 1
            continue
        if account in prochoice_users:
            n_prochoice += 1

    matched = n_prolife + n_prochoice
    # Guard instead of dividing by zero when nothing matched.
    if matched == 0:
        return 0, 0, 0, 0, 0
    return n_prolife / matched, n_prochoice / matched, n_prolife, n_prochoice, matched

def fetch_data(users, key_path, save_path):
    """Fetch each user's followings and append their proportions to a CSV.

    For every user this downloads the followings with get_followings,
    stores them with save_followings, computes the proportions with
    get_proportion, writes one row to `save_path`, then sleeps 180 seconds.

    Args:
        users: Iterable of screen names to process.
        key_path (str): Path of the JSON key file, passed to load_keys.
        save_path (str): CSV file to append to.  The header row is written
          only when the file is empty, so repeated calls with the same
          file do not insert extra header rows between data rows.
    """
    keys = load_keys(key_path)
    # newline='' is what the csv module asks for on files it writes to;
    # without it, platforms that translate '\n' end up with blank rows.
    with open(save_path, 'a', newline='') as f:
        writer = csv.writer(f)
        # An append-mode stream starts at the end of the file, so position 0
        # means the file is new or empty and still needs its header.
        if f.tell() == 0:
            writer.writerow(['User', 'prolife proportion', 'prochoice proportion'])
        for user in users:
            print("user", user, "in process")
            followings = get_followings(user, keys)
            save_followings(user, followings)
            # get_proportion returns five values (two proportions followed
            # by three counts); this file only records the proportions.
            prolife_prop, prochoice_prop = get_proportion(followings)[:2]
            writer.writerow([user, prolife_prop, prochoice_prop])
            time.sleep(180)  # 3-minute rest before fetching the next user
            
def save_followings(user, followings):
    """Write a user's followings to ./followings/<user>, one name per row.

    Args:
        user (str): Screen name; used as the file name inside ./followings.
        followings: Iterable of screen names to store.

    Any existing file for `user` is overwritten.
    """
    # Create the output folder on first use instead of failing with
    # FileNotFoundError when it does not exist yet.
    os.makedirs("./followings", exist_ok=True)
    # newline='' lets the csv writer control line endings itself.
    with open("./followings/{}".format(user), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows([following] for following in followings)
            
def get_proportion_from_file(users, save_path):
    """Compute proportions from saved followings files and append to a CSV.

    For every user this reads ./followings/<user>, passes the names to
    get_proportion, and appends one row with the two proportions and the
    three counts to `save_path`.  No API calls are made.

    Args:
        users: Iterable of screen names; each needs a file in ./followings.
        save_path (str): CSV file to append to.  The header row is written
          only when the file is empty, so re-running does not insert
          additional header rows.

    Raises:
        FileNotFoundError: If a user's followings file does not exist.
    """
    header = ['User', 'prolife proportion', 'prochoice proportion',
              'prolife cnt', 'prochoice cnt', 'total cnt']
    # newline='' is what the csv module asks for on files it reads/writes.
    with open(save_path, 'a', newline='') as out_file:
        writer = csv.writer(out_file)
        # An append-mode stream starts at the end of the file, so position 0
        # means the file is new or empty and still needs its header.
        if out_file.tell() == 0:
            writer.writerow(header)
        for user in users:
            print("user", user, "in process")
            with open("./followings/{}".format(user), "r", newline='') as in_file:
                # Tab delimiter kept from the original reader; the files hold
                # one name per row, so each row yields a single field.
                reader = csv.reader(in_file, delimiter="\t")
                # Use a separate name for the list so the open file handle
                # is not shadowed inside its own `with` block.
                followings = [name for row in reader for name in row]
            prolife_prop, prochoice_prop, prolife_cnt, prochoice_cnt, ttl_cnt = get_proportion(followings)
            writer.writerow([user, prolife_prop, prochoice_prop,
                             prolife_cnt, prochoice_cnt, ttl_cnt])

---
#### The following cells fetch the followings of each follower sampled from the prolife/prochoice accounts, calculate the prolife/prochoice proportions among those followings, and save the followings in the `followings` folder

In [33]:
# Get follower list
# Get follower list: every field of every row in the two follower CSVs is
# one screen name.
files_to_read = ['./prolife_followers.csv', './prochoice_followers.csv']
users = []
for follower_csv in files_to_read:
    with open(follower_csv, 'r') as handle:
        for row in csv.reader(handle):
            users.extend(row)
# Sanity check on the sample: there should be 200 distinct followers.
assert len(set(users)) == 200 #there should be 200 followers

In [34]:
#key_path = "keys.json"
#fetch_data(['GaryMillrat'], key_path, 'proportion.csv')

In [35]:
# Compute the proportions for the sampled followers from the files already
# saved in ./followings (no API calls) and append them to proportion_temp.csv.
get_proportion_from_file(users, 'proportion_temp.csv')

user RebeccaG2012 in process
user ClevelandRTL in process
user marthahorton2 in process
user johng167 in process
user Manuela2day in process
user HopeBeats in process
user SamuelRWeaver1 in process
user kelseamcbelsea in process
user molly_trivette in process
user BrianChiasson in process
user PeterJohnRowe in process
user VLabarum in process
user TheOpenMicah in process
user wafulkerson1 in process
user kgscanlon in process
user Pearl_Barney in process
user rabbrady in process
user Alboteaparty in process
user AllanLohr in process
user turnerdina1 in process
user WarrenDavidson in process
user Elentari98 in process
user Linzito85 in process
user ShinobiShawn2 in process
user DailyCatechist in process
user jackeekgonzalez in process
user derrickj2015 in process
user UKStorageGuru in process
user kimidwa in process
user MikeMorganMN in process
user fedewa_agnes in process
user misbehavens in process
user CSLCHSnMore in process
user amyrpatton in process
user TomBurdolski in process
user

---
#### The following cells fetch the followings of each prolife/prochoice account, calculate the prolife/prochoice proportions among those followings, and save the followings in the `followings` folder

In [25]:
# Live fetch (disabled): uncomment the two lines below to download the
# followings through the Twitter API instead of using the saved files.
#key_path = "keys.json"
#fetch_data(prolife_users, key_path, 'prolife_proportion.csv')
# Compute the proportions for the prochoice accounts from the files already
# saved in ./followings and append them to prochoice_proportion_temp.csv.
get_proportion_from_file(prochoice_users, 'prochoice_proportion_temp.csv')

user freesafelegal in process
user AbortionFunds in process
user Abortion_Rights in process
user DCAbortionFund in process
user NatAbortionFed in process
user AbortionCare in process
user AbortionSupport in process
user PretermMAML in process
user NJAAF in process
user ChiAbortionFund in process
user AbortionRights in process
user NDWINFund in process
user needabortionire in process
user WholeWomans in process
user roisiningle in process
user StephHerold in process
user BaltimoreFund in process
user prochoiceNSW in process
user RBraceySherman in process
user Doctors4Choice in process
user robinmarty in process
user NCAbortionFund in process
user ShoutYrAbortion in process
user NYAAF in process
user BronxAbortion in process
user MSFC in process
user AbortionStories in process
user abortion_clinic in process
user SeaChangeProg in process
user Safe_Abortion in process
user Feminists4Life in process
user alranztweets in process
user nolaAbortionFnd in process
user TammiKromenaker in proces

In [24]:
# Live fetch for two prolife accounts: this calls the Twitter API through
# fetch_data, which sleeps 180 seconds after each user.
# NOTE(review): this rebinds `users`, the same name the follower-list cell
# assigns; re-run that cell before using `users` as the follower list again.
users = ["AmerLifeLeague", 'ProLifeDem']
key_path = "keys.json"
fetch_data(users, key_path, 'prolife_proportion_temp.csv')

user AmerLifeLeague in process
user ProLifeDem in process
