# Italian Politicians Account Network Scraping


In [1]:
import tweepy
import pandas as pd
import datetime 
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt

c:\Users\Dylan\AppData\Local\Programs\Python\Python39\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\Dylan\AppData\Local\Programs\Python\Python39\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll


In [2]:
import os

# Twitter API credentials.
# They are read from environment variables so that real keys never have to be
# written into (and accidentally published with) the notebook. The 'xxx'
# placeholders are kept as fallbacks, so the cell behaves as before when the
# variables are not set.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', 'xxx')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', 'xxx')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'xxx')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', 'xxx')

# OAuth 1.0a user-context authentication.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Single API client shared by every cell below; wait_on_rate_limit=True is
# requested so long scraping runs pause on rate limits instead of failing.
api = tweepy.API(auth=auth, wait_on_rate_limit=True)

In [3]:
# Number of profiles requested per page from the friends/followers endpoints
# (200 is the maximum accepted by the Twitter v1.1 list endpoints).
_CONTACTS_PAGE_SIZE = 200
# Maximum number of contacts kept per user wherever a cap applies.
_CONTACTS_LIMIT = 5000


def _fetch_screen_names(api_method, screen_name, limit=None):
    """Page through a friends/followers endpoint and return the contacts' screen names.

    :param api_method: bound ``api.get_friends`` or ``api.get_followers``;
    :param str screen_name: the user whose contacts are fetched;
    :param limit: maximum number of names to return, or None for no cap.
    :return: a list of screen names (at most ``limit`` when a cap is given).
    """
    names = []
    cursor = tweepy.Cursor(api_method, screen_name=screen_name, count=_CONTACTS_PAGE_SIZE)
    for page in cursor.pages():
        names.extend(profile.screen_name for profile in page)
        # Stop on the number of names actually collected, not on the user's
        # total count: the latter would abort right after the first page.
        if limit is not None and len(names) >= limit:
            break
    return names if limit is None else names[:limit]


def _edge_frame(sources, targets):
    """Build a ('source', 'target') DataFrame from two equally long lists."""
    return pd.DataFrame({'source': sources, 'target': targets}, columns=['source', 'target'])


def fetch_contacts(api, user_list, contacts):
    """
        Obtains friends and/or followers of users from a given list of users.

        - if contacts = 'friends' fetches the profiles followed by each user in the list;
        - if contacts = 'followers' fetches the profiles following each user in the list;
        - if contacts = 'all', performs both operations (all friends rows first,
          then all followers rows).

        The contacts are stored in a DataFrame, where each row represents an instance of a relationship
        i.e. the profile in the 'source' col follows the profile in the 'target' col.

        Followers are capped at the first 5000 profiles per user; friends are capped
        at 5000 only in 'all' mode ('friends' mode fetches the complete list).

        A user whose contacts cannot be fetched (any tweepy.TweepyException) is
        reported on stdout and skipped, so one bad account does not abort the run.
        The separate user lookup made by earlier versions was dropped: it was only
        used for the count check and the progress message.

    :param tweepy.API api: an authenticated tweepy API client;
    :param list user_list: a list of screen names of users to fetch friends and followers from;
    :param str contacts: the kind of contacts wanted for each user in the list:
        -'friends' are profiles followed by the user(s);
        -'followers' are profiles following the user(s);
        -'all' is the union of friends and followers.
    :return: a DataFrame with columns 'source' and 'target'; the row index restarts
        at 0 for every user/direction, as each chunk is concatenated as-is.
    :raises ValueError: if contacts is not one of 'friends', 'followers', 'all'.
    """
    if contacts not in ('friends', 'followers', 'all'):
        raise ValueError("Unknown mode: contacts should be one of 'friends', 'followers', 'all'")

    frames = []         # one edge DataFrame per successfully fetched user/direction
    friend_counts = {}  # friends found per user, reused for the 'all' summary line

    if contacts in ('friends', 'all'):
        friends_limit = None if contacts == 'friends' else _CONTACTS_LIMIT
        for u in user_list:
            try:
                friends = _fetch_screen_names(api.get_friends, u, friends_limit)
            except tweepy.TweepyException as exc:
                print("error while fetching friends of", u, ":", exc)
                continue
            friend_counts[u] = len(friends)
            if contacts == 'friends':
                print(u, 'follows', len(friends), 'profiles')
            # the user follows each friend: user -> friend
            frames.append(_edge_frame([u] * len(friends), friends))

    if contacts in ('followers', 'all'):
        for u in user_list:
            try:
                followers = _fetch_screen_names(api.get_followers, u, _CONTACTS_LIMIT)
            except tweepy.TweepyException as exc:
                print("error while fetching followers of", u, ":", exc)
                continue
            if u in friend_counts:
                # 'all' mode: report this user's own friend count, not a stale one
                print(u, 'follows', friend_counts[u], 'profiles \n',
                      u, 'is followed by', len(followers), 'profiles')
            else:
                print(u, 'is followed by', len(followers), 'profiles')
            # each follower follows the user: follower -> user
            frames.append(_edge_frame(followers, [u] * len(followers)))

    if not frames:
        # pd.concat refuses an empty list; return the empty edge table instead
        return pd.DataFrame(columns=['source', 'target'])
    # A single concat replaces repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(frames)

In [3]:
def twitter_monitor(api, user_list):
    """
      Fetches a list of features from each account in user_list and saves them into a DataFrame

      Columns: 'screen_name', 'description', 'statuses_count', 'friends_count',
      'followers_count', 'account_age_days', 'average_tweets'. The counts and
      'average_tweets' are stored as strings; 'account_age_days' is an integer.
      'average_tweets' is statuses_count / account_age_days rounded to 2 decimals.

    :param tweepy.API api: an authenticated tweepy API client;
    :param list user_list: a list of screen names of users to fetch features from;
    :returns: a DataFrame where each row represents a user and each column a feature.
        Every row carries index label 0, as in earlier versions where each user
        was appended as its own one-row frame.
    """
    columns = ['screen_name', 'description', 'statuses_count', 'friends_count', 'followers_count',
               'account_age_days', 'average_tweets']
    rows = []
    today = datetime.date.today()
    for account in user_list:
        user = api.get_user(screen_name=account)
        account_age_days = (today - user.created_at.date()).days
        # An account created today has an age of 0 days; divide by at least 1
        # day so the average does not raise ZeroDivisionError.
        average_tweets = round(user.statuses_count / max(account_age_days, 1), 2)
        rows.append([
            str(user.screen_name),
            str(user.description),
            str(user.statuses_count),
            str(user.friends_count),
            str(user.followers_count),
            account_age_days,
            str(average_tweets),
        ])
    if not rows:
        return pd.DataFrame(columns=columns)
    # Built in one go instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.DataFrame(rows, columns=columns, index=[0] * len(rows))

## Collecting Friends of Italian Politicians

In [4]:
# Screen names of the Italian politicians whose networks are scraped.
italian_politicians = [
    "EnricoLetta",
    "GiuseppeConteIT",
    "luigidimaio",
    "matteorenzi",
    "GiorgiaMeloni",
    "matteosalvinimi",
    "CarloCalenda",
    "berlusconi",
    "civati",
    "NFratoianni",
    "AngeloBonelli1",
    "emmabonino",
]

# Edge list of the profiles each politician follows
# (source = politician, target = followed profile).
political_net = fetch_contacts(api, italian_politicians, 'friends')

In [5]:
# One summary row of account features per politician; the bare name on the
# last line makes the notebook display the resulting table.
recap = twitter_monitor(api, italian_politicians)
recap

Unnamed: 0,screen_name,description,statuses_count,friends_count,followers_count,account_age_days,average_tweets
0,EnricoLetta,"Segretario del Partito Democratico @pdnetwork,...",18279,14104,722299,3949,4.63
0,GiuseppeConteIT,Giurista e Avvocato. Professore ordinario di d...,2291,138,1134921,1575,1.45
0,luigidimaio,Ministro degli Affari Esteri e della Cooperazi...,7185,331,776440,4838,1.49
0,matteorenzi,Senatore di Firenze - @ItaliaViva,14040,979,3373484,4998,2.81
0,GiorgiaMeloni,"Romana, classe '77. Politica e giornalista. Se...",19702,927,1272567,4544,4.34
0,matteosalvinimi,Leader della Lega #25settembrevotoLega 🇮🇹,54047,2007,1495439,4194,12.89
0,CarloCalenda,"@Azione_it - ""Un uomo che non si interessa del...",34893,162,409522,3093,11.28
0,berlusconi,Presidente Forza Italia. Eurodeputato Gruppo d...,5270,67,224332,1794,2.94
0,civati,,57335,6375,409241,5303,10.81
0,NFratoianni,"segretario nazionale di Sinistra Italiana, dom...",62943,2014,65863,3941,15.97


In [7]:
# recap.to_csv('additional_info_politics.csv', index=False)

In [None]:
# political_net.to_csv("politicians_network_v1.csv") # saving the df to csv