# Packages

In [109]:
# packages
import tweepy
import pandas as pd
import os

print("Tweepy version: " + tweepy.__version__)
print("Pandas version: " + pd.__version__)

Tweepy version: 4.4.0
Pandas version: 1.3.4


In [110]:
# import tokens from config.py file
if os.path.isfile("config.py"):
    print("config.py exists\nAPI keys and tokens are imported")
    from config import consumer_key, consumer_secret, access_token, access_token_secret
else:
    print("config.py does not exist\nPlease add config.py to proceed")

config.py exists
API keys and tokens are imported


# Setting up API

In [111]:
# setup consumer API key
auth = tweepy.OAuthHandler(
    consumer_key,
    consumer_secret
)

# setup access token
auth.set_access_token(
    access_token,
    access_token_secret
)

# create API variable
api = tweepy.API(
    auth, 
    wait_on_rate_limit = True
)

In [112]:
# check if API credentials work
try:
    api.verify_credentials()
    print("Authentication OK")
except:
    print("Error during authentication")

Authentication OK


# Testing Twitter API

In [113]:
# test if user_timeline method works with own twitter account
tweets_lw = api.user_timeline(
    screen_name = "lukas_warode",
    count = 1,
    include_rts = False,
    tweet_mode = 'extended'
)

# print type of user_timeline method object
type(tweets_lw)

# function to convert tweepy object to a pandas dataframe
def timeline_to_df(tweepy_timeline):
    json_data = [r._json for r in tweepy_timeline]
    df = pd.json_normalize(json_data)
    return df

# apply function 
tweets_lw_df = timeline_to_df(tweets_lw)

# print full text column of tweet dataframe
pd.options.display.max_colwidth = int(tweets_lw_df["full_text"].str.len())
print(tweets_lw_df["full_text"])

0    @p_c_bauer @MichaelImre Nice project! Seems to be a very rare coincidence, I worked basically on the same project last year while using the same name 😄\nhttps://t.co/QQtOQ...
Name: full_text, dtype: object


# Use csv file from WZB project to extract list of German MPs' Twitter accounts
## (Project author: Markus Konrad)

In [114]:
# read csv as dataframe from GitHub repository
wzb_df = pd.read_csv("https://raw.githubusercontent.com/WZBSocialScienceCenter/mdb-twitter-network/master/data/deputies_twitter_20190702.csv")

# create subset with the 2 relevant columns and drop NAs
twitter_df = wzb_df[["twitter_name", "party"]].dropna()

# Sampling approaches
## a) Get random MP Twitter handles

In [115]:
# Function to extract random MPs' Twitter handeles
def random_sample_handle(df, n):
    sample = df[["twitter_name"]].sample(n = n)
    name_string = sample.to_string(index = False, header = False)
    return name_string

# apply function
print(
    random_sample_handle(
        df = twitter_df,
        n = 5
    )
)

christianhirte
    mwbirkwald
 hildemattheis
    stbrandner
   monikalazar


## b) Extract Twitter handles by popularity

In [116]:
# follower count function
def follower_count_fun(twitter_handle):
    try: 
        user = api.get_user(screen_name = twitter_handle)
        count = user.followers_count
        return count
    except tweepy.TweepyException:
        pass

# for demonstration and simplification purposes we create a subset with Green MPs
twitter_df_greens = twitter_df[twitter_df["party"] == "DIE GRÜNEN"]

# store Twitter handles as list from data frame (column) with a function
def col_to_tidy_list(df, col):
    col_string = df[[col]].to_string(index = False, header = False)
    tidy_string = col_string.replace(" ", "")
    tidy_list = tidy_string.split("\n")
    return tidy_list

# test and print results
twitter_handles_list = col_to_tidy_list(
    twitter_df_greens,
    "twitter_name"
)

print(twitter_handles_list)

['kirstenkappert', 'konstantinnotz', 'markuskurthmdb', 'babetteschefin', 'sven_kindler', 'agnieszka_mdb', 'goeringeckardt', 'markustressel', 'beatewaro', 'julia_verlinden', 'jtrittin', 'k_sa', 'ulle_schauws', 'schickgerhard', 'manuelsarrazin', 'tabearoessner', 'crueffer', 'lisapaus', 'fostendorff', 'cem_oezdemir', 'nouripour', 'gruenebeate', 'irenemihalic', 'tobiaslindner', 'steffilemke', 'monikalazar', 'renatekuenast', 'chriskuehn_mdb', 'stephankuehn', 'oliver_krischer', 'mariaklschmeink', 'uwekekeritz', 'djanecek', 'brihasselmann', 'hajdukbundestag', 'kaigehring', 'matthiasgastel', 'katjadoerner', 'katdro', 'ebner_sha', 'ekindeligoez', 'fbrantner', 'kerstinandreae', 'abaerbock', 'w_sk', 'lieblingxhain', 'stefangelbhaar', 'danywagner_da', 'badulrichmartha', 'gruenclaudia', 'derdanyal', 'margaretebause', 'filizgreen', 'owvonholtz', 'svenlehmann', 'annachristmann']


In [117]:
# apply function in a for loop and store follower count in list
follower_count_list = []

for twitter_name in twitter_handles_list:
    follower_count_list.append(
        follower_count_fun(twitter_name)
    )

In [118]:
# print results 
print(follower_count_list)

[6167, 84887, 3963, None, 19407, 13571, 200394, 2009, None, 9568, 115090, 7408, 7677, 11898, 6524, 9205, 3428, 10948, None, 278473, 27549, 5743, 8851, 8865, 17131, 6333, 76462, 4417, 7486, 19507, 7635, 3352, 13609, 35223, 2375, 11896, 7409, 17848, 8364, 5335, 9343, 12710, 8861, 408889, 7506, 12857, 7740, 1705, 5584, 3047, 16974, 7331, 5539, 1740, 21824, 5047]


In [119]:
# add follower count list to data fraeme as a numeric column
twitter_df_greens["follower_count"] = follower_count_list

# print transformed data frame
print(twitter_df_greens)

        twitter_name       party  follower_count
34    kirstenkappert  DIE GRÜNEN          6167.0
47    konstantinnotz  DIE GRÜNEN         84887.0
67    markuskurthmdb  DIE GRÜNEN          3963.0
68    babetteschefin  DIE GRÜNEN             NaN
71      sven_kindler  DIE GRÜNEN         19407.0
84     agnieszka_mdb  DIE GRÜNEN         13571.0
92    goeringeckardt  DIE GRÜNEN        200394.0
98     markustressel  DIE GRÜNEN          2009.0
122        beatewaro  DIE GRÜNEN             NaN
129  julia_verlinden  DIE GRÜNEN          9568.0
135         jtrittin  DIE GRÜNEN        115090.0
166             k_sa  DIE GRÜNEN          7408.0
176     ulle_schauws  DIE GRÜNEN          7677.0
179    schickgerhard  DIE GRÜNEN         11898.0
185   manuelsarrazin  DIE GRÜNEN          6524.0
189    tabearoessner  DIE GRÜNEN          9205.0
194         crueffer  DIE GRÜNEN          3428.0
221         lisapaus  DIE GRÜNEN         10948.0
226      fostendorff  DIE GRÜNEN             NaN
228     cem_oezdemir

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twitter_df_greens["follower_count"] = follower_count_list


In [129]:
# filter observation with highest follower count
max_followers = twitter_df_greens["follower_count"].max()
twitter_df_greens[twitter_df_greens["follower_count"] == max_followers]

most_followers_mp = twitter_df_greens[twitter_df_greens["follower_count"] == max_followers]

TypeError: '(34     False
47     False
67     False
68     False
71     False
84     False
92     False
98     False
122    False
129    False
135    False
166    False
176    False
179    False
185    False
189    False
194    False
221    False
226    False
228    False
229    False
242    False
251    False
270    False
276    False
279    False
288    False
290    False
291    False
296    False
304    False
317    False
326    False
352    False
357    False
375    False
380    False
397    False
399    False
401    False
402    False
416    False
439    False
443     True
446    False
466    False
468    False
491    False
499    False
506    False
507    False
540    False
553    False
554    False
647    False
649    False
Name: follower_count, dtype: bool, 'twitter_name')' is an invalid key

In [120]:

# follower_count_dict = dict.fromkeys(list(twitter_handles_greens))

# print(follower_count_dict)

# for twitter_name in twitter_handles_greens:
#     follower_count_dict[twitter_name].append(follower_count_fun(twitter_name))


In [121]:
# baerbock_tweets = api.user_timeline(
#     screen_name = "",
#     count = Inf,
#     include_rts = False,
#     tweet_mode = 'extended'
# )

# tweepy.Cursor(
#     include
# )

In [122]:
# # Function to extract follower count with twitter handle
# def follower_count_fun(twitter_handle):
#     try: 
#         user = api.get_user(screen_name = twitter_handle)
#         count = user.followers_count
#         return count
#     except tweepy.TweepyException:
#         pass

# twitter_df_sample = twitter_df.sample(5)

# test_user = twitter_df.iloc[1:4, 0].to_string(index = False, header = False)

# for i in test_user:
#     print(i)
# # twitter_df_sample.assign(
# #     follower_count = api.get_user(screen_name = twitter_df[["twitter_name"]]).followers_count
# # )

# # follower_count_fun(
# #     twitter_df.iloc[5, 0].to_string(index = False, header = False)
# # )
    
# # twitter_df_sample = twitter_df.sample(10)

# # twitter_df_sample.assign(
# #     follower_count = follower_count_fun(twitter_df[["twitter_name"]])
# # )


# # print(
# #     tweepy.api.get_user(id = "karl_lauterbach")
# # )

# # twitter_handles

# # print(twitter_handles[1])

# # for mp in twitter_handles:
#     # print(mp)

# # for mp in twitter_handles:
# #     # user = api.get_user(id = mp)
# #     print(
# #         api.get_user(id = mp).followers_count
# #     )

# # join(twitter_handles[1])
    


# # user = api.get_user(id = twitter_handles[1])

# # user.followers_count