# Classification of Twitter user accounts

In [1]:
import m3inference
import botometer
import pprint
from os.path import join
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
import time

In [2]:
src = '../data'

## Botometer

### Perform the classification

In [3]:
# read the API keys
twitter_API_info = {}
with open("twitter_API_david.txt") as f:
    for line in f:
        key, val = line.partition("=")[::2]
        if key != 'bearer_token':
            twitter_API_info[key] = val.strip('\n')
            
with open("botometer_API.txt") as f:
    for line in f:
        key, val = line.partition("=")[::2]
        if key == 'x-rapidapi-key':
            rapidapi_key = val.strip('\n')

In [14]:
data = pd.read_csv(join(src, 'diagnosed_user_names.txt'), header=None)
data = data.rename(columns={0:'user_name'})

In [16]:
bom = botometer.Botometer(wait_on_ratelimit=True,
                          rapidapi_key=rapidapi_key,
                          **twitter_API_info)

In [19]:
batch_start = 25
batch_end = 175
chunk_size = 25
N_chunks = (batch_end - batch_start) //chunk_size

for chunk in range(N_chunks):
    start = batch_start + chunk * chunk_size
    end = batch_start + (chunk + 1) * chunk_size
    print('chunk {} - {}'.format(start, end))
    
    usernames = data.iloc[start:end]['user_name'].values
    json_data = []
    for screen_name, result in bom.check_accounts_in(usernames):
        print(screen_name)
        json_data.append({screen_name:result})
        
    with open(join(src, 'botscores', 'diagnosed_users_{}_to_{}.jsonl'\
                  .format(start, end)), 'a') as json_file:
        for entry in json_data:
            json.dump(entry, json_file)
            json_file.write('\n')

chunk 25 - 50
RasenBran
dantheman120894
kunjbihariarora
Paranormal_D_I
nickgarcia237
kittyjharry
_theSashaFierce
AaronArroyos
petponygirl
KC_Wombat
Spoiled__Beauty
delaneykalea
Marlita87
BLOODCODE
Deanmarkham1
StevieA42
geeky_fandom
NintendoBandage
BlondeBlogger
baabaa89
vucub_
JacquiePresley
verchetta
___beii
SteveM962
chunk 50 - 75
Savor_flavors
davewayne09
RatedDForDerp
SeanCMack
nychick823
ukSJS
Goose454
ArcJamsTTV
jschelleman
softeyes
holyrofler
NickLewis37
mewtmewt
Diabetes_HK
Erademach
sgprayer
bigdaddychance
caIlaber
Mr_Youngin1
ShaneKarma
bradjewell1050
heyitsemilyc
TheReason540
Chris_Stocker
Logan_SB
chunk 75 - 100
wildespinxsa
tabeeiscoo
livfaiers_
nohaaloha
THEPRETTTYBITCH
dremali
Moon_Pantaloons
KewiTheGirl
dodson_rusty
ONE___d1rection
nzscooper
_ThatGirlCee
DonnaAdams3
diabetesforums
GforGrayson
singa_terbang
JohnHaslam29
SamanthaDrye
Its_MissQ_ToYou
maxwell_the_cat
Champion_Church
lunapls
buybodybyvi
ViMyBodyVisalus
jamesdl5
chunk 100 - 125
whatcandiabetic
MillaJVzla
Per

### Gather the data

In [21]:
batch_start = 0
batch_end = 175
chunk_size = 25
N_chunks = (batch_end - batch_start) // chunk_size

json_data = []
for chunk in range(N_chunks):
    start = batch_start + chunk * chunk_size
    end = batch_start + (chunk + 1) * chunk_size
    print('chunk {} - {}'.format(start, end))
    
    with open(join(src, 'botscores', 'diagnosed_users_{}_to_{}.jsonl'\
                  .format(start, end)), 'r') as json_file:
        for l in json_file.readlines():
            if not l.strip (): # skip empty lines
                continue
            json_data.append(json.loads(l))

chunk 0 - 25
chunk 25 - 50
chunk 50 - 75
chunk 75 - 100
chunk 100 - 125
chunk 125 - 150
chunk 150 - 175


In [22]:
def flatten_botscore(entry):
    vals = entry[list(entry.keys())[0]]
    if 'error' in vals:
        print(entry)
        row = {
            'botometer_cap_english':np.nan,
            'botometer_cap_universal':np.nan,
            'botometer_raw_scores_english_astroturf':np.nan,
            'botometer_raw_scores_english_fake_follower':np.nan,
            'botometer_raw_scores_english_financial':np.nan,
            'botometer_raw_scores_english_other':np.nan,
            'botometer_raw_scores_english_overall':np.nan,
            'botometer_raw_scores_english_self_declared':np.nan,
            'botometer_raw_scores_english_spammer':np.nan,
            'botometer_raw_scores_universal_astroturf':np.nan,
            'botometer_raw_scores_universal_fake_follower':np.nan,
            'botometer_raw_scores_universal_financial':np.nan,
            'botometer_raw_scores_universal_other':np.nan,
            'botometer_raw_scores_universal_overall':np.nan,
            'botometer_raw_scores_universal_self_declared':np.nan,
            'botometer_raw_scores':np.nan,
            'botometer_majority_lang':np.nan,
            'botometer_username':list(entry.keys())[0],
            'botometer_user_ID':np.nan,
            'bot':np.nan
        }
    
    else:
        row = {
            'botometer_cap_english':vals['cap']['english'],
            'botometer_cap_universal':vals['cap']['universal'],
            'botometer_raw_scores_english_astroturf':vals['raw_scores']['english']['astroturf'],
            'botometer_raw_scores_english_fake_follower':vals['raw_scores']['english']['fake_follower'],
            'botometer_raw_scores_english_financial':vals['raw_scores']['english']['financial'],
            'botometer_raw_scores_english_other':vals['raw_scores']['english']['other'],
            'botometer_raw_scores_english_overall':vals['raw_scores']['english']['overall'],
            'botometer_raw_scores_english_self_declared':vals['raw_scores']['english']['self_declared'],
            'botometer_raw_scores_english_spammer':vals['raw_scores']['english']['spammer'],
            'botometer_raw_scores_universal_astroturf':vals['raw_scores']['universal']['astroturf'],
            'botometer_raw_scores_universal_fake_follower':vals['raw_scores']['universal']['fake_follower'],
            'botometer_raw_scores_universal_financial':vals['raw_scores']['universal']['financial'],
            'botometer_raw_scores_universal_other':vals['raw_scores']['universal']['other'],
            'botometer_raw_scores_universal_overall':vals['raw_scores']['universal']['overall'],
            'botometer_raw_scores_universal_self_declared':vals['raw_scores']['universal']['self_declared'],
            'botometer_raw_scores':vals['raw_scores']['universal']['spammer'],
            'botometer_majority_lang':vals['user']['majority_lang'],
            'botometer_username':vals['user']['user_data']['screen_name'],
            'botometer_user_ID':int(vals['user']['user_data']['id_str'])
        }
    
        if (vals['raw_scores']['english']['astroturf'] > vals['cap']['english']) or\
           (vals['raw_scores']['english']['fake_follower'] > vals['cap']['english']) or\
           (vals['raw_scores']['english']['financial'] > vals['cap']['english']) or\
           (vals['raw_scores']['english']['other'] > vals['cap']['english']) or\
           (vals['raw_scores']['english']['overall'] > vals['cap']['english']) or\
           (vals['raw_scores']['english']['self_declared'] > vals['cap']['english']) or\
           (vals['raw_scores']['english']['spammer'] > vals['cap']['english']):
            row.update({'bot':True})
        else:
            row.update({'bot':False})
    
    return row

In [23]:
botscores = pd.DataFrame()
for entry in json_data:
    botscores = botscores.append(flatten_botscore(entry), ignore_index=True)
    
botscores.to_csv(join(src, 'diagnosed_users_botscores.csv'),
                index=False)

In [46]:
data = pd.read_csv(join(src, 'user_diagnosis_dates.csv'))
data = data.set_index('author.username')

In [49]:
botscores = pd.read_csv(join(src, 'diagnosed_users_botscores.csv'))
botscores = botscores.dropna(subset=['botometer_user_ID'])
botscores['botometer_user_ID'] = botscores['botometer_user_ID'].astype(int)
botscores = botscores.set_index('botometer_username')

## Organisations, Gender and Age

### M3 inference

**Note:** This classification uses the Twitter v1 API. Re-querying all user profile info is somewhat redundant and a clutch. Should change this as soon as the m3-inference library can handle the v2 API.

In [57]:
m3Twitter = m3inference.M3Twitter(cache_dir=join(src, 'twitter_cache'))
m3Twitter.twitter_init_from_file('m3_auth.txt')

06/22/2021 09:17:47 - INFO - m3inference.m3inference -   Version 1.1.5
06/22/2021 09:17:47 - INFO - m3inference.m3inference -   Running on cpu.
06/22/2021 09:17:47 - INFO - m3inference.m3inference -   Will use full M3 model.
06/22/2021 09:17:47 - INFO - m3inference.m3inference -   Model full_model exists at /home/jana/m3/models/full_model.mdl.
06/22/2021 09:17:47 - INFO - m3inference.utils -   Checking MD5 for model full_model at /home/jana/m3/models/full_model.mdl
06/22/2021 09:17:48 - INFO - m3inference.utils -   MD5s match.
06/22/2021 09:17:48 - INFO - m3inference.m3inference -   Loaded pretrained weight at /home/jana/m3/models/full_model.mdl


True

In [60]:
# do the follower filter first, so we don't have to do the (computationally
# expensive) classification on the users we were going to throw away anyways
users = pd.read_csv(join(src, 'diagnosed_user_names.txt'), header=None)
users = users.rename(columns={0:'username'})
users.set_index('username', inplace=True)

In [61]:
users['female_prob'] = np.nan
users['age_<=18_prob'] = np.nan
users['age_19-29_prob'] = np.nan
users['age_30-39_prob'] = np.nan
users['age_>=40_prob'] = np.nan
users['org_prob'] = np.nan

# Note: at index 1910 the classifier started to produce a lot of 
# WARNINGS and wasn't able to fetch user info anymore. I suspect the
# problem is a rate limit.

# The Twitter API v1 user lookup rate limmit is 900 requests per 15 min
# time window
i = 0
curr_batch_counter = 0
rate_limit = 900
start = datetime.now()
respect = False

for username, row in users.iterrows():
    if i%10 == 0:
        print(i)
    
    # respect the rate limit
    if respect and curr_batch_counter == rate_limit:
        curr = datetime.now()
        diff = (curr - start).total_seconds()
        if  diff / 60.0 < 15:
            print('respecting the rate by sleeping {} seconds'.format(diff))
            time.sleep(15 * 60 - diff + 10) # sleep some more, just to be sure
        curr_batch_counter = 0
        start = datetime.now()
        
    pred = m3Twitter.infer_screen_name(username)['output']
    users.loc[username, 'female_prob'] = pred['gender']['female']
    users.loc[username, 'age_<=18_prob'] = pred['age']['<=18']
    users.loc[username, 'age_19-29_prob'] = pred['age']['19-29']
    users.loc[username, 'age_30-39_prob'] = pred['age']['30-39']
    users.loc[username, 'age_>=40_prob'] = pred['age']['>=40']
    users.loc[username, 'org_prob'] = pred['org']['is-org']
    i += 1
    curr_batch_counter += 1
    
users.to_csv(join(src, 'diagnosed_users_m3Classified.csv'))

06/22/2021 09:19:30 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for al25797448.
06/22/2021 09:19:30 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=al25797448


0


06/22/2021 09:19:33 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
06/22/2021 09:19:33 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for aussisassygob.
06/22/2021 09:19:33 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=aussisassygob
06/22/2021 09:19:35 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s]
06/22/2021 09:19:35 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for jl_schnelle.
06/22/2021 09:19:35 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=jl_schnelle
06/22/2021 09:19:37 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
06/22/2021 09:19:37 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for gothickhoneybee.
06/22/2021 09:19:3

10


06/22/2021 09:19:54 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.85it/s]
06/22/2021 09:19:54 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for honacostello.
06/22/2021 09:19:54 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=honacostello
06/22/2021 09:19:55 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
06/22/2021 09:19:56 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for bagelcate.
06/22/2021 09:19:56 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=bagelcate
06/22/2021 09:19:57 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
06/22/2021 09:19:58 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for daynabramston.
06/22/2021 09:19:58 - INFO

20


06/22/2021 09:20:12 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.58it/s]
06/22/2021 09:20:13 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for coffee_karla.
06/22/2021 09:20:13 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=coffee_karla
06/22/2021 09:20:14 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
06/22/2021 09:20:15 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for smartnfunny_.
06/22/2021 09:20:15 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=smartnfunny_
06/22/2021 09:20:16 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
06/22/2021 09:20:17 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for fidgetee1.
06/22/2021 09:20:17 - IN

30


06/22/2021 09:20:32 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
06/22/2021 09:20:32 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for _thesashafierce.
06/22/2021 09:20:32 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=_thesashafierce
06/22/2021 09:20:33 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
06/22/2021 09:20:34 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for aaronarroyos.
06/22/2021 09:20:34 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=aaronarroyos
06/22/2021 09:20:35 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s]
06/22/2021 09:20:36 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for petponygirl.
06/22/2021 09:20

40


06/22/2021 09:20:51 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
06/22/2021 09:20:51 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for geeky_fandom.
06/22/2021 09:20:51 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=geeky_fandom
06/22/2021 09:20:52 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.76it/s]
06/22/2021 09:20:53 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for nintendobandage.
06/22/2021 09:20:53 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=nintendobandage
06/22/2021 09:20:53 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
06/22/2021 09:20:54 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for blondeblogger.
06/22/2021 09:

50


06/22/2021 09:21:09 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.58it/s]
06/22/2021 09:21:09 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for davewayne09.
06/22/2021 09:21:09 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=davewayne09
06/22/2021 09:21:11 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.40it/s]
06/22/2021 09:21:11 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for rateddforderp.
06/22/2021 09:21:11 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=rateddforderp
06/22/2021 09:21:12 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.69it/s]
06/22/2021 09:21:13 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for seancmack.
06/22/2021 09:21:13 - IN

60


06/22/2021 09:21:30 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.66it/s]
06/22/2021 09:21:31 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for nicklewis37.
06/22/2021 09:21:31 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=nicklewis37
06/22/2021 09:21:33 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
06/22/2021 09:21:33 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for mewtmewt.
06/22/2021 09:21:33 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=mewtmewt
06/22/2021 09:21:35 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.54it/s]
06/22/2021 09:21:35 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for diabetes_hk.
06/22/2021 09:21:35 - INFO - m3i

70


06/22/2021 09:21:47 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
06/22/2021 09:21:47 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for heyitsemilyc.
06/22/2021 09:21:47 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=heyitsemilyc
06/22/2021 09:21:48 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]
06/22/2021 09:21:49 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for thereason540.
06/22/2021 09:21:49 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=thereason540
06/22/2021 09:21:55 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s]
06/22/2021 09:21:55 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for chris_stocker.
06/22/2021 09:21:55 

80


06/22/2021 09:22:09 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
06/22/2021 09:22:10 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for moon_pantaloons.
06/22/2021 09:22:10 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=moon_pantaloons
06/22/2021 09:22:12 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.69it/s]
06/22/2021 09:22:12 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for kewithegirl.
06/22/2021 09:22:12 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=kewithegirl
06/22/2021 09:22:13 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.97it/s]
06/22/2021 09:22:14 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for dodson_rusty.
06/22/2021 09:22:

90


06/22/2021 09:22:25 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
06/22/2021 09:22:26 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for johnhaslam29.
06/22/2021 09:22:26 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=johnhaslam29
06/22/2021 09:22:26 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
06/22/2021 09:22:27 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for samanthadrye.
06/22/2021 09:22:27 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=samanthadrye
06/22/2021 09:22:27 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.62it/s]
06/22/2021 09:22:28 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for its_missq_toyou.
06/22/2021 09:22:2

100


06/22/2021 09:22:41 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s]
06/22/2021 09:22:42 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for millajvzla.
06/22/2021 09:22:42 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=millajvzla
06/22/2021 09:22:43 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.39it/s]
06/22/2021 09:22:44 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for perifect.
06/22/2021 09:22:44 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=perifect
06/22/2021 09:22:46 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.99it/s]
06/22/2021 09:22:46 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for health_bits.
06/22/2021 09:22:46 - INFO - m3inf

110


06/22/2021 09:22:58 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
06/22/2021 09:22:59 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for aldivarubalcava.
06/22/2021 09:22:59 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=aldivarubalcava
06/22/2021 09:22:59 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]
06/22/2021 09:22:59 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for mullen1065.
06/22/2021 09:22:59 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=mullen1065
06/22/2021 09:23:02 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.90it/s]
06/22/2021 09:23:02 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for lilinikole.
06/22/2021 09:23:02 -

120


06/22/2021 09:23:14 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s]
06/22/2021 09:23:15 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for diabetesbuzz9.
06/22/2021 09:23:15 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=diabetesbuzz9
06/22/2021 09:23:15 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
06/22/2021 09:23:15 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for kirstransting.
06/22/2021 09:23:15 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=kirstransting
06/22/2021 09:23:16 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]
06/22/2021 09:23:17 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for wmjackson.
06/22/2021 09:23:17 

130


06/22/2021 09:23:28 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.52it/s]
06/22/2021 09:23:28 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for jhondavidson.
06/22/2021 09:23:28 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=jhondavidson
06/22/2021 09:23:30 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.44it/s]
06/22/2021 09:23:30 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for gapsmcgee.
06/22/2021 09:23:30 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=gapsmcgee
06/22/2021 09:23:36 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
06/22/2021 09:23:36 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for six966djjuwh.
06/22/2021 09:23:36 - INFO 

140


06/22/2021 09:23:45 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.95it/s]
06/22/2021 09:23:45 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for theaace.
06/22/2021 09:23:45 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=theaace
06/22/2021 09:23:46 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.45it/s]
06/22/2021 09:23:47 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for ethan_288.
06/22/2021 09:23:47 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=ethan_288
06/22/2021 09:23:47 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
06/22/2021 09:23:47 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for seriouslywine.
06/22/2021 09:23:47 - INFO - m3infer

150


06/22/2021 09:24:02 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.69it/s]
06/22/2021 09:24:02 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for ambassadorick.
06/22/2021 09:24:02 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=ambassadorick
06/22/2021 09:24:03 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.41it/s]
06/22/2021 09:24:04 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for recipesfree.
06/22/2021 09:24:04 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=recipesfree
06/22/2021 09:24:05 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
06/22/2021 09:24:05 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for annuitypayments.
06/22/2021 09:24:0

160


06/22/2021 09:24:16 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.67it/s]
06/22/2021 09:24:17 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for sweetlyaroundme.
06/22/2021 09:24:17 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=sweetlyaroundme
06/22/2021 09:24:19 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.81it/s]
06/22/2021 09:24:19 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for realrealpeople.
06/22/2021 09:24:19 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=realrealpeople
06/22/2021 09:24:20 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.78it/s]
06/22/2021 09:24:20 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for fuckurlife.
06/22/2021 09

170


06/22/2021 09:24:32 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
06/22/2021 09:24:33 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for tennhawkman.
06/22/2021 09:24:33 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=tennhawkman
06/22/2021 09:24:34 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
06/22/2021 09:24:35 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for zenjar.
06/22/2021 09:24:35 - INFO - m3inference.m3twitter -   GET /users/show.json?screen_name=zenjar
06/22/2021 09:24:36 - INFO - m3inference.dataset -   1 data entries loaded.
Predicting...: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
06/22/2021 09:24:36 - INFO - m3inference.m3twitter -   Results not in cache. Fetching data from Twitter for mysticprincess.
06/22/2021 09:24:36 - INFO - m3in

In [63]:
users['org'] = users['org_prob'] > 0.5

In [75]:
botscores['bot'] = botscores['bot'].replace({0:False, 1:True})
merged = data.merge(botscores['bot'], left_index=True, right_index=True)
merged = merged.merge(users['org'], left_index=True, right_index=True)
merged = merged.reset_index().rename(columns={'index':'username'})
merged.to_csv(join(src, 'user_bot_and_org_flags.csv'), index=False)

In [74]:
data[(data['bot'] == False) & (data['org'] == False)]

Unnamed: 0,author.id,created_at_date,bot,org
HonkeyKong,7348342,2010-06-16,False,False
softeyes,7572652,2015-02-12,False,False
Buckman,14129958,2010-12-01,False,False
sweetlyaroundme,14431614,2009-09-18,False,False
BlondeBlogger,14555801,2016-09-29,False,False
...,...,...,...,...
Spoiled__Beauty,807420105848160256,2017-08-11,False,False
MAKREACTS,855798951915773953,2019-04-10,False,False
smartnfunny_,886644350817619968,2019-03-30,False,False
GothickHoneybee,1076044413263011840,2020-11-06,False,False
