# Search Tool Members of the Parliament: TwitterAPI - Classification

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import tweepy
from tqdm.notebook import tqdm
import time

# Classification using Voting Classifier
from batoomer.twitter_nodes import classification
from batoomer.extras.fo_utils import get_data_parl_fo
from batoomer.extras.fr_utils import get_data_parl_fr
from batoomer.extras.nd_utils import get_text_data_parl_nd
from batoomer.extras.tme_utils import get_numeric_data_parl_tme, get_text_data_parl_tme

In [2]:
twitter_credentials = []
with open('../../../../twitter_credentials.json', 'r') as f:
    twitter_credentials = json.load(f)        

# TwitterAPI Authentication
auth = tweepy.OAuthHandler(twitter_credentials['consumer_key'],
                                   twitter_credentials['consumer_secret'])
auth.set_access_token(twitter_credentials['access_token_key'],
                              twitter_credentials['access_token_secret'])
API = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, timeout=60*5, retry_count=5)

# CLassifier ND
---

# Fullnames

In [55]:
fn = pd.read_csv('Twitter-Search-Fullnames-Parliament-Members.csv')
fn = fn.replace(np.nan, '')
fn.head()

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,...,Result 11,Result 12,Result 13,Result 14,Result 15,Result 16,Result 17,Result 18,Result 19,Result 20
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,2801769645,1369317869264248838,3883801823,4841853117,1352034271477837826,...,1272520399973232640,774353219464093696,712359665833795585,905126924397281282,513081418,1263928166546096129,3346441403,764794474144858112,4841988202,409176426
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [56]:
def get_ids(df):
    true_id = []
    for acc in tqdm(df, leave=False):
        if acc != '':
            try:
                user = API.get_user(acc)
                true_id.append(user.id)
            except tweepy.TweepError as err:
                true_id.append(0)
        else:
            true_id.append(0)
    return true_id

In [57]:
cols = ['Twitter Handle']
for col in cols:
    fn[col] = get_ids(fn[col])

  0%|          | 0/300 [00:00<?, ?it/s]

In [58]:
clf = classification.Classifier(model_name='classifier_parl_nd', twitter_credentials=twitter_credentials, verbose=0)
results = pd.DataFrame()
counter = 1
for i in fn.index: 
    try:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(20)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])
            
    except Exception as err:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(20)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])

Progress: 299/300

In [59]:
results = results.replace(np.nan, 0)
results = results.astype(int)
results = results.reset_index().drop('index', axis = 1)

In [60]:
results

Unnamed: 0,0,1,4
0,0,0,0
1,0,0,0
2,0,0,0
3,0,0,0
4,0,0,0
...,...,...,...
295,709702013865562112,0,0
296,0,0,0
297,0,0,0
298,0,0,0


In [61]:
for i, col in enumerate(results.columns):
    fn[f'Result {i+1}'] = results[col]

In [63]:
fn = fn.drop([f'Result {i}' for i in range(3, 21)], axis =1)

In [67]:
fn.to_csv('Twitter-Search-Fullnames-ClassifierND-Parliament-Members.csv', index=False)

# Surnames

In [30]:
fn = pd.read_csv('Twitter-Search-Surnames-Parliament-Members.csv')
fn = fn.replace(np.nan, '')
fn.head()

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,...,Result 11,Result 12,Result 13,Result 14,Result 15,Result 16,Result 17,Result 18,Result 19,Result 20
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,174226430,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,126143952,93629265,282486425,2596344709,167572986,...,2569485777,2329900741,2694843255,2370663787,4641601163,1373670869613084674,507489429,4209308067,3110162349,1189778967546474498
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,965785002,2840171877,424495304,1847450509,484212704,...,1697401291,1050337249496391681,799200059464318976,3253495824,2466023912,582976052,984469619023994880,705406032,886626800,860197691447549954
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,1977451178,1049215741,1534824312,1264145037929185280,739463983405662208,...,2400905457,1302266183790604290,3167304245,4219002089,1209556114725130240,3351186423,970798547460395008,3244790992,2161086549,704032421696643073
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,533041104,998828010181709825,1148229718627540992,405567257,1216171735,...,3107550591,3029871778,1851852092,863376977331400705,2463333126,842354478,1361908210068836353,4179542776,3172032267,3923205689


In [31]:
def get_ids(df):
    true_id = []
    for acc in tqdm(df, leave=False):
        if acc != '':
            try:
                user = API.get_user(acc)
                true_id.append(user.id)
            except tweepy.TweepError as err:
                true_id.append(0)
        else:
            true_id.append(0)
    return true_id

In [32]:
cols = ['Twitter Handle']
for col in cols:
    fn[col] = get_ids(fn[col])

  0%|          | 0/300 [00:00<?, ?it/s]

In [33]:
clf = classification.Classifier(model_name='classifier_parl_nd', twitter_credentials=twitter_credentials, verbose=0)
results = pd.DataFrame()
counter = 1
for i in fn.index: 
    try:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(20)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])
            
    except Exception as err:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(20)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])

Progress: 299/300

In [34]:
results = results.replace(np.nan, 0)
results = results.astype(int)
results = results.reset_index().drop('index', axis = 1)

In [36]:
for i, col in enumerate(results.columns):
    fn[f'Result {i+1}'] = results[col]

In [39]:
fn = fn.drop([f'Result {i}' for i in range(6, 21)], axis =1)

In [40]:
fn

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,174226430,174226430,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,1168241295149740038,0,93629265,0,0,0
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,1153988844498444288,965785002,2840171877,0,0,0
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,1302266183790604290,0,0,0,0,0
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,1148229718627540992,533041104,0,1148229718627540992,0,0
...,...,...,...,...,...,...,...,...,...,...
295,Σαρακιώτης Αθανασίου Ιωάννης,Σαρακιώτης,Σαρακιώτης Ιωάννης,ΣΥΡΙΖΑ,709702013865562112,709702013865562112,0,0,0,0
296,Πέρκα Χαράλαμπου Θεοπίστη (Πέτη),Πέρκα,Πέρκα Πέτη,ΣΥΡΙΖΑ,1059819442572611584,0,0,0,0,0
297,Μάλαμα Ιωάννη Κυριακή,Μάλαμα,Μάλαμα Κυριακή,ΣΥΡΙΖΑ,1150482424134086659,0,0,0,0,0
298,Πολάκης Πέτρου Παύλος,Πολάκης,Πολάκης Παύλος,ΣΥΡΙΖΑ,2367424185,0,0,0,0,0


In [41]:
fn.to_csv('Twitter-Search-Surnames-ClassifierND-Parliament-Members.csv', index=False)

# Surnames Greeklish

In [43]:
fn = pd.read_csv('Twitter-Search-SurnamesLatin-Parliament-Members.csv')
fn = fn.replace(np.nan, '')
fn.head()

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,...,Result 11,Result 12,Result 13,Result 14,Result 15,Result 16,Result 17,Result 18,Result 19,Result 20
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,174226430,246390608,849013729,856448283547320322,1032949155314970624,...,0,0,0,0,0,0,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,320162191,73234260,3373083677,1187678476977000448,1109429795962662912,...,1168241295149740038,263784628,599366320,171661692,1325123115366707200,576183073,1194241307251789826,51597099,223945318,72824620
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,1088970967,880033291,2591503675,2495322266,103608120,...,35453950,517875912,812762451485818881,230836510,53700015,421787378,1192655092400283648,703958160,711712332,38287046
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,253071752,3340651392,808532589640171520,277694245,879300336,...,63101531,538678651,403661679,137151534,1049215741,1002016027,897883385745551360,3291502453,1118028346145824771,90375469
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,533041104,714120024,2829422892,1305799456080695296,2249352493,...,1342908256591736834,275945170,1127930818712743936,2436824634,3814339281,931266633351516160,813814300968108032,1276660778519670786,2569601167,1188808110074413057


In [44]:
def get_ids(df):
    true_id = []
    for acc in tqdm(df, leave=False):
        if acc != '':
            try:
                user = API.get_user(acc)
                true_id.append(user.id)
            except tweepy.TweepError as err:
                true_id.append(0)
        else:
            true_id.append(0)
    return true_id

In [45]:
cols = ['Twitter Handle']
for col in cols:
    fn[col] = get_ids(fn[col])

  0%|          | 0/300 [00:00<?, ?it/s]

In [46]:
clf = classification.Classifier(model_name='classifier_parl_nd', twitter_credentials=twitter_credentials, verbose=0)
results = pd.DataFrame()
counter = 1
for i in fn.index: 
    try:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(20)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])
            
    except Exception as err:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(20)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])

Progress: 297/300

Rate limit reached. Sleeping for: 191


Progress: 299/300

In [47]:
results = results.replace(np.nan, 0)
results = results.astype(int)
results = results.reset_index().drop('index', axis = 1)

In [48]:
results

Unnamed: 0,0,10,1,3,8,9,2,6,4,12,13,16,5,7,14,17,15,19,18
0,174226430,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1168241295149740032,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,533041104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,709702013865562112,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
296,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
297,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
298,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [50]:
for i, col in enumerate(results.columns):
    fn[f'Result {i+1}'] = results[col]

In [52]:
fn = fn.drop([f'Result {i}' for i in range(19, 21)], axis =1)

In [54]:
fn.to_csv('Twitter-Search-SurnamesLatin-ClassifierND-Parliament-Members.csv', index=False)