# Search Tool Members of the Parliament: GoogleAPI - Classification

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import tweepy
from tqdm.notebook import tqdm
import time

# Classification using Voting Classifier
from batoomer.twitter_nodes import classification
from batoomer.extras.fo_utils import get_data_parl_fo
from batoomer.extras.fr_utils import get_data_parl_fr
from batoomer.extras.nd_utils import get_text_data_parl_nd
from batoomer.extras.tme_utils import get_numeric_data_parl_tme, get_text_data_parl_tme

In [2]:
twitter_credentials = []
with open('../../../../twitter_credentials.json', 'r') as f:
    twitter_credentials = json.load(f)        

# TwitterAPI Authentication
auth = tweepy.OAuthHandler(twitter_credentials['consumer_key'],
                                   twitter_credentials['consumer_secret'])
auth.set_access_token(twitter_credentials['access_token_key'],
                              twitter_credentials['access_token_secret'])
API = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, timeout=60*5, retry_count=5)

# CLassifier ND
---

# Fullnames

In [3]:
fn = pd.read_csv('Google-Search-Fullnames-Parliament-Members.csv')
fn = fn.replace(np.nan, '')
fn.head()

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,Result 6,Result 7
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,@ViliardosV,@ViliardosV,,,,,
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,@anaik2020,@kaityAlex,@ΖΩΗΣ,,,,
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,@MariaAt03902914,@desp1navandi,@MariaAt03902914,@Kathimerini_gr,,,
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,@B3Vagenas,,,,,,
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,,,,,,,


In [4]:
def get_ids(df):
    true_id = []
    for acc in tqdm(df, leave=False):
        if acc != '':
            try:
                user = API.get_user(acc)
                true_id.append(user.id)
            except tweepy.TweepError as err:
                true_id.append(0)
        else:
            true_id.append(0)
    return true_id

In [5]:
cols = ['Twitter Handle']
cols += [f'Result {i+1}' for i in range(7)]
for col in cols:
    fn[col] = get_ids(fn[col])

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

Rate limit reached. Sleeping for: 551


  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

In [9]:
clf = classification.Classifier(model_name='classifier_parl_nd', twitter_credentials=twitter_credentials, verbose=0)
results = pd.DataFrame()
counter = 1
for i in fn.index: 
    try:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(7)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])
            
    except Exception as err:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(7)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])

Progress: 299/300

In [25]:
results = results.replace(np.nan, 0)
results = results.astype(int)
results = results.reset_index().drop('index', axis = 1)

In [40]:
for i in range(5):
    fn[f'Result {i+1}'] = results[i]

In [43]:
fn.to_csv('Google-Search-Fullnames-ClassifierND-Parliament-Members.csv', index=False)

# Lastnames

In [45]:
fn = pd.read_csv('Google-Search-Surnames-Parliament-Members.csv')
fn = fn.replace(np.nan, '')
fn.head()

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,Result 6,Result 7
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,@ViliardosV,@ViliardosV,,,,,
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,@xrisalex,@aalexopoulos97,@xrisalex,@christina,,,
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,@Athanas_HS,@athanasiou_marios,@XarAthan,@XarAthan,@verou_ath,@AthanasiouTh,
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,@loukas_vagenas,@B3Vagenas,,,,,
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,@AnnaAsimakopoul,@anna_michelle_nd,@A6TzzGauUShtz5c,,,,


In [46]:
def get_ids(df):
    true_id = []
    for acc in tqdm(df, leave=False):
        if acc != '':
            try:
                user = API.get_user(acc)
                true_id.append(user.id)
            except tweepy.TweepError as err:
                true_id.append(0)
        else:
            true_id.append(0)
    return true_id

In [48]:
cols = ['Twitter Handle']
cols += [f'Result {i+1}' for i in range(7)]
for col in cols:
    fn[col] = get_ids(fn[col])

  0%|          | 0/300 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [50]:
clf = classification.Classifier(model_name='classifier_parl_nd', twitter_credentials=twitter_credentials, verbose=0)
results = pd.DataFrame()
counter = 1
for i in fn.index: 
    try:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(7)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])
            
    except Exception as err:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(7)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])

Progress: 299/300

In [51]:
results = results.replace(np.nan, 0)
results = results.astype(int)
results = results.reset_index().drop('index', axis = 1)

In [53]:
for i in range(4):
    fn[f'Result {i+1}'] = results[i]

In [56]:
fn.to_csv('Google-Search-Surnames-ClassifierND-Parliament-Members.csv', index=False)

# Fullnames Greeklish

In [57]:
fn = pd.read_csv('Google-Search-FullnamesLatin-Parliament-Members.csv')
fn = fn.replace(np.nan, '')
fn.head()

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,Result 6,Result 7,Result 8
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,@ViliardosV,@ViliardosV,,,,,,
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,@anaik2020,@kkroonenberg,@djnikas,,,,,
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,@mariaathanasiou,@athanasiou_marios,@45b1584e2f7d481,@athinadi,@hotelthesantamaria,,,
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,@DimitriosVagen1,,,,,,,
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,@eri__valentina,@village_gr,,,,,,


In [59]:
def get_ids(df):
    true_id = []
    for acc in tqdm(df, leave=False):
        if acc != '':
            try:
                user = API.get_user(acc)
                true_id.append(user.id)
            except tweepy.TweepError as err:
                true_id.append(0)
        else:
            true_id.append(0)
    return true_id

In [60]:
cols = ['Twitter Handle']
cols += [f'Result {i+1}' for i in range(7)]
for col in cols:
    fn[col] = get_ids(fn[col])

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

In [61]:
clf = classification.Classifier(model_name='classifier_parl_nd', twitter_credentials=twitter_credentials, verbose=0)
results = pd.DataFrame()
counter = 1
for i in fn.index: 
    try:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(7)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])
            
    except Exception as err:
        print(f'Progress: {i}/300', end='\r')
        nodes = list(fn[[f'Result {i+1}' for i in range(7)]].iloc[i].unique())
        nodes = [i for i in nodes if i != 0]
        if nodes:
            labels = clf.predict(nodes)
            filtr = pd.DataFrame([nodes, labels]).T
            filtr = filtr[filtr[1] == 1]
            if not filtr.empty:
                results = results.append(filtr[filtr[1] == 1][0])
            else:
                results = results.append([0])
        else:
            results = results.append([0])

Progress: 299/300

In [62]:
results = results.replace(np.nan, 0)
results = results.astype(int)
results = results.reset_index().drop('index', axis = 1)

In [63]:
results

Unnamed: 0,0,2,1,3
0,174226430,0,0,0
1,1168241295149740032,0,0,0
2,0,0,0,0
3,0,0,0,0
4,0,0,0,0
...,...,...,...,...
295,709702013865562112,0,0,0
296,1059819442572611584,0,0,0
297,1150482424134086656,0,0,0
298,0,0,0,0


In [64]:
for i in range(4):
    fn[f'Result {i+1}'] = results[i]

In [66]:
fn.to_csv('Google-Search-FullnamesLatin-ClassifierND-Parliament-Members.csv', index=False)