# Search Tool Members of the Parliament: TwitterAPI

----
Libraries:

In [1]:
import re
import numpy as np
import pandas as pd
import json 
from batoomer.twitter_nodes.search_engine import TwitterSearchEngine
from googleapiclient.errors import HttpError
from tqdm.notebook import tqdm
import time

---
Load Twitter credentials and initialize TwitterSearchEngine

In [2]:
# Read the Twitter API credentials from the JSON file and hand them to the
# search engine wrapper used by every search cell below.
keys = []
with open('../../../../twitter_credentials.json', 'r') as credentials_file:
    keys = json.load(credentials_file)

se = TwitterSearchEngine(twitter_credentials=keys)

---
## Dataset

In [3]:
# Load the parliament-member roster and turn every missing value into an
# empty string, then preview the first rows.
data = pd.read_csv(
    '../../../../datasets/Greek Politicians/search-tool/parliament-members-new.csv'
).replace(np.nan, '')
data.head(5)

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak


---
## Search: Fullname

In [6]:
# Run one Twitter search per politician full name and collect the per-query
# result frames.
queries = list(data['Name'])
frames = []

for query in tqdm(queries):
    try:
        se.search(query=query, count=10)
        frames.append(se.get_results())
    except Exception as err:
        # Best-effort: report the failure and keep going. A placeholder row is
        # still recorded because the merge cell below pairs results with
        # `data` by row position; skipping a row would shift every later
        # politician onto the wrong search results.
        # Assumes get_results() yields one row per query with a 'Query'
        # column (the merge cell renames that column) -- TODO confirm.
        print(f'Search failed for {query!r}: {err}')
        frames.append(pd.DataFrame({'Query': [query]}))

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration; concatenate once instead.
results = pd.concat(frames) if frames else pd.DataFrame()

  0%|          | 0/300 [00:00<?, ?it/s]

In [8]:
# Replace every NaN cell with 0 (presumably the gaps left by queries that
# returned fewer hits than others).
results = results.replace(np.nan, 0)

In [10]:
# Step 1: tidy the raw search results -- blank out any NaN, label the query
# column like the roster column it corresponds to, and renumber the rows.
results_df = results.replace(np.nan, '').rename(columns={'Query': 'Name'})
results_df = results_df.reset_index().drop(columns='index')

# Step 2: attach the results to the roster, keyed on the renumbered row index.
# The array-valued key surfaces as 'key_0' and the clashing name columns get
# _x/_y suffixes; both artefacts are removed again.
results_df = pd.merge(data, results_df, on=results_df.index, how='outer')
results_df = (results_df
              .drop(columns=['key_0', 'Name_y'])
              .rename(columns={'Name_x': 'Name'}))
results_df.head(5)

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,...,Result 11,Result 12,Result 13,Result 14,Result 15,Result 16,Result 17,Result 18,Result 19,Result 20
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,2801769645,1369317869264248838,3883801823,4841853117,1352034271477837826,...,1272520399973232640,774353219464093696,712359665833795585,905126924397281282,513081418,1263928166546096129,3346441403,764794474144858112,4841988202,409176426
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Persist the full-name search results next to the notebook (no index column).
output_csv = 'Twitter-Search-Fullnames-Parliament-Members.csv'
results_df.to_csv(output_csv, index=False)

---
## Search: Surname

In [13]:
# Run one Twitter search per politician surname and collect the per-query
# result frames.
queries = list(data['Surname'])
frames = []

for query in tqdm(queries):
    try:
        se.search(query=query, count=10)
        frames.append(se.get_results())
    except Exception as err:
        # Best-effort: report the failure and keep going. A placeholder row is
        # still recorded because the merge cell below pairs results with
        # `data` by row position; skipping a row would shift every later
        # politician onto the wrong search results.
        # Assumes get_results() yields one row per query with a 'Query'
        # column (the merge cell renames that column) -- TODO confirm.
        print(f'Search failed for {query!r}: {err}')
        frames.append(pd.DataFrame({'Query': [query]}))

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration; concatenate once instead.
results = pd.concat(frames) if frames else pd.DataFrame()

  0%|          | 0/300 [00:00<?, ?it/s]

In [14]:
# Replace every NaN cell with 0 (presumably the gaps left by queries that
# returned fewer hits than others).
results = results.replace(np.nan, 0)

In [16]:
# Step 1: tidy the raw search results -- blank out any NaN, label the query
# column like the roster column it corresponds to, and renumber the rows.
results_df = results.replace(np.nan, '').rename(columns={'Query': 'Surname'})
results_df = results_df.reset_index().drop(columns='index')

# Step 2: attach the results to the roster, keyed on the renumbered row index.
# The array-valued key surfaces as 'key_0' and the clashing surname columns
# get _x/_y suffixes; both artefacts are removed again.
results_df = pd.merge(data, results_df, on=results_df.index, how='outer')
results_df = (results_df
              .drop(columns=['key_0', 'Surname_y'])
              .rename(columns={'Surname_x': 'Surname'}))
results_df.head(5)

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,...,Result 11,Result 12,Result 13,Result 14,Result 15,Result 16,Result 17,Result 18,Result 19,Result 20
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,174226430,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,126143952,93629265,282486425,2596344709,167572986,...,2569485777,2329900741,2694843255,2370663787,4641601163,1373670869613084674,507489429,4209308067,3110162349,1189778967546474498
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,965785002,2840171877,424495304,1847450509,484212704,...,1697401291,1050337249496391681,799200059464318976,3253495824,2466023912,582976052,984469619023994880,705406032,886626800,860197691447549954
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,1977451178,1049215741,1534824312,1264145037929185280,739463983405662208,...,2400905457,1302266183790604290,3167304245,4219002089,1209556114725130240,3351186423,970798547460395008,3244790992,2161086549,704032421696643073
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,533041104,998828010181709825,1148229718627540992,405567257,1216171735,...,3107550591,3029871778,1851852092,863376977331400705,2463333126,842354478,1361908210068836353,4179542776,3172032267,3923205689


In [18]:
# Persist the surname search results next to the notebook (no index column).
output_csv = 'Twitter-Search-Surnames-Parliament-Members.csv'
results_df.to_csv(output_csv, index=False)

---
## Search: Surname Greeklish

In [51]:
## https://github.com/gschizas/RomanizePython/blob/master/romanize/__init__.py

def romanize(greek_text):
    """Return the ISO 843:1997 transcription of the input Greek text.

    The text is scanned left to right. Most letters map one-to-one; the
    following combinations are handled as units:

    * θ, χ, ψ            -> th, ch, ps
    * α/ε/η + υ (or ύ)   -> av/ev/iv or af/ef/if, chosen by the letter after it
    * γγ, γξ, γχ         -> ng, nx, nch
    * μπ                 -> b at the start or end of a word, mp otherwise
    * ου (or ού)         -> ou

    Letter case is carried over: an upper-case source letter produces an
    upper-case first Latin letter, and the rest of a multi-letter
    transcription is upper-cased only when the following source character is
    upper-case too (or is not a cased character).

    Any non-Greek characters are passed through unchanged.

    Parameters
    ----------
    greek_text : str
        Text to transliterate.

    Returns
    -------
    str
        The transliterated text.
    """
    # The lookup tables do not depend on the current character, so they are
    # built once per call instead of once per loop iteration.
    simple_map = dict(zip('άβδέζήιίϊΐκλνξόπρσςτυύϋΰφωώ',
                          'avdeziiiiiklnxoprsstyyyyfoo'))
    digraph_map = {'θ': 'th', 'χ': 'ch', 'ψ': 'ps'}
    ypsilon_vowel_map = {'α': 'a', 'ε': 'e', 'η': 'i'}
    ypsilon_as_v = 'βγδζλμνραάεέηήιίϊΐοόυύϋΰωώ'
    ypsilon_as_f = 'θκξπστφχψ'

    pieces = []
    cursor = 0
    text_length = len(greek_text)
    while cursor < text_length:
        source_letter = greek_text[cursor]
        source_next = greek_text[cursor + 1] if cursor < text_length - 1 else ""

        # Case is decided on the original characters, before lower-casing.
        is_upper = (source_letter.upper() == source_letter)
        is_upper_next = (source_next.upper() == source_next)

        letter = source_letter.lower()
        prev_letter = greek_text[cursor - 1].lower() if cursor > 0 else ""
        next_letter = source_next.lower()
        third_letter = greek_text[cursor + 2].lower() if cursor < text_length - 2 else ""

        if letter in simple_map:
            new_letter = simple_map[letter]
        elif letter in digraph_map:
            new_letter = digraph_map[letter]
        elif letter in ypsilon_vowel_map:
            new_letter = ypsilon_vowel_map[letter]
            if next_letter in ['υ', 'ύ']:
                # NOTE(review): when the digraph ends the text, third_letter
                # is "" and `"" in ypsilon_as_v` is True, so 'v' is emitted;
                # before a space neither branch matches and υ is transcribed
                # separately. Behaviour kept as-is -- confirm against the
                # standard's end-of-word rule before changing it.
                if third_letter in ypsilon_as_v:
                    new_letter += 'v'
                    cursor += 1
                elif third_letter in ypsilon_as_f:
                    new_letter += 'f'
                    cursor += 1
        elif letter == 'γ':
            if next_letter == 'γ':
                new_letter = 'ng'
                cursor += 1
            elif next_letter == 'ξ':
                new_letter = 'nx'
                cursor += 1
            elif next_letter == 'χ':
                # Must be an equality test: a substring test (`in 'χ'`) is
                # also true for the empty string, which turned a γ at the very
                # end of the text into 'nch'.
                new_letter = 'nch'
                cursor += 1
            else:
                new_letter = 'g'
        elif letter == 'μ':
            if next_letter == 'π':
                # Word-initial or word-final μπ (text edge or whitespace on
                # the relevant side) becomes 'b'; inside a word it is 'mp'.
                if prev_letter.strip() == "" or third_letter.strip() == "":
                    new_letter = 'b'
                else:
                    new_letter = 'mp'
                cursor += 1
            else:
                new_letter = 'm'
        elif letter == 'ο':
            new_letter = 'o'
            if next_letter in ['υ', 'ύ']:
                new_letter += 'u'
                cursor += 1
        else:
            # Anything not covered above is copied through (lower-cased here,
            # case restored just below).
            new_letter = letter

        if is_upper:
            tail = new_letter[1:]
            new_letter = new_letter[0].upper() + (tail.upper() if is_upper_next else tail.lower())

        pieces.append(new_letter)
        cursor += 1
    return "".join(pieces)


In [52]:
# Run one Twitter search per politician surname, transliterated to Latin
# characters ("Greeklish"), and collect the per-query result frames.
queries = list(data['Surname'])
frames = []

for query in tqdm(queries):
    try:
        se.search(query=romanize(query), count=10)
        frames.append(se.get_results())
    except Exception as err:
        # Best-effort: report the failure and keep going. A placeholder row is
        # still recorded because the merge cell below pairs results with
        # `data` by row position; skipping a row would shift every later
        # politician onto the wrong search results.
        # Assumes get_results() yields one row per query with a 'Query'
        # column (the merge cell renames that column) -- TODO confirm.
        print(f'Search failed for {query!r}: {err}')
        frames.append(pd.DataFrame({'Query': [query]}))

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration; concatenate once instead.
results = pd.concat(frames) if frames else pd.DataFrame()

  0%|          | 0/300 [00:00<?, ?it/s]

In [53]:
# Replace every NaN cell with 0 (presumably the gaps left by queries that
# returned fewer hits than others).
results = results.replace(np.nan, 0)

In [54]:
# Step 1: tidy the raw search results -- blank out any NaN, label the query
# column like the roster column it corresponds to, and renumber the rows.
results_df = results.replace(np.nan, '').rename(columns={'Query': 'Surname'})
results_df = results_df.reset_index().drop(columns='index')

# Step 2: attach the results to the roster, keyed on the renumbered row index.
# The array-valued key surfaces as 'key_0' and the clashing surname columns
# get _x/_y suffixes; both artefacts are removed again.
results_df = pd.merge(data, results_df, on=results_df.index, how='outer')
results_df = (results_df
              .drop(columns=['key_0', 'Surname_y'])
              .rename(columns={'Surname_x': 'Surname'}))
results_df.head(5)

Unnamed: 0,Name (Long),Surname,Name,Party,Twitter Handle,Result 1,Result 2,Result 3,Result 4,Result 5,...,Result 11,Result 12,Result 13,Result 14,Result 15,Result 16,Result 17,Result 18,Result 19,Result 20
0,Βιλιάρδος Διονυσίου Βασίλειος,Βιλιάρδος,Βιλιάρδος Βασίλειος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,ViliardosV,174226430,246390608,849013729,856448283547320322,1032949155314970624,...,0,0,0,0,0,0,0,0,0,0
1,Αλεξοπούλου Κωνσταντίνου Αναστασία - Αικατερίνη,Αλεξοπούλου,Αλεξοπούλου Αικατερίνη,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,anaik2020,320162191,73234260,3373083677,1187678476977000448,1109429795962662912,...,1168241295149740038,263784628,599366320,171661692,1325123115366707200,576183073,1194241307251789826,51597099,223945318,72824620
2,Αθανασίου Ευαγγέλου Μαρία,Αθανασίου,Αθανασίου Μαρία,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,MariaAt03902914,1088970967,880033291,2591503675,2495322266,103608120,...,35453950,517875912,812762451485818881,230836510,53700015,421787378,1192655092400283648,703958160,711712332,38287046
3,Βαγενάς Κωνσταντίνου Δημήτριος,Βαγενάς,Βαγενάς Δημήτριος,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,B3Vagenas,253071752,3340651392,808532589640171520,277694245,879300336,...,63101531,538678651,403661679,137151534,1049215741,1002016027,897883385745551360,3291502453,1118028346145824771,90375469
4,Ασημακοπούλου Δημητρίου Σοφία - Χάιδω,Ασημακοπούλου,Ασημακοπούλου Χάιδω,ΕΛΛΗΝΙΚΗ ΛΥΣΗ,SofAsimak,533041104,714120024,2829422892,1305799456080695296,2249352493,...,1342908256591736834,275945170,1127930818712743936,2436824634,3814339281,931266633351516160,813814300968108032,1276660778519670786,2569601167,1188808110074413057


In [55]:
# Persist the Latin-transliterated surname search results next to the
# notebook (no index column).
output_csv = 'Twitter-Search-SurnamesLatin-Parliament-Members.csv'
results_df.to_csv(output_csv, index=False)