In [242]:
from dotenv import load_dotenv
from pathlib import Path
import os
import csv
import tweepy
import json
import datetime
import pandas as pd
import networkx as nx
from yandex.Translater import Translater
import swifter

In [232]:
#file containing API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
twitter_env_path = Path('~') / 'credentials/.twitter'
yandex_env_path = Path('~') / 'credentials/.yandex' #using yandex as it is free

In [233]:
load_dotenv(dotenv_path=twitter_env_path.expanduser())
load_dotenv(dotenv_path=yandex_env_path.expanduser())

True

In [230]:
def get_api():
    if os.environ.get('API_KEY'):
        auth = tweepy.OAuthHandler(os.environ['TWITTER_API_KEY'], os.environ['TWITTER_API_SECRET_KEY'])
        auth.set_access_token(os.environ['TWITTER_ACCESS_TOKEN'], os.environ['TWITTER_ACCESS_TOKEN_SECRET'])
        api = tweepy.API(auth)
        if api.verify_credentials().screen_name:
            return api
        else:
            raise Exception('Auth Error')
    else:
        raise Exception('Environment Variable not set')

In [211]:
api = get_api()

In [212]:
def get_tweets(name, tweet_id, api=api):
    replies=[]
    for tweet in api.user_timeline(screen_name='LukasHenselEcon', since_id="1238972871323652097", tweet_mode='extended', count=50):
        replies.append(tweet)
    return replies

In [213]:
def create_df(replies):
    output = pd.DataFrame()
    for tweet in replies:
            row = {
                    'user': tweet.user.screen_name.replace('=', ''),
                    'text': tweet.full_text.replace('\n', ' ').replace('=', ''),
                    'created_at': tweet.created_at,
                    'in_reply_to_screen_name': tweet.in_reply_to_screen_name,
                    'in_reply_to_status_id_str': tweet.in_reply_to_status_id_str,
                    'id': tweet.id_str
                  }
            output = output.append(row, ignore_index=True)

    a = output.dropna(how='any').apply(lambda col: (col['id'], col['in_reply_to_status_id_str']), axis=1).tolist()
    g1 = nx.Graph(a)
    g2 = list(nx.connected_components(g1))
    a = pd.DataFrame([(list(item), ind) for ind, item in enumerate(g2)], columns=['ids', 'cluster'])
    b = a.set_index('cluster')['ids'].apply(pd.Series).stack().reset_index(level=1, drop=True).to_frame('id').reset_index()
    output = output.merge(b, on='id')
    cluster_id = output.loc[output['id']==tweet_id]['cluster'].values[0]
    output = output.loc[output['cluster']==cluster_id]
    output = output.sort_values(by='created_at')
    output['created_at'] = output['created_at'].apply(lambda x: x.strftime("%m/%d/%Y, %H:%M:%S"))
    return output

In [214]:
def get_thread(name, tweet_id, api=api):
    tweets = get_tweets(name, tweet_id, api=api)
    return create_df(replies)

In [215]:
output = get_thread('LukasHenselEcon', '1240494951177302016')

In [243]:
def translate_text(text, from_lang='en', to_lang='hi'):
    '''
    supported languages: https://tech.yandex.com/translate/doc/dg/concepts/api-overview-docpage/
    '''
    tr = Translater()
    tr.set_key(os.environ['YANDEX_KEY'])
    tr.set_from_lang(from_lang)
    tr.set_to_lang(to_lang)
    tr.text = text
    return tr.translate()

In [245]:
output['translated_text'] = output['text'].swifter.apply(translate_text)

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=18.0, style=ProgressStyle(description_…




In [253]:
output.head()

Unnamed: 0,created_at,id,in_reply_to_screen_name,in_reply_to_status_id_str,text,user,cluster,translated_text
27,"03/19/2020, 04:26:58",1240494951177302016,,,My partner and I traveled from London Heathrow...,LukasHenselEcon,10,"मेरे साथी और मैं से कूच लंदन के हीथ्रो, बीजिंग..."
26,"03/19/2020, 04:42:24",1240498836583284736,LukasHenselEcon,1.240494951177302e+18,Heathrow airport was quite busy. We wore facem...,LukasHenselEcon,10,हीथ्रो हवाई अड्डा काफी व्यस्त था. हम पहनी थी f...
25,"03/19/2020, 04:42:25",1240498840542875648,LukasHenselEcon,1.2404988365832847e+18,Aside: My perceptions of social norms in Europ...,LukasHenselEcon,10,एक तरफ: मेरी धारणा सामाजिक मानदंडों के यूरोप म...
24,"03/19/2020, 04:42:26",1240498844921548806,LukasHenselEcon,1.2404988405428756e+18,An interesting question is whether COVID-19 wi...,LukasHenselEcon,10,एक दिलचस्प सवाल यह है कि क्या COVID-19 बदल जाए...
23,"03/19/2020, 04:42:28",1240498853431758849,LukasHenselEcon,1.2404988449215488e+18,"During our trip, temperature measurement was v...",LukasHenselEcon,10,"के दौरान हमारे यात्रा, तापमान माप था बहुत अक्स..."


In [252]:
output.to_csv('translated_tweets.csv')