### **Package Imports and Data Acquisition**

In [1]:
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np
import datetime as dt
import pickle

In [2]:
# Read the sampled tweets CSV; the file's first column becomes the row index.
df = pd.read_csv(filepath_or_buffer="sampled_no_retweet_df.csv", index_col=0)

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,lang,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc
1,1.395719e+18,Fri May 21 12:32:11 +0000 2021,2021-05-21,#covidlong #LongCovid #apresJ20,0.0,0.0,0.0,0.0,False,False,und,1621600000000.0,,1298867975957684224,Florence COVID19 🦠🦠💉💉 (Covid Long),Covid19Florence,Premiers symptômes Covid 19 : le 09/03/2020,197,Thu Aug 27 06:20:46 +0000 2020,"Ile-de-France, France"
2,1.395741e+18,Fri May 21 13:57:35 +0000 2021,2021-05-21,#apresJ20 \n🤔\nhttps://t.co/H6mqURHflJ,0.0,0.0,0.0,0.0,False,False,und,1621605000000.0,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,
3,1.395741e+18,Fri May 21 14:01:16 +0000 2021,2021-05-21,#apresJ20 \nSuramine l antidote anti covid 🤔\n...,0.0,0.0,0.0,0.0,False,False,it,1621606000000.0,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,
4,1.397092e+18,Tue May 25 07:28:54 +0000 2021,2021-05-25,#J451 - L’article de @SuzanneBruneau1 sur mon ...,0.0,0.0,0.0,0.0,False,False,fr,1621928000000.0,,37757950,Véronique Le Thiec 🍓Choyée Contaminée Réinfectée,lethiecv,"Parisienne, Dyonisienne, Bretonne, Française, ...",1442,Mon May 04 20:57:31 +0000 2009,
6,1.397449e+18,Wed May 26 07:05:47 +0000 2021,2021-05-26,#apresJ20,0.0,0.0,0.0,0.0,False,False,und,1622013000000.0,,120320839,ninasky,ninasky31000,mako assé véyé mwen\n❤️💚🖤\n#apresJ20 #covidlong,154,Sat Mar 06 03:13:24 +0000 2010,France


### **Helper Function Definitions**

In [4]:
from deep_translator import GoogleTranslator

def translate_tweet(tweet, target_language='en'):
    """Translate one tweet with GoogleTranslator, auto-detecting the source language.

    Parameters
    ----------
    tweet : str
        Text to translate.
    target_language : str, default 'en'
        Language code passed to GoogleTranslator as ``target``.

    Returns
    -------
    The value returned by ``GoogleTranslator.translate``; an empty string if
    any exception is raised (the exception is printed, not re-raised).
    """
    try:
        translator = GoogleTranslator(source='auto', target=target_language)
        return translator.translate(tweet)
    except Exception as err:
        # Best effort: report the problem and fall back to an empty string.
        print(err)
        return ""
        

In [5]:
from deep_translator import GoogleTranslator

def translate_tweets(tweets, target_language='en'):
    """Translate every string entry of ``tweets`` and return the results as a list.

    Parameters
    ----------
    tweets : iterable
        Candidate tweet texts. Entries that are not ``str`` instances are
        skipped, so the returned list can be shorter than the input; callers
        that write the result back next to the input must account for that.
    target_language : str, default 'en'
        Language code forwarded to ``translate_tweet`` for every entry.

    Returns
    -------
    list
        One ``translate_tweet`` result per string entry, in input order.
    """
    return [
        # Forward the requested language; previously the argument was ignored
        # and every tweet was translated to translate_tweet's default.
        translate_tweet(tweet, target_language=target_language)
        for tweet in tweets
        if isinstance(tweet, str)
    ]

### **Translation**

In [77]:
# Keep only the rows whose 'lang' value is not 'en'.
non_english_tweets = df.loc[df['lang'].ne('en')]

In [78]:
# Display the subset of rows whose 'lang' is not 'en'.
non_english_tweets

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,...,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc,translation
1,1.395719e+18,Fri May 21 12:32:11 +0000 2021,2021-05-21,#covidlong #LongCovid #apresJ20,0.0,0.0,0.0,0.0,False,False,...,1.621600e+12,,1298867975957684224,Florence COVID19 🦠🦠💉💉 (Covid Long),Covid19Florence,Premiers symptômes Covid 19 : le 09/03/2020,197,Thu Aug 27 06:20:46 +0000 2020,"Ile-de-France, France",#covidlong #LongCovid # apresJ20
2,1.395741e+18,Fri May 21 13:57:35 +0000 2021,2021-05-21,#apresJ20 \n🤔\nhttps://t.co/H6mqURHflJ,0.0,0.0,0.0,0.0,False,False,...,1.621605e+12,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,,# apresJ20\n🤔\nhttps://t.co/H6mqURHflJ
3,1.395741e+18,Fri May 21 14:01:16 +0000 2021,2021-05-21,#apresJ20 \nSuramine l antidote anti covid 🤔\n...,0.0,0.0,0.0,0.0,False,False,...,1.621606e+12,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,,#apresJ20 \nSuramine l antidote anti covid 🤔\n...
4,1.397092e+18,Tue May 25 07:28:54 +0000 2021,2021-05-25,#J451 - L’article de @SuzanneBruneau1 sur mon ...,0.0,0.0,0.0,0.0,False,False,...,1.621928e+12,,37757950,Véronique Le Thiec 🍓Choyée Contaminée Réinfectée,lethiecv,"Parisienne, Dyonisienne, Bretonne, Française, ...",1442,Mon May 04 20:57:31 +0000 2009,,#J451 - @SuzanneBruneau1's article on my #Long...
6,1.397449e+18,Wed May 26 07:05:47 +0000 2021,2021-05-26,#apresJ20,0.0,0.0,0.0,0.0,False,False,...,1.622013e+12,,120320839,ninasky,ninasky31000,mako assé véyé mwen\n❤️💚🖤\n#apresJ20 #covidlong,154,Sat Mar 06 03:13:24 +0000 2010,France,# apresJ20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305843,1.513767e+18,Tue Apr 12 06:32:57 +0000 2022,2022-04-12,Genau das! Und um noch einen Tweet zu zitieren...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,1045764776,Uwe Isonfire,IsonfireUwe,er/ihm,60,Sat Dec 29 19:25:17 +0000 2012,Duesseldorf,"Exactly! And to quote another tweet: ""Wear a m..."
305852,1.513767e+18,Tue Apr 12 06:33:10 +0000 2022,2022-04-12,Sigo tragando sushi... Por qué ni #lyme ni #sa...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,93092910,ℭ𝔞𝔫𝔠𝔦ó𝔫 𝔡𝔢𝔩 𝔐𝔞𝔯,marlozanormz,Lic. en Admon de Negocios Internacionales. \nL...,205,Sat Nov 28 01:38:29 +0000 2009,"Querétaro Arteaga, México",I keep swallowing sushi... Why neither #lyme n...
305873,1.513767e+18,Tue Apr 12 06:33:31 +0000 2022,2022-04-12,@MarcoZueri https://t.co/QkU1Yks73I,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,199748916,Andre Lachat,baldeagle_66,,70,Thu Oct 07 16:58:47 +0000 2010,Schweiz,@MarcoZueri https://t.co/QkU1Yks73I
305890,1.513767e+18,Tue Apr 12 06:34:02 +0000 2022,2022-04-12,Gerade am Hören: Long Covid und Laufen - Alles...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,1309031025524252676,R;,SociofugalSpace,♀️she/her/hers/PoC\n🎓Kulturanthro./Relwiss./Ge...,160,Thu Sep 24 07:25:22 +0000 2020,Earth,Just Listening: Long Covid and Running - Every...


In [79]:
# Exclude rows whose 'lang' is 'de' from the non-English subset.
non_en_de_tweets = non_english_tweets.loc[non_english_tweets['lang'].ne('de')]

In [80]:
# Exclude rows whose 'lang' is 'und' from the previous subset.
non_en_de_und_tweets = non_en_de_tweets.loc[non_en_de_tweets['lang'].ne('und')]

In [81]:
# Exclude rows whose 'lang' is 'nl' from the previous subset.
non_en_de_und_nl_tweets = non_en_de_und_tweets.loc[non_en_de_und_tweets['lang'].ne('nl')]

In [82]:
# Exclude rows whose 'lang' is 'es' from the previous subset.
non_en_de_und_nl_es_tweets = non_en_de_und_nl_tweets.loc[non_en_de_und_nl_tweets['lang'].ne('es')]

In [83]:
# Exclude rows whose 'lang' is 'fr' from the previous subset.
non_en_de_und_nl_es_fr_tweets = non_en_de_und_nl_es_tweets.loc[non_en_de_und_nl_es_tweets['lang'].ne('fr')]

In [84]:
# Exclude rows whose 'lang' is 'it'; what remains carries none of the
# language codes en, de, und, nl, es, fr or it.
non_en_de_und_nl_es_fr_it_tweets = non_en_de_und_nl_es_fr_tweets.loc[non_en_de_und_nl_es_fr_tweets['lang'].ne('it')]

In [85]:
# Display the rows left after excluding en, de, und, nl, es, fr and it.
non_en_de_und_nl_es_fr_it_tweets

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,...,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc,translation
12,1.402205e+18,Tue Jun 08 10:05:15 +0000 2021,2021-06-08,"Histamine, MCAS, #LongCovid \n\nLa piste (agai...",0.0,0.0,0.0,0.0,False,False,...,1.623147e+12,,1223997798108008450,Martine Mounier 🏳️‍🌈,reverseyourmind,Long Covid since 2/2020 #FBLC #ZeroCovid She/Her,894,Sun Feb 02 15:54:03 +0000 2020,"Grenoble, France",
1129,1.443103e+18,Wed Sep 29 06:39:25 +0000 2021,2021-09-29,Mini 🧵 on #CountLongCovid estimates,0.0,0.0,0.0,0.0,False,False,...,1.632898e+12,,149522272,Dr Nisreen Alwan 🌻,Dr2NisreenAlwan,Associate Professor in Public Health @unisouth...,64965,Sat May 29 14:14:47 +0000 2010,"Hampshire, UK",
2279,1.443967e+18,Fri Oct 01 15:50:55 +0000 2021,2021-10-01,"Ca şi adulții, copiii şi adolescenţii pot sufe...",0.0,0.0,0.0,0.0,False,False,...,1.633103e+12,,1430305273189597193,Covid Lung România,LongCovidRo,,11,Tue Aug 24 23:05:58 +0000 2021,,
3201,1.466084e+18,Wed Dec 01 16:36:24 +0000 2021,2021-12-01,Tik Tak Tik Tak Tik Tak\n#Pfizergate \n#Modern...,0.0,0.0,0.0,0.0,False,False,...,1.638377e+12,,1427666892832088068,Mateo Renaldi,MateoRenaldi,,9,Tue Aug 17 16:21:35 +0000 2021,,
4211,1.503724e+18,Tue Mar 15 13:23:28 +0000 2022,2022-03-15,#COVID19 #CovidLong\n#Omicron\nMi avril 2022 ?,0.0,0.0,0.0,0.0,False,False,...,1.647351e+12,,759045062890131460,Vinciane ☮,de_perrier,Ψ #Psychopathoclinique #PsychoOnco #Droit • re...,475,Fri Jul 29 15:17:00 +0000 2016,Sudiste en Heulaland - en 🇨🇵,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305548,1.513766e+18,Tue Apr 12 06:27:11 +0000 2022,2022-04-12,สิ่งที่แย่ที่สุดคือกินน้ำอัดลมแล้วรสชาติไม่เหม...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,1260599007946002432,เฟอเรทที่โสดตอนฤดูผสมพันธุ์,jam_satoberry,GOT7 • Mamamoo • Day6 • WINNER • Stray Kids • ...,30,Wed May 13 15:53:32 +0000 2020,,
305565,1.513766e+18,Tue Apr 12 06:27:32 +0000 2022,2022-04-12,@nusret254 Hocam Naci olayında da TEB long cov...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,987005763381026816,Metin,metin1907u,,34,Thu Apr 19 16:31:26 +0000 2018,"İstanbul, Türkiye",
305605,1.513766e+18,Tue Apr 12 06:28:14 +0000 2022,2022-04-12,eu também deal with it,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,3429222323,luis (taylor's version),thundergoat22,e que seja o que deus quiser que para isso está,436,Tue Aug 18 01:32:39 +0000 2015,ele/luis,
305806,1.513767e+18,Tue Apr 12 06:32:10 +0000 2022,2022-04-12,แค่เดินข้ามฝั่งถนน ไปกินข้าว แล้วรีบเดินกลับมา...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,1176313315787542528,LianHua 🌸R1SE YYDS ❤️⚡️R1SE十二爱你❤️⚡️ คีพ 11 ⚡️❤️,lianhua190996,🦁💚 จ้านเกอตี้ตีอ้ายหนี่❤️🐰 天官赐福 百无禁忌❤️🤍#เดฟแฟม...,464,Tue Sep 24 01:52:18 +0000 2019,ดาวดวงที่12⚡️,


In [15]:
# Translate the text of every remaining-language tweet (default target 'en').
translated_non_en_de_und_nl_es_fr_it_tweets = translate_tweets(
    tweets=non_en_de_und_nl_es_fr_it_tweets['full_tweet'],
)

In [30]:
# Select the rows whose 'lang' value is 'und'.
und_tweets = df.loc[df['lang'].eq('und')]

In [31]:
# Display the rows whose 'lang' is 'und'.
und_tweets

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,lang,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc
1,1.395719e+18,Fri May 21 12:32:11 +0000 2021,2021-05-21,#covidlong #LongCovid #apresJ20,0.0,0.0,0.0,0.0,False,False,und,1.621600e+12,,1298867975957684224,Florence COVID19 🦠🦠💉💉 (Covid Long),Covid19Florence,Premiers symptômes Covid 19 : le 09/03/2020,197,Thu Aug 27 06:20:46 +0000 2020,"Ile-de-France, France"
2,1.395741e+18,Fri May 21 13:57:35 +0000 2021,2021-05-21,#apresJ20 \n🤔\nhttps://t.co/H6mqURHflJ,0.0,0.0,0.0,0.0,False,False,und,1.621605e+12,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,
6,1.397449e+18,Wed May 26 07:05:47 +0000 2021,2021-05-26,#apresJ20,0.0,0.0,0.0,0.0,False,False,und,1.622013e+12,,120320839,ninasky,ninasky31000,mako assé véyé mwen\n❤️💚🖤\n#apresJ20 #covidlong,154,Sat Mar 06 03:13:24 +0000 2010,France
8,1.399296e+18,Mon May 31 09:26:09 +0000 2021,2021-05-31,#LongCovid \n#covidlong \n#apresJ20\n\nhttps:/...,0.0,0.0,0.0,0.0,False,False,und,1.622453e+12,,1275237864687665153,Eniluap,Eniluapao,Used to be a nurse before getting PACS.\n\nJ'a...,144,Tue Jun 23 01:23:06 +0000 2020,Hell
17,1.405085e+18,Wed Jun 16 08:48:19 +0000 2021,2021-06-16,#apresJ20\n#covidlong\n@apresj20\n👇🏻\nhttps://...,0.0,0.0,0.0,0.0,False,False,und,1.623833e+12,,994734254,Amélie Perrier,Am3liePerrier,#bzh #Running #Trail #PRlife\nAP comme Attaché...,506,Fri Dec 07 10:01:01 +0000 2012,Bretagne
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305517,1.513766e+18,Tue Apr 12 06:26:39 +0000 2022,2022-04-12,@people https://t.co/TCp2nLXAGh,0.0,0.0,0.0,0.0,False,False,und,1.649745e+12,,1497418971200561152,MFalcon,falconMillenniu,.,29,Sat Feb 26 03:51:43 +0000 2022,
305540,1.513766e+18,Tue Apr 12 06:27:03 +0000 2022,2022-04-12,@theroyaleditor https://t.co/TCp2nLXAGh,0.0,0.0,0.0,0.0,False,False,und,1.649745e+12,,1497418971200561152,MFalcon,falconMillenniu,.,29,Sat Feb 26 03:51:43 +0000 2022,
305643,1.513766e+18,Tue Apr 12 06:28:53 +0000 2022,2022-04-12,#JohnsonPartiedThenLied\n#Partygate\n#LawsAppl...,0.0,0.0,0.0,0.0,False,False,und,1.649745e+12,,2226919858,H44🇺🇦#RejoinEU🇺🇦,rubymournie,"💙, CEV, hate Tories, DUP and Brexit. Pro Unite...",1936,Sun Dec 15 10:48:23 +0000 2013,Northern Ireland
305780,1.513767e+18,Tue Apr 12 06:31:49 +0000 2022,2022-04-12,🤣🤣🤣🤣🤣,0.0,0.0,0.0,0.0,False,False,und,1.649745e+12,,2265397625,MrGreen,0606Green,birthday is 6th June. The week of the Coronati...,1085,Mon Jan 06 15:30:10 +0000 2014,A Scot now living in Southsea


In [39]:
# Translate the text of every 'und' tweet (default target 'en').
translated_und_tweets = translate_tweets(
    tweets=und_tweets['full_tweet'],
)

1979 --> text must be a valid text with maximum 5000 character, otherwise it cannot be translated


In [40]:
import pickle

# Serialize the translated 'und' tweets to a pickle file.
with open('translated_non_retweeted_und_tweets_2022_04_12-08_34_59_AM.pkl', mode='wb') as pickle_out:
    pickle.dump(obj=translated_und_tweets, file=pickle_out)

In [41]:
# Number of translated 'und' entries (non-string inputs are skipped by translate_tweets).
len(translated_und_tweets)

4753

In [43]:
# Display the translated 'und' tweets.
translated_und_tweets

['#covidlong #LongCovid # apresJ20',
 '# apresJ20\n🤔\nhttps://t.co/H6mqURHflJ',
 '# apresJ20',
 '#LongCovid\n#covidlong\n# apresJ20\n\nhttps://t.co/KMPUfN71pI',
 '#apresJ20\n#covidlong\n@apresj20\n👇🏻\nhttps://t.co/jnibDRl3XM',
 '#LongCovid #paslagrippe #apresJ20 #Covid_19',
 '#apresJ20 #CovidLong ⬇️⬇️⬇️',
 '#apresJ20 #covidlong #COVID19',
 'https://t.co/wuotEiHj7L # apresJ20',
 '#apresJ20 #LongCovid @apresj20',
 '# apresJ20',
 '😭😭 #apresJ20 #covidlong',
 'https://t.co/UysH6a6eLc\n\n#covidlong\n#apresJ20',
 '# apresJ20 https://t.co/oTad00VsgF',
 '#covidlong #apresJ20 https://t.co/TTrJfMjtT9',
 '#apresJ20 #covidlong https://t.co/Dnc9fzK8EB',
 '# apresJ20',
 '@TatianaVentose\n@apresj20 @Food_reward\n#covidlong #apresJ20\n\nhttps://t.co/XihSmEsbL9',
 '# apresJ20',
 '# apresJ20 https://t.co/QXCGceoRDb',
 '#apresJ20 #LongCovid',
 '#apresJ20 #covidlong👇',
 '#apresJ20 #LongCovid https://t.co/0AX9FinVIH',
 '#covidlong #apresJ20',
 '#apresJ20 #covidlong',
 '# apresJ20\n#CovidLong #LongCovid',
 '

In [97]:
# Load the previously pickled translations of the remaining-language tweets.
with open('translated_non_retweeted_remaining_tweets_2022_04_12-08_34_59_AM.pkl', mode='rb') as pickle_in:
    translated_remaining_tweets = pickle.load(file=pickle_in)

In [99]:
# Number of loaded translations; this has to equal the number of rows they are written back to.
len(translated_remaining_tweets)

6011

In [75]:
# Store the Spanish translations in the 'translation' column of the 'es' rows.
# Targeting the single column with .loc avoids re-assigning every column of the
# matching rows and also works when 'translation' does not exist yet (the
# previous whole-row assignment drops columns that df does not already have).
# translated_spanish_tweets is not defined in the visible cells; it must hold
# one entry per 'es' row, in row order.
df.loc[df['lang'] == 'es', 'translation'] = translated_spanish_tweets

In [104]:
# Show text, language and translation for the 'es' rows.
df.loc[df['lang'].eq('es'), ['full_tweet', 'lang', 'translation']]

Unnamed: 0,full_tweet,lang,translation
987,#CountLongCovid - esto va a todo correr.\n\n#...,es,
1244,#LongCovid: codificar es cuidar.\n#CountLongCo...,es,
3410,Estudio francés: De 27 000 personas q sufriero...,es,"French study: Of 27,000 people who suffered #C..."
4588,Datos de la @SEMG_ES sobre efectos de la #vacu...,es,Data from the @SEMG_ES on the effects of #vacc...
4598,¡Reportaje exclusivo! \n#Disautonomia: nombre ...,es,Exclusive report!\n#Dysautonomia: medical name...
...,...,...,...
305222,En Valencia también queremos un hospital para ...,es,In Valencia we also want a hospital for patien...
305340,- Los cincogés conocen a cientos de personas c...,es,- The Cincogés know hundreds of people with si...
305406,@elmundoes Gracias @IdiazAyuso por dedicar rec...,es,@elmundoes Thank you @IdiazAyuso for dedicatin...
305514,Más deportistas con COVID persistente o con da...,es,More athletes with persistent COVID or with da...


In [94]:
# Row labels of the remaining-language subset; used to address the same rows in df.
non_en_de_und_nl_es_fr_it_tweets.index

Int64Index([    12,   1129,   2279,   3201,   4211,   4313,   4549,   4606,
              4777,   4801,
            ...
            305057, 305149, 305187, 305255, 305313, 305548, 305565, 305605,
            305806, 305898],
           dtype='int64', length=6011)

In [100]:
# Write the loaded translations into the 'translation' column of the
# remaining-language rows. Only that column is targeted, instead of
# re-assigning every column of the selected rows; the list must contain one
# entry per selected row, in the order of the subset's index.
df.loc[non_en_de_und_nl_es_fr_it_tweets.index, 'translation'] = translated_remaining_tweets

In [103]:
# Show text, language and translation for the remaining-language rows.
df.loc[non_en_de_und_nl_es_fr_it_tweets.index, ['full_tweet', 'lang', 'translation']]

Unnamed: 0,full_tweet,lang,translation
12,"Histamine, MCAS, #LongCovid \n\nLa piste (agai...",et,"Histamine, MCAS, #LongCovid \n\nLa piste (agai..."
1129,Mini 🧵 on #CountLongCovid estimates,et,Mini 🧵 on #CountLongCovid estimates
2279,"Ca şi adulții, copiii şi adolescenţii pot sufe...",ro,"Like adults, children and teens can suffer fro..."
3201,Tik Tak Tik Tak Tik Tak\n#Pfizergate \n#Modern...,in,Tik Tak Tik Tak Tik Tak\n#Pfizergate\n#Moderna...
4211,#COVID19 #CovidLong\n#Omicron\nMi avril 2022 ?,tr,#COVID19 #CovidLong\n#Omicron\nMi avril 2022 ?
...,...,...,...
305548,สิ่งที่แย่ที่สุดคือกินน้ำอัดลมแล้วรสชาติไม่เหม...,th,The worst thing is that drinking soda doesn't ...
305565,@nusret254 Hocam Naci olayında da TEB long cov...,tr,"@nusret254 Teacher, I said it that day, both i..."
305605,eu também deal with it,pt,me too deal with it
305806,แค่เดินข้ามฝั่งถนน ไปกินข้าว แล้วรีบเดินกลับมา...,th,"Just walk across the street to eat, then hurri..."


In [102]:
# Text, language and translation for the rows labelled by the remaining-language subset's index.
df.loc[non_en_de_und_nl_es_fr_it_tweets.index, ['full_tweet', 'lang', 'translation']]

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,...,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc,translation
7,1.398207e+18,Fri May 28 09:18:30 +0000 2021,2021-05-28,#COVIDー19 #apresJ20 #apresJ60 #apresJ90 #Covid...,0.0,0.0,0.0,0.0,False,False,...,1.622194e+12,,850683077114249216,チャンネコ。,channeco3,"(元)長期微熱組。掛けたくなったら鍵を掛けます。In April 2020, I was t...",51,Sat Apr 08 12:13:45 +0000 2017,,
15,1.403281e+18,Fri Jun 11 09:21:00 +0000 2021,2021-06-11,#COVIDー19 #apresJ20 #apresJ60 #apresJ90 #Covid...,0.0,0.0,0.0,0.0,False,False,...,1.623403e+12,,850683077114249216,チャンネコ。,channeco3,"(元)長期微熱組。掛けたくなったら鍵を掛けます。In April 2020, I was t...",51,Sat Apr 08 12:13:45 +0000 2017,,
25,1.405452e+18,Thu Jun 17 09:08:37 +0000 2021,2021-06-17,#COVIDー19 #apresJ20 #apresJ60 #apresJ90 #Covid...,0.0,0.0,0.0,0.0,False,False,...,1.623921e+12,,850683077114249216,チャンネコ。,channeco3,"(元)長期微熱組。掛けたくなったら鍵を掛けます。In April 2020, I was t...",51,Sat Apr 08 12:13:45 +0000 2017,,
30,1.405802e+18,Fri Jun 18 08:16:26 +0000 2021,2021-06-18,#COVIDー19 #apresJ20 #apresJ60 #apresJ90 #Covid...,0.0,0.0,0.0,0.0,False,False,...,1.624004e+12,,850683077114249216,チャンネコ。,channeco3,"(元)長期微熱組。掛けたくなったら鍵を掛けます。In April 2020, I was t...",51,Sat Apr 08 12:13:45 +0000 2017,,
39,1.407270e+18,Tue Jun 22 09:31:39 +0000 2021,2021-06-22,#COVIDー19 #apresJ20 #apresJ60 #apresJ90 #Covid...,0.0,0.0,0.0,0.0,False,False,...,1.624354e+12,,850683077114249216,チャンネコ。,channeco3,"(元)長期微熱組。掛けたくなったら鍵を掛けます。In April 2020, I was t...",51,Sat Apr 08 12:13:45 +0000 2017,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305909,1.513767e+18,Tue Apr 12 06:34:31 +0000 2022,2022-04-12,"Dang, I needed this. (Thread.)",0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,15853730,t.t.,tatr,Writer | Opportunistic gardener | Lover of Bro...,911,Thu Aug 14 18:20:51 +0000 2008,Baltimore,
305911,1.513768e+18,Tue Apr 12 06:34:31 +0000 2022,2022-04-12,👌This is how we need Psychologists to help us ...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,1416846409865867267,Long Covid Advocacy💙,LongCovidAdvoc,Raising Awareness for #LongCovid & #MECFS \nCa...,5236,Sun Jul 18 19:45:00 +0000 2021,Plague Island,
305917,1.513768e+18,Tue Apr 12 06:34:40 +0000 2022,2022-04-12,@MarcBrup long covid n'existe pas,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,187404218,Beat Rüedi,brueedi,Erinnere mich bitte nicht an meine Vergangenhe...,449,Mon Sep 06 03:42:10 +0000 2010,Thayngen und Berlin,
305918,1.513768e+18,Tue Apr 12 06:34:41 +0000 2022,2022-04-12,@MarketingUcom @DCDoc33 Getting healthy and fi...,0.0,0.0,0.0,0.0,False,False,...,1.649745e+12,,1322304434198605825,Sasquatch,Sasquatcher2,👀 👀 👀,343,Fri Oct 30 22:28:59 +0000 2020,,


In [116]:
# List the rows whose 'translation' is still null.
df.loc[df['translation'].isnull()]

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,...,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc,translation


In [112]:
# Fall back to the original tweet text wherever no translation was stored.
# Only the 'translation' column is written; the null mask is no longer built
# three times and whole rows are no longer re-assigned.
# NOTE(review): translate_tweet returns "" on failure and "" is not null, so
# failed translations are not back-filled here — confirm that is intended.
df['translation'] = df['translation'].fillna(df['full_tweet'])

In [113]:
# 'translation' values that are still null (an empty result means none are left).
df.loc[df['translation'].isnull(), 'translation']

Series([], Name: translation, dtype: object)

In [118]:
# Export the frame, now carrying the 'translation' column, to CSV.
df.to_csv(path_or_buf="sampled_no_retweet_df_translated.csv")

In [119]:
# Reload the exported CSV (first column as index) and preview its first five rows.
translated_df = pd.read_csv(filepath_or_buffer="sampled_no_retweet_df_translated.csv", index_col=0)
translated_df.head(n=5)

Unnamed: 0,id,created_at,created_at_ymd,full_tweet,quote_count,reply_count,retweet_count,favorite_count,favorited,retweeted,...,timestamp_ms,retweeted_status,user_id,name,screen_name,description,followers_count,user_created_at,loc,translation
1,1.395719e+18,Fri May 21 12:32:11 +0000 2021,2021-05-21,#covidlong #LongCovid #apresJ20,0.0,0.0,0.0,0.0,False,False,...,1621600000000.0,,1298867975957684224,Florence COVID19 🦠🦠💉💉 (Covid Long),Covid19Florence,Premiers symptômes Covid 19 : le 09/03/2020,197,Thu Aug 27 06:20:46 +0000 2020,"Ile-de-France, France",#covidlong #LongCovid # apresJ20
2,1.395741e+18,Fri May 21 13:57:35 +0000 2021,2021-05-21,#apresJ20 \n🤔\nhttps://t.co/H6mqURHflJ,0.0,0.0,0.0,0.0,False,False,...,1621605000000.0,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,,# apresJ20\n🤔\nhttps://t.co/H6mqURHflJ
3,1.395741e+18,Fri May 21 14:01:16 +0000 2021,2021-05-21,#apresJ20 \nSuramine l antidote anti covid 🤔\n...,0.0,0.0,0.0,0.0,False,False,...,1621606000000.0,,1325373633502572544,Béret Vert,Bretvert1,LA LEGION FRANÇAISE Veut faire\nLA RÉVOLUTION ...,64,Sun Nov 08 09:44:52 +0000 2020,,#apresJ20 \nSuramine l antidote anti covid 🤔\n...
4,1.397092e+18,Tue May 25 07:28:54 +0000 2021,2021-05-25,#J451 - L’article de @SuzanneBruneau1 sur mon ...,0.0,0.0,0.0,0.0,False,False,...,1621928000000.0,,37757950,Véronique Le Thiec 🍓Choyée Contaminée Réinfectée,lethiecv,"Parisienne, Dyonisienne, Bretonne, Française, ...",1442,Mon May 04 20:57:31 +0000 2009,,#J451 - @SuzanneBruneau1's article on my #Long...
6,1.397449e+18,Wed May 26 07:05:47 +0000 2021,2021-05-26,#apresJ20,0.0,0.0,0.0,0.0,False,False,...,1622013000000.0,,120320839,ninasky,ninasky31000,mako assé véyé mwen\n❤️💚🖤\n#apresJ20 #covidlong,154,Sat Mar 06 03:13:24 +0000 2010,France,# apresJ20


In [121]:
# Inspect language, original text and translation for the rows at positions 3000-5999.
translated_df.loc[:, ['lang', 'full_tweet', 'translation']].iloc[3000:6000]

Unnamed: 0,lang,full_tweet,translation
12761,en,"This year, we have #ChildreninNeed of #Johnson...","This year, we have #ChildreninNeed of #Johnson..."
12766,en,@MIHouseDems How many cases of #LongCovid &amp...,@MIHouseDems How many cases of #LongCovid &amp...
12767,en,Do we accept children being exposed to #COVID1...,Do we accept children being exposed to #COVID1...
12773,en,"WE SHOULD SUE SCHOOLS, GOVT, and ALL INVOLVED ...","WE SHOULD SUE SCHOOLS, GOVT, and ALL INVOLVED ..."
12784,nl,@KimBoon94 #LongCovid en #LongCovidKids stelt ...,@KimBoon94 #LongCovid and #LongCovidKids are n...
...,...,...,...
25431,es,El covid persistente afecta menos a los niños ...,Persistent covid affects children and adolesce...
25433,es,Ya anda un médico en la secta minimizando el c...,A doctor is already in the sect minimizing per...
25443,es,"Que curioso, ahora que se baraja la tercera do...","How curious, now that the third dose is being ..."
25445,es,@elmundoes En Reino Unido hay más de 30.000 ni...,"@elmundoes In the UK there are more than 30,00..."
