In [1]:
import pandas as pd
import numpy as np
import textrank
import sklearn
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import pickle

In [2]:
# Column names assigned to the review export, in file order.
# The labels '1', '2' and '3' are placeholder names for three columns that
# the notebook drops again when it builds df_1.
col_labels = [
    'Date', 'AppID', 'AppName', 'Lang',
    '1', '2', '3',
    'Author', 'Stars', 'Title', 'Review', 'Reply',
    'Device', 'DeviceType', 'Tags', 'Updated',
]

In [3]:
# Load the Google Play review export.
# - skiprows=4 skips the first four lines of the file.
# - names=col_labels supplies the column names explicitly.
# - index_col=False stops pandas from using the first column as the index.
# The old `error_bad_lines=True` argument is intentionally omitted: raising on
# malformed lines is already the default, and the keyword was deprecated in
# pandas 1.3 and removed in pandas 2.0, where passing it raises a TypeError.
df = pd.read_csv(
    'reviews_googleplay_1507788606.csv',
    skiprows=4,
    engine='python',
    names=col_labels,
    index_col=False,
)

In [4]:
# Build the working frame by removing the columns listed below from df.
unused_columns = [
    'AppID', 'AppName',
    '1', '2', '3',
    'Device', 'DeviceType', 'Tags', 'Updated',
]
df_1 = df.drop(columns=unused_columns)

In [5]:
# Preview only the first rows instead of rendering the whole frame.
df_1.head(10)

Unnamed: 0,Date,Lang,Author,Stars,Title,Review,Reply
0,\t2017-10-11,pt,Renato Morais,5,,Muito bom,
1,\t2017-10-11,de,,4,,Scheint eine gute App zu sein. Sehr √ºbersichtl...,
2,\t2017-10-11,pt,,5,,Perfeito. Consigo controlar todos os dados imp...,
3,\t2017-10-11,ru,Eva Lollipop,5,,"–û–≥—Ä–æ–º–Ω–æ–µ —Å–ø–∞—Å–∏–±–æ, –∑–∞ –≤–∞—à —Ç—Ä—É–¥! –≠—Ç–æ –ª—É—á—à–µ –ø—Ä–∏–ª–æ...",
4,\t2017-10-11,es,,5,,Excelente,
5,\t2017-10-11,pt,Larissa Campos,4,,Troquei de celular e perdi todos os registros ...,
6,\t2017-10-11,ko,Seokyeong Jeong,3,,"Ï¢ãÍ∏∞ÎäîÌïúÎç∞ ÏàòÏú†Ï§ë Îí§Î°úÍ∞ÄÍ∏∞ÌïòÎ©¥ ÏàòÏú†Í∞Ä ÎÅùÎÇòÎäîÍ∞Ä ÌïòÎ©¥, Ïù¥Ï†ÑÏóê ÏôºÏ™Ω ÏàòÏú†ÌñàÎäîÎç∞ Ïò§Î•∏Ï™Ω...",
7,\t2017-10-10,pt,,5,,Muito bom consigo amamentar minha bebe nas hor...,
8,\t2017-10-10,sr,,5,,Prava stvar za mame,
9,\t2017-10-10,de,,4,,"Alles super, klasse App f√ºr alles Wichtige. Di...",


In [6]:
import os

from yandex_translate import YandexTranslate

# The API key is read from the YANDEX_TRANSLATE_API_KEY environment variable
# instead of being hard-coded in the notebook. A missing variable raises
# KeyError here, before any request is made.
# NOTE(review): the key that used to be embedded in this cell is exposed in
# the notebook history and should be revoked.
translate = YandexTranslate(os.environ['YANDEX_TRANSLATE_API_KEY'])

# Smoke test of the client: supported languages, translation directions,
# language detection and one sample translation.
# The sample string was mojibake (UTF-8 bytes decoded with the wrong codec);
# it is restored to the intended Cyrillic text.
print('Languages:', translate.langs)
print('Translate directions:', translate.directions)
print('Detect language:', translate.detect('Привет, мир!'))
print('Translate:', translate.translate('Привет, мир!', 'ru-en'))  # or just 'en'

Languages: {'nl', 'sl', 'pl', 'tr', 'mk', 'es', 'fi', 'cs', 'ro', 'de', 'da', 'az', 'sr', 'el', 'et', 'bg', 'ca', 'lt', 'ru', 'be', 'fr', 'it', 'no', 'hy', 'sv', 'hu', 'pt', 'uk', 'lv', 'en', 'hr', 'sq', 'sk'}
Translate directions: ['az-ru', 'be-bg', 'be-cs', 'be-de', 'be-en', 'be-es', 'be-fr', 'be-it', 'be-pl', 'be-ro', 'be-ru', 'be-sr', 'be-tr', 'bg-be', 'bg-ru', 'bg-uk', 'ca-en', 'ca-ru', 'cs-be', 'cs-en', 'cs-ru', 'cs-uk', 'da-en', 'da-ru', 'de-be', 'de-en', 'de-es', 'de-fr', 'de-it', 'de-ru', 'de-tr', 'de-uk', 'el-en', 'el-ru', 'en-be', 'en-ca', 'en-cs', 'en-da', 'en-de', 'en-el', 'en-es', 'en-et', 'en-fi', 'en-fr', 'en-hu', 'en-it', 'en-lt', 'en-lv', 'en-mk', 'en-nl', 'en-no', 'en-pt', 'en-ru', 'en-sk', 'en-sl', 'en-sq', 'en-sv', 'en-tr', 'en-uk', 'es-be', 'es-de', 'es-en', 'es-ru', 'es-uk', 'et-en', 'et-ru', 'fi-en', 'fi-ru', 'fr-be', 'fr-de', 'fr-en', 'fr-ru', 'fr-uk', 'hr-ru', 'hu-en', 'hu-ru', 'hy-ru', 'it-be', 'it-de', 'it-en', 'it-ru', 'it-uk', 'lt-en', 'lt-ru', 'lv-en', 'l

## Перевод русских отзывов

In [7]:
# Translate every Russian review into English, one request per review.
#
# The previous version of this cell never finished: it advanced the portion
# bounds and the cycle counter once per review instead of once per portion,
# so the counter skipped past `cycles` and the `while` loop ran forever.
# It also referenced a misspelled name (`text_portion_l`) for the tail and
# hard-coded the number of portions to 3.
russian_text_df = df_1[df_1['Lang'] == 'ru']
ru_length = len(russian_text_df)  # number of Russian reviews
p = 51  # portion size (the size that was checked against Yandex Translate)
cycles = ru_length // p  # number of full portions
cyc_tail = ru_length - (cycles * p)  # reviews left over after the full portions

russian_list_ = []  # collects the translations of all reviews, in order

# range() with step p also yields the start of the final, shorter portion,
# so the tail needs no separate handling. When there are no Russian reviews
# the loop body simply never runs.
for x in range(0, ru_length, p):
    text_portion = russian_text_df['Review'].iloc[x:x + p]

    for text in text_portion:
        # Each entry is whatever the response holds under 'text'
        # (a list containing the translated string).
        russian = translate.translate(text, 'ru-en')['text']
        russian_list_.append(russian)

print(len(russian_list_))

KeyboardInterrupt: 

In [8]:
# Show the first translation. The list built by the translation loop is
# named `russian_list_` (trailing underscore); `russian_list` is never defined.
russian_list_[0]

['Thank you so much for your work! This is the best app I have installed. üëçüëçüëç Has everything you need to analyze the harmonious life of the baby.']

## Русский перевод

In [7]:
russian_df_total = df_1[df_1['Lang'] == 'ru']

In [None]:
len(russian_df_total)

In [11]:
russian_df_1 = df_1[df_1['Lang'] == 'ru']['Review'][0:51]

In [8]:
# Split the Russian reviews into portions by position: the first portion
# holds 51 reviews, each following one 50, and the last takes the remainder.
ru_reviews = df_1.loc[df_1['Lang'] == 'ru', 'Review']
russian_df_1 = ru_reviews.iloc[0:51]
russian_df_2 = ru_reviews.iloc[51:101]
russian_df_3 = ru_reviews.iloc[101:151]
russian_df_4 = ru_reviews.iloc[151:201]
russian_df_5 = ru_reviews.iloc[201:251]
russian_df_6 = ru_reviews.iloc[251:301]
russian_df_7 = ru_reviews.iloc[301:351]
russian_df_8 = ru_reviews.iloc[351:401]
russian_df_9 = ru_reviews.iloc[401:451]
russian_df_10 = ru_reviews.iloc[451:501]
russian_df_11 = ru_reviews.iloc[501:551]
russian_df_12 = ru_reviews.iloc[551:601]
russian_df_13 = ru_reviews.iloc[601:651]
russian_df_14 = ru_reviews.iloc[651:701]
russian_df_15 = ru_reviews.iloc[701:751]
russian_df_16 = ru_reviews.iloc[751:801]
russian_df_17 = ru_reviews.iloc[801:851]
russian_df_18 = ru_reviews.iloc[851:901]
russian_df_19 = ru_reviews.iloc[901:951]
russian_df_20 = ru_reviews.iloc[951:1001]
russian_df_21 = ru_reviews.iloc[1001:1051]
russian_df_22 = ru_reviews.iloc[1051:]

In [145]:
def _ru_to_en(reviews):
    """Translate one portion of Russian reviews with the 'ru-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'ru-en')['text']


# One request per portion, in portion order.
russ_1 = _ru_to_en(russian_df_1)
russ_2 = _ru_to_en(russian_df_2)
russ_3 = _ru_to_en(russian_df_3)
russ_4 = _ru_to_en(russian_df_4)
russ_5 = _ru_to_en(russian_df_5)
russ_6 = _ru_to_en(russian_df_6)
russ_7 = _ru_to_en(russian_df_7)
russ_8 = _ru_to_en(russian_df_8)
russ_9 = _ru_to_en(russian_df_9)
russ_10 = _ru_to_en(russian_df_10)
russ_11 = _ru_to_en(russian_df_11)
russ_12 = _ru_to_en(russian_df_12)
russ_13 = _ru_to_en(russian_df_13)
russ_14 = _ru_to_en(russian_df_14)
russ_15 = _ru_to_en(russian_df_15)
russ_16 = _ru_to_en(russian_df_16)
russ_17 = _ru_to_en(russian_df_17)
russ_18 = _ru_to_en(russian_df_18)
russ_19 = _ru_to_en(russian_df_19)
russ_20 = _ru_to_en(russian_df_20)
russ_21 = _ru_to_en(russian_df_21)
russ_22 = _ru_to_en(russian_df_22)

In [146]:
# Join the translated Russian portions into a single list, in portion order.
russ_parts = (
    russ_1, russ_2, russ_3, russ_4, russ_5,
    russ_6, russ_7, russ_8, russ_9, russ_10,
    russ_11, russ_12, russ_13, russ_14, russ_15,
    russ_16, russ_17, russ_18, russ_19, russ_20,
    russ_21, russ_22,
)
russ_total_lists = [text for part in russ_parts for text in part]

In [None]:
russ_total_lists

In [150]:
len(russ_total_lists)

1081

## Немецкий перевод

In [15]:
german_df_total = df_1[df_1['Lang'] == 'de']

In [16]:
len(german_df_total)

349

In [19]:
# Split the German reviews into portions by position: 51 reviews first,
# then 50 per portion, with the remainder in the last one.
de_reviews = df_1.loc[df_1['Lang'] == 'de', 'Review']
german_df_1 = de_reviews.iloc[0:51]
german_df_2 = de_reviews.iloc[51:101]
german_df_3 = de_reviews.iloc[101:151]
german_df_4 = de_reviews.iloc[151:201]
german_df_5 = de_reviews.iloc[201:251]
german_df_6 = de_reviews.iloc[251:301]
german_df_7 = de_reviews.iloc[301:]

In [18]:
de_1 = translate.translate(german_df_1, 'de-en')['text']

In [None]:
# Translation: language detection check

In [None]:
translate.detect('–ü—Ä–∏–≤–µ—Ç, –º–∏—Ä!')

In [37]:
#df_1[df_1['Lang'] == 'de'].Review

In [23]:
len(de_1)

48

In [20]:
def _de_to_en(reviews):
    """Translate one portion of German reviews with the 'de-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'de-en')['text']


# Portions 2-7; portion 1 is translated in its own cell.
de_2 = _de_to_en(german_df_2)
de_3 = _de_to_en(german_df_3)
de_4 = _de_to_en(german_df_4)
de_5 = _de_to_en(german_df_5)
de_6 = _de_to_en(german_df_6)
de_7 = _de_to_en(german_df_7)

In [36]:
len(de_7)

48

In [38]:
de_en_total_lists = de_1 + de_2 + de_3 + de_4 + de_5 + de_6 + de_7

In [39]:
len(de_en_total_lists)

346

## Португальский перевод

In [40]:
porto_df_total = df_1[df_1['Lang'] == 'pt']

In [41]:
len(porto_df_total)

716

In [47]:
# Split the Portuguese reviews into portions by position: 51 reviews first,
# then 50 per portion, with the remainder in the last one.
pt_reviews = df_1.loc[df_1['Lang'] == 'pt', 'Review']
porto_df_1 = pt_reviews.iloc[0:51]
porto_df_2 = pt_reviews.iloc[51:101]
porto_df_3 = pt_reviews.iloc[101:151]
porto_df_4 = pt_reviews.iloc[151:201]
porto_df_5 = pt_reviews.iloc[201:251]
porto_df_6 = pt_reviews.iloc[251:301]
porto_df_7 = pt_reviews.iloc[301:351]
porto_df_8 = pt_reviews.iloc[351:401]
porto_df_9 = pt_reviews.iloc[401:451]
porto_df_10 = pt_reviews.iloc[451:501]
porto_df_11 = pt_reviews.iloc[501:551]
porto_df_12 = pt_reviews.iloc[551:601]
porto_df_13 = pt_reviews.iloc[601:651]
porto_df_14 = pt_reviews.iloc[651:701]
porto_df_15 = pt_reviews.iloc[701:]

In [None]:
# Translate the Portuguese reviews into English

In [48]:
def _pt_to_en(reviews):
    """Translate one portion of Portuguese reviews with the 'pt-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'pt-en')['text']


# One request per portion, in portion order.
pt_1 = _pt_to_en(porto_df_1)
pt_2 = _pt_to_en(porto_df_2)
pt_3 = _pt_to_en(porto_df_3)
pt_4 = _pt_to_en(porto_df_4)
pt_5 = _pt_to_en(porto_df_5)
pt_6 = _pt_to_en(porto_df_6)
pt_7 = _pt_to_en(porto_df_7)
pt_8 = _pt_to_en(porto_df_8)
pt_9 = _pt_to_en(porto_df_9)
pt_10 = _pt_to_en(porto_df_10)
pt_11 = _pt_to_en(porto_df_11)
pt_12 = _pt_to_en(porto_df_12)
pt_13 = _pt_to_en(porto_df_13)
pt_14 = _pt_to_en(porto_df_14)
pt_15 = _pt_to_en(porto_df_15)

In [49]:
pt_en_total_lists = pt_1 + pt_2 + pt_3 + pt_4 + pt_5 +pt_6 +pt_7 +pt_8 + pt_9 +pt_10 + pt_11 + pt_12 +pt_13 + pt_14 +pt_15

In [50]:
len(pt_en_total_lists)

716

## Испанский перевод

In [51]:
spanish_df_total = df_1[df_1['Lang'] == 'es']

In [52]:
len(spanish_df_total)

361

In [53]:
# Split the Spanish reviews into portions by position: 51 reviews first,
# then 50 per portion, with the remainder in the last one.
es_reviews = df_1.loc[df_1['Lang'] == 'es', 'Review']
spanish_df_1 = es_reviews.iloc[0:51]
spanish_df_2 = es_reviews.iloc[51:101]
spanish_df_3 = es_reviews.iloc[101:151]
spanish_df_4 = es_reviews.iloc[151:201]
spanish_df_5 = es_reviews.iloc[201:251]
spanish_df_6 = es_reviews.iloc[251:301]
spanish_df_7 = es_reviews.iloc[301:351]
spanish_df_8 = es_reviews.iloc[351:]

In [None]:
# Translate the Spanish reviews into English

In [54]:
def _es_to_en(reviews):
    """Translate one portion of Spanish reviews with the 'es-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'es-en')['text']


# One request per portion, in portion order.
sp_1 = _es_to_en(spanish_df_1)
sp_2 = _es_to_en(spanish_df_2)
sp_3 = _es_to_en(spanish_df_3)
sp_4 = _es_to_en(spanish_df_4)
sp_5 = _es_to_en(spanish_df_5)
sp_6 = _es_to_en(spanish_df_6)
sp_7 = _es_to_en(spanish_df_7)
sp_8 = _es_to_en(spanish_df_8)

In [55]:
es_en_total_lists = sp_1 + sp_2 +sp_3 + sp_4 + sp_5 + sp_6 +sp_7 +sp_8

In [56]:
len(es_en_total_lists)

361

## Английский

In [60]:
english_df_total = df_1[df_1['Lang'] == 'en']['Review']

In [156]:
english_total_list = list(english_df_total)

In [157]:
type(english_total_list)

list

In [62]:
len(english_df_total)

356

## Французский перевод

In [63]:
french_df_total = df_1[df_1['Lang'] == 'fr']['Review']

In [65]:
len(french_df_total)

122

In [66]:
# Split the French reviews into portions by position: 51 reviews first,
# then up to 50 in each of the next two.
fr_reviews = df_1.loc[df_1['Lang'] == 'fr', 'Review']
french_df_1 = fr_reviews.iloc[0:51]
french_df_2 = fr_reviews.iloc[51:101]
french_df_3 = fr_reviews.iloc[101:151]

In [None]:
# Translate the French reviews into English

In [67]:
def _fr_to_en(reviews):
    """Translate one portion of French reviews with the 'fr-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'fr-en')['text']


# One request per portion, in portion order.
fr_1 = _fr_to_en(french_df_1)
fr_2 = _fr_to_en(french_df_2)
fr_3 = _fr_to_en(french_df_3)

In [68]:
fr_en_total_lists =  fr_1 + fr_2 + fr_3 

In [69]:
len(fr_en_total_lists)

122

## Польский перевод

In [70]:
polish_df_total = df_1[df_1['Lang'] == 'pl']['Review']

In [71]:
len(polish_df_total)

88

In [78]:
# Split the Polish reviews by position: the first 51, then the remainder.
pl_reviews = df_1.loc[df_1['Lang'] == 'pl', 'Review']
polish_df_1 = pl_reviews.iloc[0:51]
polish_df_2 = pl_reviews.iloc[51:]

In [None]:
# Translate the Polish reviews into English

In [79]:
def _pl_to_en(reviews):
    """Translate one portion of Polish reviews with the 'pl-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'pl-en')['text']


# One request per portion, in portion order.
pl_1 = _pl_to_en(polish_df_1)
pl_2 = _pl_to_en(polish_df_2)

In [80]:
pl_en_total_lists =  pl_1 + pl_2 

In [81]:
len (pl_en_total_lists )

88

## Итальянский перевод

In [86]:
italian_df_total = df_1[df_1['Lang'] == 'it']['Review']

In [87]:
len(italian_df_total)

79

In [88]:
# Split the Italian reviews by position: the first 51, then the remainder.
it_reviews = df_1.loc[df_1['Lang'] == 'it', 'Review']
italian_df_1 = it_reviews.iloc[0:51]
italian_df_2 = it_reviews.iloc[51:]

In [90]:
def _it_to_en(reviews):
    """Translate one portion of Italian reviews with the 'it-en' direction.

    Returns the value stored under 'text' in the translate() response.
    """
    return translate.translate(reviews, 'it-en')['text']


# One request per portion, in portion order.
it_1 = _it_to_en(italian_df_1)
it_2 = _it_to_en(italian_df_2)

In [91]:
it_en_total_lists =  it_1 + it_2 

In [92]:
len(it_en_total_lists)

79

## Турецкий перевод

In [93]:
turkish_df_total = df_1[df_1['Lang'] == 'tr']['Review']

In [95]:
len(turkish_df_total)

54

In [100]:
turkish_df_1 = df_1[df_1['Lang'] == 'tr']['Review']

In [147]:
tr_1 = translate.translate(turkish_df_1, 'tr-en')['text']

In [148]:
tr_en_total_lists = tr_1

In [149]:
len(tr_en_total_lists)

54

## Нидерландский перевод

In [104]:
nl_df_total = df_1[df_1['Lang'] == 'nl']['Review']

In [105]:
len(nl_df_total)

50

In [106]:
nl_en_total_list = translate.translate(nl_df_total, 'nl-en')['text']

In [110]:
len(nl_en_total_list)

50

## Чешский перевод

In [107]:
cs_df_total = df_1[df_1['Lang'] == 'cs']['Review']

In [108]:
len(cs_df_total)

37

In [112]:
cs_en_total_list = translate.translate(cs_df_total, 'cs-en')['text']

In [113]:
len (cs_en_total_list)

37

## Greek translation 

In [114]:
el_df_total = df_1[df_1['Lang'] == 'el']['Review']

In [115]:
len(el_df_total)

17

In [116]:
el_en_total_list = translate.translate(el_df_total, 'el-en')['text']

In [117]:
len(el_en_total_list)

17

## Finnish translation

In [14]:
fi_df_total = df_1[df_1['Lang'] == 'fi']['Review']

In [16]:
type(fi_df_total)

pandas.core.series.Series

In [120]:
len(fi_df_total)

6

In [121]:
fi_en_total_list = translate.translate(fi_df_total, 'fi-en')['text']

In [122]:
len(fi_en_total_list)

6

## Danish translation 

In [123]:
da_df_total = df_1[df_1['Lang'] == 'da']['Review']

In [124]:
len(da_df_total)

5

In [125]:
da_en_total_list = translate.translate(da_df_total, 'da-en')['text']

In [126]:
len(da_en_total_list)

5

## Norwegian translation

In [127]:
no_df_total = df_1[df_1['Lang'] == 'no']['Review']

In [128]:
len(no_df_total )

4

In [129]:
no_en_total_list = translate.translate(no_df_total, 'no-en')['text']

In [130]:
len(no_en_total_list)

4

## Slovak translation

In [131]:
sk_df_total = df_1[df_1['Lang'] == 'sk']['Review']

In [132]:
len(sk_df_total)

4

In [133]:
sk_en_total_list = translate.translate(sk_df_total, 'sk-en')['text']

In [134]:
# Check the size of the translated list, as the other language sections do.
# The cell previously re-measured the untranslated input, sk_df_total.
len(sk_en_total_list)

4

## Croatian translation 

In [135]:
hr_df_total = df_1[df_1['Lang'] == 'hr']['Review']

In [136]:
len(hr_df_total)

3

In [137]:
hr_en_total_list = translate.translate(hr_df_total, 'hr-en')['text']

In [138]:
len(hr_en_total_list)

3

## Bulgarian translation

In [139]:
bg_df_total = df_1[df_1['Lang'] == 'bg']['Review']

In [140]:
len(bg_df_total)

3

In [141]:
bg_en_total_list = translate.translate(bg_df_total, 'bg-en')['text']

In [142]:
len(bg_en_total_list)

3

## Объединяем все переводы

In [160]:
# Concatenate every per-language list of texts into one list, keeping the
# language order used below.
per_language_lists = (
    russ_total_lists, de_en_total_lists, pt_en_total_lists,
    es_en_total_lists, english_total_list,
    fr_en_total_lists, pl_en_total_lists,
    it_en_total_lists, tr_en_total_lists, nl_en_total_list,
    cs_en_total_list, el_en_total_list,
    fi_en_total_list, da_en_total_list, no_en_total_list,
    sk_en_total_list, hr_en_total_list, bg_en_total_list,
)
united_list = [text for texts in per_language_lists for text in texts]

In [161]:
len(united_list)

3332

## Переводим весь текст в нижний регистр

In [167]:
# Build a lower-cased copy of every text in united_list.
united_list_lower = []
for text in united_list:
    united_list_lower.append(text.lower())

In [168]:
# Save the lower-cased texts to disk.
with open('united_list_lower.pickle', 'wb') as pickle_file:
    # Serialise the list using the highest pickle protocol available.
    pickle.dump(united_list_lower, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [12]:
# Read the saved texts back from disk.
with open('united_list_lower.pickle', 'rb') as pickle_file:
    # pickle detects the protocol version on its own, so none is passed.
    united_list_lower = pickle.load(pickle_file)

In [13]:
united_list_lower[0:50]

['thank you so much for your work! this is the best app i have installed. üëçüëçüëç has everything you need to analyze the harmonious life of the baby.',
 "the app correctly displays the information in the end of the day. for example, the child's sleep consists of 37 hours per day. how can this be? when the baby was sleeping 11 hours. or walk, walking one hour, the application writes the results of 14 hours. developers, pay attention to it.",
 'somehow, the walk began to set ending time. that is, you cannot run it like a dream, and to note the completion time. walk immediately puts the beginning and end of the minute. you have to edit the time manually',
 'simple, convenient, no ads almost',
 'super! very helpful!',
 'i would like to decrypt in the charts clicking on a filled plot',
 'very convenient',
 'like the app, as an inexperienced mother i always forget when and what, so thank the developers for such a utility',
 'super app when you do not have to bother with all sorts of gra