In [53]:
import pandas as pd
from googletrans import Translator

from mongo_orm import MongoORM

import os
from dotenv import load_dotenv

In [57]:
def translate(data: str | list[str], src: str, dest: str) -> str | dict[str: str]:
    """
    Function translates text from src lang to dest lang

    :param data: either can be string or list of string

    :return: if string is passed, returns transalted string, if list of sreing
    is passed returns list of dict where orig is original text and trans is translated
    """
    translator = Translator()

    if isinstance(data, str):
        try:
            result = translator.translate(data, src=src, dest=dest)
            
            if result.text is not None:
                return result.text
            
            raise ConnectionError(f'Could not translate {data[:5]}...')
        except Exception as e:
            raise ConnectionError(f'Could not translate {data[:5]}...: {e}')
    else:
        trans = []

        for text in data:
            try:
                result = translator.translate(text, src="ru", dest="en")
            except Exception as e:
                raise ConnectionError(f'Could not translate {text[:5]}...: {e}')

            trans.append(
                {
                    "original": text,
                    "trans": result.text
                }
            )

In [43]:
# Load variables from a local .env file (if one exists) into the process
# environment so the lookups below also work on a freshly restarted kernel.
# With its default arguments, variables that are already set are left untouched.
load_dotenv()

# Connection settings for the Mongo instance; each one is None when the
# corresponding environment variable is not set.
MONGO_IP = os.getenv("MONGO_IP")
MONGO_PORT = os.getenv("MONGO_PORT")
MONGO_DB = os.getenv("MONGO_DB")
MONGO_USERNAME = os.getenv("MONGO_USERNAME")
MONGO_PASSWORD = os.getenv("MONGO_PASSWORD")

In [68]:
# Open the database connection using the settings read from the environment.
# NOTE(review): MONGO_PORT is whatever os.getenv returned (a str or None) —
# confirm MongoORM accepts that rather than an int.
mongo_client = MongoORM(
    ip=MONGO_IP,
    port=MONGO_PORT,
    db=MONGO_DB,
    user=MONGO_USERNAME,
    password=MONGO_PASSWORD,
)

2025-03-07 15:54:39,256 INFO Coonnected to itm database on 206.81.16.39


In [71]:
# Ask the Mongo wrapper for all collections; the label printed below treats
# each collection as one channel.
channels = mongo_client.get_all_collections()

print(f'All channels: {channels}')

All channels: ['RVvoenkor', 'readovkanews']


In [46]:
# Name of the channel (Mongo collection) this notebook analyses.
CHANNEL_NAME = "readovkanews"

In [47]:
# Fetch the stored entries for the selected channel. The CHANNEL_NAME constant
# is used instead of a repeated literal so switching channels is a one-line change.
posts = mongo_client.get_collection_entries(collection=CHANNEL_NAME)

In [48]:
# Index the posts by their `_id`, keeping each post's text next to a
# DataFrame built from that post's comments.
channel_posts_comments = {
    entry["_id"]: {
        "text": entry["text"],
        "comments": pd.DataFrame(entry["comments"]),
    }
    for entry in posts
}

In [49]:
# Sanity check: one line per post with its id, the first 10 characters of its
# text and the number of comments held in its DataFrame.
# Note: `post` is the dictionary key here, i.e. the post id, not the post document.
for post, data in channel_posts_comments.items():
    print(f'For post with id: {post} and text: {data["text"][:10]}... {len(data["comments"])} comments saved')

For post with id: 93652 and text: Британский... 197 comments saved
For post with id: 93651 and text: Украинские... 120 comments saved
For post with id: 93649 and text: Зеленский ... 145 comments saved
For post with id: 93647 and text: «Донбасс с... 226 comments saved
For post with id: 93645 and text: Украинских... 91 comments saved
For post with id: 93643 and text: Вернувшимс... 86 comments saved
For post with id: 93642 and text: ❗️В Россию... 135 comments saved
For post with id: 93634 and text: На украинс... 250 comments saved
For post with id: 93626 and text: Командован... 149 comments saved
For post with id: 93623 and text: Поножовщин... 773 comments saved
For post with id: 93618 and text: «Министры ... 334 comments saved
For post with id: 93617 and text: Зеленский ... 192 comments saved
For post with id: 93616 and text: Европа буд... 490 comments saved
For post with id: 93610 and text: Президент ... 103 comments saved
For post with id: 93600 and text: Кучка проп... 399 comments sav

In [67]:
# Spot-check the translator on a single comment: the row at position 10 of
# the comments table for post 93652, translated from Russian to English.
data = channel_posts_comments[93652]["comments"].iloc[10]

print(f'Original text: {data["text"]}')
print(f'Translation: {translate(data["text"], src="ru", dest="en")}')

Original text: Кишка тонка! Они слишком трусливы, чтобы встревать в прямой конфликт с РФ
Translation: The gut is thin!They are too cowardly to get into a direct conflict with the Russian Federation


In [84]:
# Display the comments table for post 93652.
channel_posts_comments[93652]["comments"]


Unnamed: 0,id,text,author,posting_ts,post_id
0,19814482,"Интересует подлётная скорость ""Орешника"" к Лон...",5188742892,1.741124e+09,93652
1,19813741,"Если МО РФ им это позволит то ""врагов народа"" ...",2115101064,1.741114e+09,93652
2,19811760,ну если бритиши хотят свою армию земле придать...,7606681005,1.741101e+09,93652
3,19811673,"Убивать нужно всех! Всех нахуй к бандере, и хо...",1053428964,1.741099e+09,93652
4,19811154,"вводите, но за этим очевидно пойдет 3я мировая...",6540396053,1.741091e+09,93652
...,...,...,...,...,...
192,19807729,Вот мы и движемся к развязке... Лозунги за мир...,729590455,1.741030e+09,93652
193,19807721,"Европа превращается в военный блок,а это уже о...",6125797970,1.741030e+09,93652
194,19807719,мелкобриты пытаются развязать 3-ю мировую,5094298324,1.741030e+09,93652
195,19807709,Англичанка гадит,5717425094,1.741030e+09,93652


In [78]:
# Second spot-check: print the comment at position 3 of post 93652 and then
# its Russian-to-English translation.
comment_text = channel_posts_comments[93652]["comments"].iloc[3]["text"]
print(comment_text)
print(translate(comment_text, src="ru", dest="en"))

Убивать нужно всех! Всех нахуй к бандере, и хохлов и наемников. Особенно хохолов!
You need to kill everyone!Fuck everyone to Bandera, and crests and mercenaries.Especially Khokholov!
