# Comments analysis

In [24]:
! pip install --upgrade pip
! pip install fasttext
! pip install emoji
! pip install pandas emoji openpyxl



In [25]:
import pandas as pd
import re
import fasttext
import emoji

In [26]:
# Defining file paths for Ukrainian and Russian channel comments
ukr_files = [
    "data/ua/Comments_ds.csv",
    "data/ua/Comments_ko.csv",
    "data/ua/Comments_uo.csv",
    "data/ua/Comments_no.csv"
]

ru_files = [
    "data/ru/Comments_cl.csv",
    "data/ru/Comments_dm.csv",
    "data/ru/Comments_re.csv"
]

# Helper function to load and label each DataFrame
def load_and_label_comments(file_list, channel_orientation_label):
    df_list = []
    for file in file_list:
        df = pd.read_csv(file)
        df["channel_orientation"] = channel_orientation_label
        df["source_file"] = file
        df_list.append(df)
    return pd.concat(df_list, ignore_index=True)

# Loading all Ukrainian and Russian comments
ukr_comments = load_and_label_comments(ukr_files, "pro-ukrainian")
ru_comments = load_and_label_comments(ru_files, "pro-russian")

# Combining both into a single DataFrame
df_all_comments = pd.concat([ukr_comments, ru_comments], ignore_index=True)

# Displaying basic info
df_all_comments

Unnamed: 0,post_id,comment_text,comment_date,comment_user_id,channel_orientation,source_file
0,21462,–≥–æ–ª–æ–≤–Ω–µ —â–æ –∫–∞—Ü–∞–ø–∏ –¥–æ—Ö–Ω—É—Ç—å –≤ –º—É–∫–∞—Ö. –∞ —Ç—ñ –∫–∞—Ü–∞–ø–∏...,2025-03-14 09:27:40+00:00,6.189523e+09,pro-ukrainian,data/ua/Comments_ds.csv
1,21462,—É –∫–æ–∂–Ω–æ–≥–æ –º–æ–∂–µ –±—É—Ç–∏ —Å–≤–æ—è –¥—É–º–∫–∞... –±–µ–∑ –ø—Ä–æ–±–ª–µ–º,2025-03-14 06:58:47+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv
2,21462,–Ω—É —Ç–∏ –º–∞–π–∂–µ —Ç–∞–∫–∏–π —Å–∞–º–∏–π —Å—Ç—Ä–∞—Ç–µ–≥ —è–∫ —Ä–∞–¥—ñ–æ–∞–∫—Ç–∏–≤–Ω...,2025-03-14 06:57:49+00:00,8.454073e+08,pro-ukrainian,data/ua/Comments_ds.csv
3,21462,"—Ç–∞–∫, —Å–∞–º–µ —Ç–∞–∫ —ñ –¥—É–º–∞—é .",2025-03-14 06:56:22+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv
4,21462,"–õ—é–¥–∏ —è–∫—ñ –∫–∞–∂—É—Ç—å, —â–æ —Ä–æ—Å—ñ—è –∑–∞–π—à–ª–∞ –Ω–∞ –°—É–º—â–∏–Ω—É —á...",2025-03-14 06:49:36+00:00,9.346062e+08,pro-ukrainian,data/ua/Comments_ds.csv
...,...,...,...,...,...,...
541662,93009,–ß–µ—Ä–Ω–æ–º–∞–∑—ã–π —Å—ã–Ω —à–ª—é—Ö–∏,2025-02-26 04:03:12+00:00,7.526166e+09,pro-russian,data/ru/Comments_re.csv
541663,93009,–ú—É—Ñ—Ç–∏–π –æ—Ç —Å–ª–æ–≤–∞ –º—É—Ñ—Ç–∞,2025-02-21 04:45:54+00:00,1.076707e+09,pro-russian,data/ru/Comments_re.csv
541664,93009,–ö–∞–∫ –æ–Ω–∏ –Ω–µ –ø–æ–π–º—É—Ç –æ–Ω–∏ –ø–æ—Ç–æ–º–∫–∏ –ø—Ä–æ–∏–≥—Ä–∞–≤—à–∏—Ö. –û–Ω–∏...,2025-02-20 19:05:05+00:00,1.183544e+09,pro-russian,data/ru/Comments_re.csv
541665,93009,–≠—Ç–æ –∫—Ç–æ —Ç–∞–∫–æ–π –≤–æ–æ–±—â–µ?,2025-02-20 15:50:46+00:00,9.271602e+08,pro-russian,data/ru/Comments_re.csv


### Text cleaning

In [27]:
def clean_text(comment_text):
    if pd.isnull(comment_text):
        return ""
    comment_text = re.sub(r"http\S+|www\S+|t\.me\S+", "", comment_text)  # remove URLs
    comment_text = re.sub(r"@\w+", "", comment_text)                     # remove mentions
    comment_text = re.sub(r"#\w+", "", comment_text)                     # remove hashtags
    return comment_text

In [28]:
df_all_comments["comment_text"] = df_all_comments["comment_text"].apply(clean_text)

### Detecting language

In [29]:
# Loading the language detection model
lang_model = fasttext.load_model("data/lid.176.ftz")

# Defining language detection function
def detect_language(text):
    if not isinstance(text, str) or len(text.strip()) < 10:
        return "unknown"
    prediction = lang_model.predict(text.replace('\n', ' '), k=1)
    return prediction[0][0].replace("__label__", "")

In [30]:
df_all_comments = df_all_comments[df_all_comments["comment_text"].notna()]
df_all_comments["comment_language"] = df_all_comments["comment_text"].apply(detect_language)
df_all_comments

Unnamed: 0,post_id,comment_text,comment_date,comment_user_id,channel_orientation,source_file,comment_language
0,21462,–≥–æ–ª–æ–≤–Ω–µ —â–æ –∫–∞—Ü–∞–ø–∏ –¥–æ—Ö–Ω—É—Ç—å –≤ –º—É–∫–∞—Ö. –∞ —Ç—ñ –∫–∞—Ü–∞–ø–∏...,2025-03-14 09:27:40+00:00,6.189523e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk
1,21462,—É –∫–æ–∂–Ω–æ–≥–æ –º–æ–∂–µ –±—É—Ç–∏ —Å–≤–æ—è –¥—É–º–∫–∞... –±–µ–∑ –ø—Ä–æ–±–ª–µ–º,2025-03-14 06:58:47+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk
2,21462,–Ω—É —Ç–∏ –º–∞–π–∂–µ —Ç–∞–∫–∏–π —Å–∞–º–∏–π —Å—Ç—Ä–∞—Ç–µ–≥ —è–∫ —Ä–∞–¥—ñ–æ–∞–∫—Ç–∏–≤–Ω...,2025-03-14 06:57:49+00:00,8.454073e+08,pro-ukrainian,data/ua/Comments_ds.csv,uk
3,21462,"—Ç–∞–∫, —Å–∞–º–µ —Ç–∞–∫ —ñ –¥—É–º–∞—é .",2025-03-14 06:56:22+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk
4,21462,"–õ—é–¥–∏ —è–∫—ñ –∫–∞–∂—É—Ç—å, —â–æ —Ä–æ—Å—ñ—è –∑–∞–π—à–ª–∞ –Ω–∞ –°—É–º—â–∏–Ω—É —á...",2025-03-14 06:49:36+00:00,9.346062e+08,pro-ukrainian,data/ua/Comments_ds.csv,uk
...,...,...,...,...,...,...,...
541662,93009,–ß–µ—Ä–Ω–æ–º–∞–∑—ã–π —Å—ã–Ω —à–ª—é—Ö–∏,2025-02-26 04:03:12+00:00,7.526166e+09,pro-russian,data/ru/Comments_re.csv,ru
541663,93009,–ú—É—Ñ—Ç–∏–π –æ—Ç —Å–ª–æ–≤–∞ –º—É—Ñ—Ç–∞,2025-02-21 04:45:54+00:00,1.076707e+09,pro-russian,data/ru/Comments_re.csv,ru
541664,93009,–ö–∞–∫ –æ–Ω–∏ –Ω–µ –ø–æ–π–º—É—Ç –æ–Ω–∏ –ø–æ—Ç–æ–º–∫–∏ –ø—Ä–æ–∏–≥—Ä–∞–≤—à–∏—Ö. –û–Ω–∏...,2025-02-20 19:05:05+00:00,1.183544e+09,pro-russian,data/ru/Comments_re.csv,ru
541665,93009,–≠—Ç–æ –∫—Ç–æ —Ç–∞–∫–æ–π –≤–æ–æ–±—â–µ?,2025-02-20 15:50:46+00:00,9.271602e+08,pro-russian,data/ru/Comments_re.csv,ru


# Removing duplicates

In [31]:
df_all_comments = df_all_comments.drop_duplicates(subset="comment_text")

# Normalising whitespace

In [32]:
df_all_comments.loc["comment_text"] = df_all_comments["comment_text"].str.replace(r'\s+', ' ', regex=True).str.strip()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_comments.loc["comment_text"] = df_all_comments["comment_text"].str.replace(r'\s+', ' ', regex=True).str.strip()


# Lowercasing

In [33]:
df_all_comments["comment_text"] = df_all_comments["comment_text"].str.lower()
df_all_comments

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_comments["comment_text"] = df_all_comments["comment_text"].str.lower()


Unnamed: 0,post_id,comment_text,comment_date,comment_user_id,channel_orientation,source_file,comment_language
0,21462.0,–≥–æ–ª–æ–≤–Ω–µ —â–æ –∫–∞—Ü–∞–ø–∏ –¥–æ—Ö–Ω—É—Ç—å –≤ –º—É–∫–∞—Ö. –∞ —Ç—ñ –∫–∞—Ü–∞–ø–∏...,2025-03-14 09:27:40+00:00,6.189523e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk
1,21462.0,—É –∫–æ–∂–Ω–æ–≥–æ –º–æ–∂–µ –±—É—Ç–∏ —Å–≤–æ—è –¥—É–º–∫–∞... –±–µ–∑ –ø—Ä–æ–±–ª–µ–º,2025-03-14 06:58:47+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk
2,21462.0,–Ω—É —Ç–∏ –º–∞–π–∂–µ —Ç–∞–∫–∏–π —Å–∞–º–∏–π —Å—Ç—Ä–∞—Ç–µ–≥ —è–∫ —Ä–∞–¥—ñ–æ–∞–∫—Ç–∏–≤–Ω...,2025-03-14 06:57:49+00:00,8.454073e+08,pro-ukrainian,data/ua/Comments_ds.csv,uk
3,21462.0,"—Ç–∞–∫, —Å–∞–º–µ —Ç–∞–∫ —ñ –¥—É–º–∞—é .",2025-03-14 06:56:22+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk
4,21462.0,"–ª—é–¥–∏ —è–∫—ñ –∫–∞–∂—É—Ç—å, —â–æ —Ä–æ—Å—ñ—è –∑–∞–π—à–ª–∞ –Ω–∞ —Å—É–º—â–∏–Ω—É —á...",2025-03-14 06:49:36+00:00,9.346062e+08,pro-ukrainian,data/ua/Comments_ds.csv,uk
...,...,...,...,...,...,...,...
541662,93009.0,—á–µ—Ä–Ω–æ–º–∞–∑—ã–π —Å—ã–Ω —à–ª—é—Ö–∏,2025-02-26 04:03:12+00:00,7.526166e+09,pro-russian,data/ru/Comments_re.csv,ru
541663,93009.0,–º—É—Ñ—Ç–∏–π –æ—Ç —Å–ª–æ–≤–∞ –º—É—Ñ—Ç–∞,2025-02-21 04:45:54+00:00,1.076707e+09,pro-russian,data/ru/Comments_re.csv,ru
541664,93009.0,–∫–∞–∫ –æ–Ω–∏ –Ω–µ –ø–æ–π–º—É—Ç –æ–Ω–∏ –ø–æ—Ç–æ–º–∫–∏ –ø—Ä–æ–∏–≥—Ä–∞–≤—à–∏—Ö. –æ–Ω–∏...,2025-02-20 19:05:05+00:00,1.183544e+09,pro-russian,data/ru/Comments_re.csv,ru
541666,93005.0,—ç—Ç–æ —á—Ç–æ —Ç–∞–∫–æ–µ,2025-02-27 05:20:47+00:00,7.478964e+09,pro-russian,data/ru/Comments_re.csv,ru


# Transforming emotion into a text


In [34]:
# Function to transform emojis
def transform_emojis(text):
    if pd.isnull(text):
        return ""
    return emoji.demojize(text, delimiters=(":"," "))

In [35]:
# Applying transformation to the "text" column
df_all_comments["text_transformed"] = df_all_comments["comment_text"].apply(transform_emojis)
df_all_comments

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_all_comments["text_transformed"] = df_all_comments["comment_text"].apply(transform_emojis)


Unnamed: 0,post_id,comment_text,comment_date,comment_user_id,channel_orientation,source_file,comment_language,text_transformed
0,21462.0,–≥–æ–ª–æ–≤–Ω–µ —â–æ –∫–∞—Ü–∞–ø–∏ –¥–æ—Ö–Ω—É—Ç—å –≤ –º—É–∫–∞—Ö. –∞ —Ç—ñ –∫–∞—Ü–∞–ø–∏...,2025-03-14 09:27:40+00:00,6.189523e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk,–≥–æ–ª–æ–≤–Ω–µ —â–æ –∫–∞—Ü–∞–ø–∏ –¥–æ—Ö–Ω—É—Ç—å –≤ –º—É–∫–∞—Ö. –∞ —Ç—ñ –∫–∞—Ü–∞–ø–∏...
1,21462.0,—É –∫–æ–∂–Ω–æ–≥–æ –º–æ–∂–µ –±—É—Ç–∏ —Å–≤–æ—è –¥—É–º–∫–∞... –±–µ–∑ –ø—Ä–æ–±–ª–µ–º,2025-03-14 06:58:47+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk,—É –∫–æ–∂–Ω–æ–≥–æ –º–æ–∂–µ –±—É—Ç–∏ —Å–≤–æ—è –¥—É–º–∫–∞... –±–µ–∑ –ø—Ä–æ–±–ª–µ–º
2,21462.0,–Ω—É —Ç–∏ –º–∞–π–∂–µ —Ç–∞–∫–∏–π —Å–∞–º–∏–π —Å—Ç—Ä–∞—Ç–µ–≥ —è–∫ —Ä–∞–¥—ñ–æ–∞–∫—Ç–∏–≤–Ω...,2025-03-14 06:57:49+00:00,8.454073e+08,pro-ukrainian,data/ua/Comments_ds.csv,uk,–Ω—É —Ç–∏ –º–∞–π–∂–µ —Ç–∞–∫–∏–π —Å–∞–º–∏–π —Å—Ç—Ä–∞—Ç–µ–≥ —è–∫ —Ä–∞–¥—ñ–æ–∞–∫—Ç–∏–≤–Ω...
3,21462.0,"—Ç–∞–∫, —Å–∞–º–µ —Ç–∞–∫ —ñ –¥—É–º–∞—é .",2025-03-14 06:56:22+00:00,6.763106e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk,"—Ç–∞–∫, —Å–∞–º–µ —Ç–∞–∫ —ñ –¥—É–º–∞—é ."
4,21462.0,"–ª—é–¥–∏ —è–∫—ñ –∫–∞–∂—É—Ç—å, —â–æ —Ä–æ—Å—ñ—è –∑–∞–π—à–ª–∞ –Ω–∞ —Å—É–º—â–∏–Ω—É —á...",2025-03-14 06:49:36+00:00,9.346062e+08,pro-ukrainian,data/ua/Comments_ds.csv,uk,"–ª—é–¥–∏ —è–∫—ñ –∫–∞–∂—É—Ç—å, —â–æ —Ä–æ—Å—ñ—è –∑–∞–π—à–ª–∞ –Ω–∞ —Å—É–º—â–∏–Ω—É —á..."
...,...,...,...,...,...,...,...,...
541662,93009.0,—á–µ—Ä–Ω–æ–º–∞–∑—ã–π —Å—ã–Ω —à–ª—é—Ö–∏,2025-02-26 04:03:12+00:00,7.526166e+09,pro-russian,data/ru/Comments_re.csv,ru,—á–µ—Ä–Ω–æ–º–∞–∑—ã–π —Å—ã–Ω —à–ª—é—Ö–∏
541663,93009.0,–º—É—Ñ—Ç–∏–π –æ—Ç —Å–ª–æ–≤–∞ –º—É—Ñ—Ç–∞,2025-02-21 04:45:54+00:00,1.076707e+09,pro-russian,data/ru/Comments_re.csv,ru,–º—É—Ñ—Ç–∏–π –æ—Ç —Å–ª–æ–≤–∞ –º—É—Ñ—Ç–∞
541664,93009.0,–∫–∞–∫ –æ–Ω–∏ –Ω–µ –ø–æ–π–º—É—Ç –æ–Ω–∏ –ø–æ—Ç–æ–º–∫–∏ –ø—Ä–æ–∏–≥—Ä–∞–≤—à–∏—Ö. –æ–Ω–∏...,2025-02-20 19:05:05+00:00,1.183544e+09,pro-russian,data/ru/Comments_re.csv,ru,–∫–∞–∫ –æ–Ω–∏ –Ω–µ –ø–æ–π–º—É—Ç –æ–Ω–∏ –ø–æ—Ç–æ–º–∫–∏ –ø—Ä–æ–∏–≥—Ä–∞–≤—à–∏—Ö. –æ–Ω–∏...
541666,93005.0,—ç—Ç–æ —á—Ç–æ —Ç–∞–∫–æ–µ,2025-02-27 05:20:47+00:00,7.478964e+09,pro-russian,data/ru/Comments_re.csv,ru,—ç—Ç–æ —á—Ç–æ —Ç–∞–∫–æ–µ


# Merging posts with comments datasets

In [36]:
# Loading posts dataframe already with filtered relevant posts
df_posts = pd.read_csv("data/analysis/processed_csv/relevant_posts.csv")
df_posts

Unnamed: 0,post_id,date,text,views,forwards,channel_id,channel_name,language,date_only,text_transformed,...,dehumanization,mockery,credibility,USA_blame,Russia_blame,propaganda_technique,narrative_summary,source,bloc,period
0,156507,2025-03-04 08:47:39,"–∞–º–µ—Ä–∏–∫–∞–Ω—Å–∫–∏–µ —Å–º–∏ –∑–∞—è–≤–ª—è—é—Ç, —á—Ç–æ –≤—á–µ—Ä–∞ –≤–µ–Ω–≥—Ä–∏—è –∑...",205470,292,-1001101806611,boris_rozhin,ru,2025-03-04,"–∞–º–µ—Ä–∏–∫–∞–Ω—Å–∫–∏–µ —Å–º–∏ –∑–∞—è–≤–ª—è—é—Ç, —á—Ç–æ –≤—á–µ—Ä–∞ –≤–µ–Ω–≥—Ä–∏—è –∑...",...,1.0,1.0,,1.0,0.0,"sarcasm, rhetorical ridicule, selective quotin...",Ukraine must grovel to Trump and the EU for su...,cl,pro-russian,post
1,156500,2025-03-04 07:28:05,–±—ã–≤—à–∏–π –º—ç—Ä –Ω—å—é-–π–æ—Ä–∫–∞ –∏ –∞–¥–≤–æ–∫–∞—Ç —Ç—Ä–∞–º–ø–∞ —Ä—É–¥–æ–ª—å—Ñ ...,216760,642,-1001101806611,boris_rozhin,ru,2025-03-04,–±—ã–≤—à–∏–π –º—ç—Ä –Ω—å—é-–π–æ—Ä–∫–∞ –∏ –∞–¥–≤–æ–∫–∞—Ç —Ç—Ä–∞–º–ø–∞ —Ä—É–¥–æ–ª—å—Ñ ...,...,1.0,1.0,1.0,0.0,0.0,"sarcasm, rhetorical ridicule, emotional appeal","Giuliani joined in denouncing Zelensky, and mo...",cl,pro-russian,post
2,156491,2025-03-04 03:15:49,**–≤–∏—Ü–µ-–ø—Ä–µ–∑–∏–¥–µ–Ω—Ç —Å—à–∞ –≤—ç–Ω—Å:**\n\n1. —É–∫—Ä–∞–∏–Ω–∞ –Ω–µ ...,238309,544,-1001101806611,boris_rozhin,ru,2025-03-04,**–≤–∏—Ü–µ-–ø—Ä–µ–∑–∏–¥–µ–Ω—Ç —Å—à–∞ –≤—ç–Ω—Å:**\n\n1. —É–∫—Ä–∞–∏–Ω–∞ –Ω–µ ...,...,0.0,0.0,1.0,0.0,0.0,"rhetorical assertion, simplification, attribut...",Vance said Ukraine is incapable of continuing ...,cl,pro-russian,post
3,156489,2025-03-04 02:47:15,"**–∑–µ–ª–µ–Ω—Å–∫–∏–π —É–∫—É—Å–∏–ª —Ä—É–∫—É, –∫–æ—Ç–æ—Ä–∞—è –µ–≥–æ –∫–æ—Ä–º–∏—Ç, –∫...",223542,322,-1001101806611,boris_rozhin,ru,2025-03-04,"**–∑–µ–ª–µ–Ω—Å–∫–∏–π —É–∫—É—Å–∏–ª —Ä—É–∫—É, –∫–æ—Ç–æ—Ä–∞—è –µ–≥–æ –∫–æ—Ä–º–∏—Ç, –∫...",...,0.0,0.0,0.0,0.0,0.0,"metaphor of subordination, emotional appeal, m...","Even though presented as a ‚Äúquote,‚Äù no evidenc...",cl,pro-russian,post
4,156483,2025-03-04 00:38:57,**–±–µ–ª—ã–π –¥–æ–º –ø–æ–¥—Ç–≤–µ—Ä–¥–∏–ª –ø—Ä–∏–æ—Å—Ç–∞–Ω–æ–≤–∫—É –≤–æ–µ–Ω–Ω–æ–π –ø–æ...,212596,612,-1001101806611,boris_rozhin,ru,2025-03-04,**–±–µ–ª—ã–π –¥–æ–º –ø–æ–¥—Ç–≤–µ—Ä–¥–∏–ª –ø—Ä–∏–æ—Å—Ç–∞–Ω–æ–≤–∫—É –≤–æ–µ–Ω–Ω–æ–π –ø–æ...,...,0.0,1.0,1.0,0.0,0.0,"rhetorical intimidation, emotional appeal, moc...",The White House confirmed suspension of aid an...,cl,pro-russian,post
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317,95885,2025-02-27 18:33:14,"**–º–æ—ó —Å—Ç–æ—Å—É–Ω–∫–∏ –∑ –∑–µ–ª–µ–Ω—Å—å–∫–∏–º —Å—Ç–∞–ª–∏ ""–¥–µ—â–æ –Ω–∞–ø—Ä—É–∂...",225883,236,-1001233777422,UaOnlii,uk,2025-02-27,"**–º–æ—ó —Å—Ç–æ—Å—É–Ω–∫–∏ –∑ –∑–µ–ª–µ–Ω—Å—å–∫–∏–º —Å—Ç–∞–ª–∏ ""–¥–µ—â–æ –Ω–∞–ø—Ä—É–∂...",...,0.0,1.0,2.0,1.0,0.0,"sarcasm, rhetorical question, emotional appeal",Sarcastically questions why relations are tens...,uo,pro-ukrainian,pre
318,95884,2025-02-27 18:28:36,"**‚ùóÔ∏è—è –Ω–µ –º–æ–∂—É –ø–æ–≤—ñ—Ä–∏—Ç–∏, —â–æ –Ω–∞–∑–≤–∞–≤ –ø—Ä–µ–∑–∏–¥–µ–Ω—Ç–∞ –∑...",229000,1562,-1001233777422,UaOnlii,uk,2025-02-27,"**:exclamation_mark —è –Ω–µ –º–æ–∂—É –ø–æ–≤—ñ—Ä–∏—Ç–∏, —â–æ –Ω–∞–∑...",...,0.0,1.0,2.0,1.0,0.0,"sarcasm, rhetorical ridicule, emotional appeal",Mocks Trump‚Äôs denial by sarcastically claiming...,uo,pro-ukrainian,pre
319,95883,2025-02-27 18:22:07,"‚ùóÔ∏è**—Ç—Ä–∞–º–ø –Ω–∞ –∑–∞–ø–∏—Ç–∞–Ω–Ω—è, —á–∏ –≤–≤–∞–∂–∞—î –≤—ñ–Ω, —è–∫ —ñ —Ä–∞...",226815,1520,-1001233777422,UaOnlii,uk,2025-02-27,":exclamation_mark **—Ç—Ä–∞–º–ø –Ω–∞ –∑–∞–ø–∏—Ç–∞–Ω–Ω—è, —á–∏ –≤–≤–∞...",...,0.0,0.0,2.0,1.0,0.0,"rhetorical understatement, implied critique",Uses three moai emojis to signal skepticism an...,uo,pro-ukrainian,pre
320,95881,2025-02-27 18:10:26,"‚ùóÔ∏è**—è –Ω–µ –¥—É–º–∞—é, —â–æ –ø—É—Ç—ñ–Ω –∑–Ω–æ–≤—É –≤—Ç–æ—Ä–≥–Ω–µ—Ç—å—Å—è, —è–∫...",218506,237,-1001233777422,UaOnlii,uk,2025-02-27,":exclamation_mark **—è –Ω–µ –¥—É–º–∞—é, —â–æ –ø—É—Ç—ñ–Ω –∑–Ω–æ–≤—É...",...,,,2.0,,,,,uo,pro-ukrainian,pre


In [37]:
df_all_comments = df_all_comments.copy()

In [38]:
# Parsing timestamps
df_posts["date"] = pd.to_datetime(df_posts["date"], errors="coerce")
df_all_comments["comment_date"] = pd.to_datetime(df_all_comments["comment_date"], errors="coerce")

In [39]:
# Preparing post-level data (drop duplicates just in case)
keep_columns = [
    "post_id", "date", "views", "forwards", "channel_id",
    "channel_name", "source", "bloc", "period"
]
df_posts_clean = df_posts[keep_columns].drop_duplicates(subset=["post_id"])

In [40]:
print(df_all_comments["post_id"].dtype)
print(df_posts_clean["post_id"].dtype)

float64
int64


In [41]:
valid_post_ids = set(df_posts_clean["post_id"])
df_all_comments = df_all_comments[df_all_comments["post_id"].isin(valid_post_ids)]

In [42]:
# Merging post metadata into comments
df_all = df_all_comments.merge(df_posts_clean, on="post_id", how="left")

In [43]:
df_all[["post_id", "date", "comment_date"]].dropna().sample(5)

Unnamed: 0,post_id,date,comment_date
16933,53606.0,2025-02-28 17:26:37,2025-02-28 17:32:14+00:00
28038,93559.0,2025-02-28 20:36:52,2025-02-28 22:23:39+00:00
26241,93598.0,2025-03-01 19:34:28,2025-03-01 20:52:17+00:00
12606,95985.0,2025-02-28 18:00:15,2025-02-28 19:01:34+00:00
16443,53610.0,2025-02-28 18:27:10,2025-02-28 18:54:54+00:00


In [44]:
# Normalizing timezones (make both tz-naive)
try:
    df_all["comment_date"] = df_all["comment_date"].dt.tz_localize(None)
except Exception:
    pass

try:
    df_all["date"] = df_all["date"].dt.tz_localize(None)
except Exception:
    pass

# Compute time delta from post to comment (in hours)
df_all["post_age_at_comment"] = (
    df_all["comment_date"] - df_all["date"]
).dt.total_seconds() / 3600

In [45]:
df_all

Unnamed: 0,post_id,comment_text,comment_date,comment_user_id,channel_orientation,source_file,comment_language,text_transformed,date,views,forwards,channel_id,channel_name,source,bloc,period,post_age_at_comment
0,21390.0,"there will be no freedom, first trump is prepa...",2025-03-02 10:28:41,7.868621e+09,pro-ukrainian,data/ua/Comments_ds.csv,en,"there will be no freedom, first trump is prepa...",2025-02-28 20:20:56,441866,1146,-1001469021333,DeepStateUA,ds,pro-ukrainian,post,38.129167
1,21390.0,i think us and ukraine could have a very good ...,2025-03-02 10:27:22,8.170125e+09,pro-ukrainian,data/ua/Comments_ds.csv,en,i think us and ukraine could have a very good ...,2025-02-28 20:20:56,441866,1146,-1001469021333,DeepStateUA,ds,pro-ukrainian,post,38.107222
2,21390.0,—Ç–∏ –∞–Ω—Ç–∏—Å–µ–º—ñ—Ç!!!—Ç–∏ –≥—ñ—Ç–ª–µ—Ä–∞ –ø—ñ–¥—Ç—Ä–∏–º—É—î—à???\n—Ü–µ —è–∫...,2025-03-02 10:10:15,7.868621e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk,—Ç–∏ –∞–Ω—Ç–∏—Å–µ–º—ñ—Ç!!!—Ç–∏ –≥—ñ—Ç–ª–µ—Ä–∞ –ø—ñ–¥—Ç—Ä–∏–º—É—î—à???\n—Ü–µ —è–∫...,2025-02-28 20:20:56,441866,1146,-1001469021333,DeepStateUA,ds,pro-ukrainian,post,37.821944
3,21390.0,i think same goes for israel. lots of american...,2025-03-02 10:09:37,8.170125e+09,pro-ukrainian,data/ua/Comments_ds.csv,en,i think same goes for israel. lots of american...,2025-02-28 20:20:56,441866,1146,-1001469021333,DeepStateUA,ds,pro-ukrainian,post,37.811389
4,21390.0,"—Ç–æ–±—Ç–æ –Ω–µ –ø–æ—Å–∏–ª–∞—Ç–∏ –≥—Ä–æ—à—ñ –≤ —ñ–∑—Ä–∞—ó–ª—å, –±–æ –±—ñ–¥–Ω–∏–º –∞...",2025-03-02 10:08:18,7.868621e+09,pro-ukrainian,data/ua/Comments_ds.csv,uk,"—Ç–æ–±—Ç–æ –Ω–µ –ø–æ—Å–∏–ª–∞—Ç–∏ –≥—Ä–æ—à—ñ –≤ —ñ–∑—Ä–∞—ó–ª—å, –±–æ –±—ñ–¥–Ω–∏–º –∞...",2025-02-28 20:20:56,441866,1146,-1001469021333,DeepStateUA,ds,pro-ukrainian,post,37.789444
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32885,93453.0,–∑–µ–º–ª–∏ - —Ä–æ—Å—Å–∏–∏ üá∑üá∫\n–¥–æ–ª–≥–∏ - –µ–≤—Ä–æ–ø–µ üá™üá∫\n–¥–µ–Ω—å–≥–∏ -...,2025-02-26 15:41:55,5.057871e+08,pro-russian,data/ru/Comments_re.csv,ru,–∑–µ–º–ª–∏ - —Ä–æ—Å—Å–∏–∏ :Russia \n–¥–æ–ª–≥–∏ - –µ–≤—Ä–æ–ø–µ :Europ...,2025-02-26 15:37:46,397626,1383,-1001260622817,readovkanews,re,pro-russian,pre,0.069167
32886,93453.0,–±—ã—Ç—å –¥—Ä—É–∑—å—è–º–∏ —Ä–æ—Å—Å–∏–∏ –∏ –±—ã—Ç—å —Å —Ä–æ—Å—Å–∏–µ–π –Ω–∞ —Ä–∞–≤–Ω—ã...,2025-02-26 15:41:24,5.203758e+09,pro-russian,data/ru/Comments_re.csv,ru,–±—ã—Ç—å –¥—Ä—É–∑—å—è–º–∏ —Ä–æ—Å—Å–∏–∏ –∏ –±—ã—Ç—å —Å —Ä–æ—Å—Å–∏–µ–π –Ω–∞ —Ä–∞–≤–Ω—ã...,2025-02-26 15:37:46,397626,1383,-1001260622817,readovkanews,re,pro-russian,pre,0.060556
32887,93453.0,"50%? –∞–º–µ—Ä–∏–∫–æ—Å—ã –∑–∞–±–µ—Ä—É—Ç 150,–≤ —ç—Ç–æ–º —Å–æ–º–Ω–µ–≤–∞—Ç—å—Å—è ...",2025-02-26 15:39:35,5.169864e+09,pro-russian,data/ru/Comments_re.csv,ru,"50%? –∞–º–µ—Ä–∏–∫–æ—Å—ã –∑–∞–±–µ—Ä—É—Ç 150,–≤ —ç—Ç–æ–º —Å–æ–º–Ω–µ–≤–∞—Ç—å—Å—è ...",2025-02-26 15:37:46,397626,1383,-1001260622817,readovkanews,re,pro-russian,pre,0.030278
32888,93453.0,–¥–æ–Ω—è –∞–≥–µ–Ω—Ç –∫—Ä–µ–º–ª—è,2025-02-26 15:38:56,2.033804e+09,pro-russian,data/ru/Comments_re.csv,ru,–¥–æ–Ω—è –∞–≥–µ–Ω—Ç –∫—Ä–µ–º–ª—è,2025-02-26 15:37:46,397626,1383,-1001260622817,readovkanews,re,pro-russian,pre,0.019444


post_age_at_comment is the number of hours between the time a post was published on a Telegram channel and the time a specific comment was made in response to that post.
It‚Äôs calculated as:
post_age_at_comment = comment_date - post_date

I calculated it because:
1. Not all comments are equally reactive. A comment written 3 minutes after a post is very different in intent and engagement than one left 3 days later.

 2. By computing the age of the post at the time of each comment, I now have a temporal signal ‚Äî a way to measure when users engage, not just what they say.