In [1]:
import pandas as pd
import ast
from collections import defaultdict, Counter

# Load the LDA-labeled dataset
df = pd.read_csv("../LDA_Result.csv", converters={"preprocessed_body": ast.literal_eval})

# Collect all words grouped by topic
topic_words = defaultdict(list)
for _, row in df.iterrows():
    topic = row['labeled topic']
    topic_words[topic].extend(row['preprocessed_body'])

# Extract top 5 most frequent words per topic
top_keywords_per_topic = {
    topic: [word for word, _ in Counter(words).most_common(5)]
    for topic, words in topic_words.items()
}

# Rule explanation generator using only topic-specific top keywords
def strict_rule_explanation(tokens, label):
    topic_keywords = top_keywords_per_topic[label]
    matched = [word for word in tokens if word in topic_keywords]
    if matched:
        keyword_list = ', '.join(f'"{word}"' for word in matched)
        return f'If comment contains {keyword_list} → {label}'
    else:
        return f'No strong topic-specific keywords found → {label}'

# Apply the rule generator
df['extracted_rule'] = df.apply(
    lambda row: strict_rule_explanation(row['preprocessed_body'], row['labeled topic']),
    axis=1
)

# Preview the results in a table
from IPython.display import display
display(df[['body', 'labeled topic', 'extracted_rule']].head(10))

Unnamed: 0,body,labeled topic,extracted_rule
0,People need to do this kind of thing more ofte...,Political Views and Climate Policy,"If comment contains ""people"", ""people"", ""chang..."
1,Thats cute if things dont reverse course in ou...,Environmental Impact and Global Warming,"If comment contains ""climate"", ""change"" → Envi..."
2,Whats interesting is that you are arguing with...,Scientific Discussions on Climate Change,"If comment contains ""time"", ""climate"", ""change..."
3,i can agree with that a lot of the media is ow...,Political Views and Climate Policy,"If comment contains ""climate"", ""change"" → Poli..."
4,The rising seas are due to global warming but ...,Environmental Impact and Global Warming,"If comment contains ""global"", ""climate"", ""chan..."
5,It goes even further than that If some scienti...,Scientific Discussions on Climate Change,"If comment contains ""time"", ""climate"", ""change..."
6,Maybe this little bird that only sings a song ...,Environmental Impact and Global Warming,"If comment contains ""climate"", ""change"" → Envi..."
7,Funny how all climate change deniers use the e...,Scientific Discussions on Climate Change,"If comment contains ""climate"", ""change"", ""peop..."
8,If were going to start talking long term then ...,Environmental Impact and Global Warming,"If comment contains ""climate"", ""change"" → Envi..."
9,Its like when climate change is mentioned they...,Scientific Discussions on Climate Change,"If comment contains ""climate"", ""change"" → Scie..."
