In [7]:
import pandas as pd
import re
from collections import Counter

# Hard-coded absolute paths to the two CSV inputs that are analysed below.
file_a = "D:/SOSC314/energy_narrative_C.csv"
file_b = "D:/SOSC314/energy_narrative_W.csv"
# Load each CSV into its own DataFrame (read with pandas' default options).
df_a = pd.read_csv(file_a)
df_b = pd.read_csv(file_b)

# Phrases whose surrounding words are collected and counted.
KEYWORDS = [
    "energy transition",
    "carbon neutrality",
    "climate policy",
]

# Common English function words that are excluded from the context counts.
STOPWORDS = {
    "the", "and", "to", "of", "in", "a", "is", "for", "on", "that",
    "with", "as", "are", "was", "be", "by", "this", "from", "or", "an",
    "it", "at", "which", "also", "has", "have", "had", "their", "they",
    "we", "our", "its", "these", "those", "such", "than", "then",
    "there", "here",
}


def extract_context(text, keyword, window=20):
    """Collect the words surrounding every occurrence of *keyword* in *text*.

    Both *text* and *keyword* are lower-cased and split into word tokens with
    the same pattern, so matching is case-insensitive and is not affected by
    punctuation inside the keyword (e.g. a hyphen between its words).

    Args:
        text: Document string to search.
        keyword: Word or multi-word phrase to look for.
        window: Maximum number of tokens taken on each side of a match.

    Returns:
        A flat list of lower-cased context tokens. For each match, the tokens
        before it come first, then the tokens after it; the keyword's own
        tokens are not included for that match. Windows of nearby matches may
        overlap, in which case shared tokens appear once per match. An empty
        list is returned when the keyword has no word tokens or never occurs.
    """
    token_pattern = r"\b\w+\b"
    tokens = re.findall(token_pattern, text.lower())

    # Tokenize the keyword exactly like the text; otherwise a keyword with
    # capitals or punctuation could never equal a slice of `tokens`.
    keyword_tokens = re.findall(token_pattern, keyword.lower())
    k_len = len(keyword_tokens)

    # An empty keyword would compare equal to the empty slice at every
    # position and return spurious context, so treat it as "no matches".
    if k_len == 0:
        return []

    contexts = []

    for i in range(len(tokens) - k_len + 1):
        if tokens[i:i + k_len] == keyword_tokens:
            start = max(i - window, 0)
            end = min(i + k_len + window, len(tokens))
            contexts.extend(tokens[start:i])
            contexts.extend(tokens[i + k_len:end])

    return contexts

def analyze_source(df, source_name, text_col="full_content"):
    """Count context words around each keyword and export the top 50 of each.

    For every phrase in ``KEYWORDS``, each non-null entry of ``df[text_col]``
    is passed to ``extract_context`` with a 20-token window. Words that are in
    ``STOPWORDS`` or are two characters or shorter are discarded and the rest
    are tallied. The 50 most frequent words are written to an Excel file named
    ``{source_name}_{keyword with spaces as underscores}_context.xlsx``
    (a relative path), and a "Saved: ..." line is printed for each file.

    Args:
        df: DataFrame holding the documents.
        source_name: Label used as the prefix of the output file names.
        text_col: Name of the column containing the document text.

    Returns:
        dict mapping each keyword to the full ``Counter`` of its context words.
    """
    tallies = {}

    for phrase in KEYWORDS:
        tally = Counter()

        # Feed the counter one document at a time; the order in which words
        # are first seen matches a single pass over all documents.
        for document in df[text_col].dropna():
            tally.update(
                token
                for token in extract_context(document, phrase, window=20)
                if token not in STOPWORDS and len(token) > 2
            )

        tallies[phrase] = tally

        # Export the most frequent context words for this keyword.
        top_words = pd.DataFrame(
            tally.most_common(50),
            columns=["word", "frequency"],
        )
        filename = f"{source_name}_{phrase.replace(' ', '_')}_context.xlsx"
        top_words.to_excel(filename, index=False)

        print(f"Saved: {filename}")

    return tallies

# Run the keyword-context analysis on both DataFrames. Each call writes one
# .xlsx file per keyword and returns a dict of keyword -> Counter.
results_A = analyze_source(df_a, "SourceA")
results_B = analyze_source(df_b, "SourceB")



Saved: SourceA_energy_transition_context.xlsx
Saved: SourceA_carbon_neutrality_context.xlsx
Saved: SourceA_climate_policy_context.xlsx
Saved: SourceB_energy_transition_context.xlsx
Saved: SourceB_carbon_neutrality_context.xlsx
Saved: SourceB_climate_policy_context.xlsx
