Import the Empath library

In [None]:
from empath import Empath
# Single shared Empath instance; the later cells create categories on it and analyze text with it.
lexicon = Empath()

Load the dataset into a pandas DataFrame

In [None]:
import pandas as pd
# Read the input CSV into the working DataFrame, then preview the first rows.
df = pd.read_csv(filepath_or_buffer="sentiment_subjectivity.csv")
df.head()

Create new categories

In [None]:
def create_custom_categories(model="nytimes", target_lexicon=None):
    """Create the project's custom Empath categories from hand-picked seed terms.

    Calls ``create_category(name, seeds, model=model)`` once per custom
    category, in the order the categories are listed below.

    Parameters
    ----------
    model : str, optional
        Name of the Empath model passed to ``create_category``. Defaults to
        ``"nytimes"``, chosen for its more policy-related vocabulary.
    target_lexicon : object, optional
        Object exposing ``create_category``. When omitted, the notebook-level
        ``lexicon`` instance is used, so a bare ``create_custom_categories()``
        call behaves as before.

    Returns
    -------
    None
    """
    # Seed terms per category. The local name deliberately differs from the
    # notebook-level ``categories`` list, which holds category *names* only.
    seed_terms_by_category = {
        "pro_stance": ["support", "approval", "agreement", "endorsement", "favor", "positive_opinion", "advocate", "in_favor", "backing"],
        "moral_dilemma": ["ethics", "morality", "controversy", "decision_making", "right_vs_wrong"],
        "misinformation": ["fake_news", "disinformation", "propaganda", "conspiracy", "false_claims"],
        "human_rights": ["freedom", "equality", "discrimination", "justice", "civil_rights"],
        "abortion_rights": ["pro_choice", "pro_life", "reproductive_rights", "bodily_autonomy", "abortion_laws"],
        "war_justification": ["military_intervention", "self_defense", "war_crimes", "peace_treaty", "conflict_resolution"],
        "womens_rights": ["gender_equality", "feminism", "reproductive_rights", "pay_gap", "domestic_violence"],
    }

    if target_lexicon is None:
        # Fall back to the shared Empath instance created at the top of the notebook.
        target_lexicon = lexicon

    for category_name, seed_terms in seed_terms_by_category.items():
        target_lexicon.create_category(category_name, seed_terms, model=model)


In [None]:
# Create the custom categories on the shared `lexicon` so they can be used in the analysis below.
create_custom_categories()

List of all the expected categories

In [None]:
# Every category scored in the analysis: existing Empath categories first,
# then the custom ones created for this notebook.
categories = [
    # Already existing Empath categories
    "pain",
    "movement",
    "negative_emotion",
    "religion",
    "violence",
    "government",
    "independence",
    "fear",
    "trust",
    "leader",
    # Custom categories
    "pro_stance",
    "moral_dilemma",
    "misinformation",
    "human_rights",
    "abortion_rights",
    "war_justification",
    "womens_rights",
]

Execute the content analysis for the dataset

In [None]:
def analyze_text(text):
    """Score one text against every name in the notebook-level ``categories`` list.

    Parameters
    ----------
    text : str or null-like
        The text to analyze. Null values (anything ``pd.isna`` reports as
        missing) are not passed to Empath.

    Returns
    -------
    dict
        Mapping of category name to normalized score. Null input, and text
        for which Empath produces no result, yield 0.0 for every category so
        that every row has the same set of keys.
    """
    if pd.isna(text):  # Handle null values
        return {category: 0.0 for category in categories}

    scores = lexicon.analyze(text, categories=categories, normalize=True)

    # With normalize=True Empath returns None (not a dict) when the text has
    # no tokens, e.g. an empty or whitespace-only string. Treat that the same
    # as a null caption instead of leaking None into the results.
    if scores is None:
        return {category: 0.0 for category in categories}
    return scores

In [None]:
# Score every caption; each element is the per-category dict returned by analyze_text.
caption_scores = df["captions"].apply(analyze_text)

# Build the score table in a single constructor call instead of creating one
# Series per row with .apply(pd.Series). A missing result (None) becomes an
# all-NaN row, which is what .apply(pd.Series) produced for it as well.
analysis_results = pd.DataFrame(
    [scores if scores is not None else {} for scores in caption_scores],
    index=df.index,
    columns=categories,
)

# Merge results with original DataFrame. Score columns left over from an
# earlier run of this cell are dropped first, so re-running the cell replaces
# them instead of appending duplicate columns.
df = pd.concat(
    [df.drop(columns=analysis_results.columns, errors="ignore"), analysis_results],
    axis=1,
)

df.head()

Add the `topic` column

In [None]:
import re

import pandas as pd

# Keywords for war and abortion topics
war_keywords = ["Putin", "war", "anti-war"]
abortion_keywords = ["abortion", "anti-abortion"]


def _mentions_keyword(text, keywords):
    """Return True if any keyword occurs in ``text`` as a whole word (case-insensitive)."""
    lowered = text.lower()
    return any(
        # The lookarounds require a non-word character (or the string edge) on
        # both sides, so "war" matches "anti-war" and "war." but not "award".
        re.search(r"(?<!\w)" + re.escape(keyword.lower()) + r"(?!\w)", lowered)
        for keyword in keywords
    )


# Function to determine the topic based on search queries
def determine_topic(search_queries):
    """Classify a search-query string as ``"war"``, ``"abortion"`` or ``None``.

    Parameters
    ----------
    search_queries : object
        The search query text. Non-null values are converted with ``str()``
        before matching.

    Returns
    -------
    str or None
        ``"war"`` if any entry of ``war_keywords`` occurs as a whole word,
        otherwise ``"abortion"`` if any entry of ``abortion_keywords`` does,
        otherwise ``None``. Null input returns ``None``. War takes precedence
        when both topics are mentioned.

    Notes
    -----
    Matching is case-insensitive and on whole words, so unrelated words that
    merely contain a keyword ("award", "toward", "software") are not
    classified as war. Inflected forms such as "wars" are not matched either;
    add them to the keyword lists if they occur in the data.
    """
    if pd.isna(search_queries):
        return None
    query_text = str(search_queries)
    if _mentions_keyword(query_text, war_keywords):
        return "war"
    if _mentions_keyword(query_text, abortion_keywords):
        return "abortion"
    return None

In [None]:
# Label each row with the topic inferred from its search query, stored as 'topic'
topic_labels = df["search_queries"].map(determine_topic)
df["topic"] = topic_labels

df.head()

Export the result

In [None]:
# Write the enriched DataFrame to disk without the index column.
df.to_csv(path_or_buf="content_analysis.csv", index=False)