<a href="https://colab.research.google.com/github/DVerma11/Reddit_Anxiety_Symptoms_Narratives_NLP_Exploration/blob/main/Section_3_Negative_Symptom_Detection_Symptoms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 3: Negated Symptom Detection

Input File: Step2B_symptom_phrases_exploded.csv

Column to be processed: "comment_body_clean_phrases"

## 3.1 Load files for Negation Detection

Span-aware, rule-based negation detection using dependency parsing and lexical negation cues.

In [None]:
import pandas as pd
# Read the phrase table from the working directory (relative path).
# NOTE(review): presumably one row per extracted phrase, as written by Step 2B — confirm.
symptom_phrases_exploded_df = pd.read_csv("Step2B_symptom_phrases_exploded.csv")

In [None]:
# Peek at the first ten rows, restricted to the author hash and phrase columns
symptom_phrases_exploded_df.loc[:, ["author_hash", "comment_body_clean_phrases"]].head(10)

Unnamed: 0,author_hash,comment_body_clean_phrases
0,bfc763f738dd81303e35d089fde639e68495eab77cc322...,idea well feel
1,bfc763f738dd81303e35d089fde639e68495eab77cc322...,feeling
2,bfc763f738dd81303e35d089fde639e68495eab77cc322...,feel go die
3,bfc763f738dd81303e35d089fde639e68495eab77cc322...,go therapy
4,bfc763f738dd81303e35d089fde639e68495eab77cc322...,take medication month
5,bfc763f738dd81303e35d089fde639e68495eab77cc322...,feel lot well month
6,bfc763f738dd81303e35d089fde639e68495eab77cc322...,m
7,9bba55d20948ae8babbea1c68977c6d0c65cfc5a6d7412...,thank
8,9bba55d20948ae8babbea1c68977c6d0c65cfc5a6d7412...,experience ton similar symptom
9,9bba55d20948ae8babbea1c68977c6d0c65cfc5a6d7412...,s horrible give severe hypochondria


In [None]:
# How many rows have no phrase at all?
symptom_phrases_exploded_df["comment_body_clean_phrases"].isnull().sum()

np.int64(0)

In [None]:
# Optional: trim leading/trailing whitespace from every phrase
stripped_phrases = symptom_phrases_exploded_df["comment_body_clean_phrases"].str.strip()
symptom_phrases_exploded_df["comment_body_clean_phrases"] = stripped_phrases

We will use `symptom_phrases_exploded_df` (loaded from `Step2B_symptom_phrases_exploded.csv`) for negation detection.

## 3.2 Load spaCy's `en_core_web_sm` model

In [None]:
#Step 1: Load spaCy and set up caching
import spacy

# Load the small English model. The negation step below reads sentence
# boundaries (span.sent) and dependency labels (token.dep_) from the docs
# this pipeline produces.
# NOTE(review): assumes the en_core_web_sm package is already installed in
# this environment — confirm, or add an install cell above.
nlp = spacy.load("en_core_web_sm")

# Parsed docs are memoised here, keyed by comment_id, so a comment that
# contributes several phrases is only run through the pipeline once.
doc_cache = {}

def get_doc(comment_id, text):
    """
    Fetch the parsed doc for comment_id, parsing text on first request.

    The result of nlp(text) is stored in doc_cache under comment_id; later
    calls with the same comment_id return that stored object and ignore text.
    """
    try:
        return doc_cache[comment_id]
    except KeyError:
        parsed = nlp(text)
        doc_cache[comment_id] = parsed
        return parsed

In [None]:
#Step 2: Define negation detection function
import re

# Attenuation / comparative phrases: a sentence containing one of these is
# treated as "reduced", not "absent", and is never reported as negated.
ATTENUATION_PATTERNS = {"not as", "not really", "not very", "less than", "much less"}

# Single-token negation cues, compared against token.lower_.
# "n't" / "n’t" are included because spaCy's English tokenizer splits
# contractions ("can't" -> "ca" + "n't"), so the whole-word contraction
# entries are not guaranteed to ever appear as one token.
NEGATORS = {
    "no", "not", "never", "without", "none", "cannot",
    "can't", "doesn't", "didn't", "denies", "denied",
    "n't", "n’t",
}

# Attenuation phrases compiled once, anchored on word boundaries so that
# "not as" does not fire inside "not asleep" and "less than" does not fire
# inside "careless thanks". Any run of whitespace is accepted between words.
_ATTENUATION_RE = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        r"\s+".join(re.escape(word) for word in pattern.split())
        for pattern in sorted(ATTENUATION_PATTERNS)
    )
    + r")(?!\w)"
)

def is_span_negated(doc, char_start, char_end, window=3):
    """
    Decide whether the text span [char_start, char_end) of doc is negated.

    Args:
        doc: parsed spaCy doc of the full comment (needs sentence boundaries
            and dependency labels).
        char_start: character offset where the span begins.
        char_end: character offset just past the end of the span.
        window: number of tokens inspected on each side of the span,
            clipped to the sentence that contains the span.

    Returns:
        (is_negated, cue): is_negated is a bool; cue is the text of the first
        negation token found, or None when the span is not negated, cannot be
        aligned to tokens, or sits in a sentence with an attenuation phrase.
    """
    # "expand" snaps offsets that fall inside a token outwards to token edges.
    span = doc.char_span(char_start, char_end, alignment_mode="expand")
    if span is None:
        return False, None

    sent = span.sent
    sent_text = sent.text.lower()

    # Exclude attenuation / comparative contexts (whole-word match only)
    if _ATTENUATION_RE.search(sent_text):
        return False, None

    # Look around the span within the sentence; the span's own tokens are
    # part of the scanned range.
    left = max(sent.start, span.start - window)
    right = min(sent.end, span.end + window)

    for token in doc[left:right]:
        # Either the parser labelled the token as a negation modifier, or its
        # lowercase form is one of the lexical cues.
        if token.dep_ == "neg" or token.lower_ in NEGATORS:
            return True, token.text

    return False, None




## 3.3 Define Negation Detection

In [None]:
# Step 3: Apply negation detection to exploded phrases

# Helper: find start/end offsets of phrase in original comment
def find_phrase_offsets(comment_text, phrase):
    """
    Locate the first whole-word, case-insensitive occurrence of phrase.

    Args:
        comment_text: full comment text to search in.
        phrase: phrase to look for.

    Returns:
        (start, end) character offsets into comment_text (end exclusive), or
        (None, None) when the phrase is not found, is empty/whitespace-only,
        or either argument is not a string (e.g. NaN from a missing CSV cell).
    """
    import re  # local import keeps this cell runnable on its own

    if not isinstance(comment_text, str) or not isinstance(phrase, str):
        return None, None
    if not phrase.strip():
        # An empty needle would otherwise "match" at offset 0.
        return None, None

    # Require a word boundary on each side that starts/ends with a word
    # character, so "m" is not found inside "omg" nor "give" inside "forgive".
    # Phrases that begin or end with punctuation are left unanchored there.
    prefix = r"(?<!\w)" if re.match(r"\w", phrase) else ""
    suffix = r"(?!\w)" if re.search(r"\w\Z", phrase) else ""

    # Searching the original text with IGNORECASE (instead of lowercasing both
    # sides) guarantees the offsets index into comment_text itself.
    match = re.search(prefix + re.escape(phrase) + suffix, comment_text, flags=re.IGNORECASE)
    if match is None:
        return None, None
    return match.start(), match.end()

# Function to detect negation per row
def detect_negation(row):
    """
    Run negation detection for one row of the phrase table.

    Reads "comment_body_clean" (full comment) and "comment_body_clean_phrases"
    (phrase) from row, and "comment_id" once the phrase has been located.

    Returns (is_negated, cue) from is_span_negated, or (False, None) when the
    phrase cannot be located in the comment text.

    NOTE(review): a phrase that does not occur verbatim in the comment is
    reported as not negated. The sample phrases look lemmatized, so this may
    be the common case — confirm how many rows take that path.
    """
    full_comment = row["comment_body_clean"]  # original full comment text
    span_start, span_end = find_phrase_offsets(full_comment, row["comment_body_clean_phrases"])
    if span_start is None:
        return False, None
    parsed = get_doc(row["comment_id"], full_comment)
    return is_span_negated(parsed, span_start, span_end)



## 3.4 Apply Negation Detection

In [None]:
# Run the detector on every row and store its two results as new columns
def _negation_as_series(row):
    """Wrap detect_negation's (flag, cue) pair in a Series so apply() yields two columns."""
    return pd.Series(detect_negation(row))

symptom_phrases_exploded_df[["is_negated", "negation_cue"]] = symptom_phrases_exploded_df.apply(
    _negation_as_series, axis=1
)

In [None]:
# First ten phrases with their negation flag and cue
symptom_phrases_exploded_df.loc[:, ["comment_body_clean_phrases", "is_negated", "negation_cue"]].head(10)


Unnamed: 0,comment_body_clean_phrases,is_negated,negation_cue
0,idea well feel,False,
1,feeling,False,
2,feel go die,False,
3,go therapy,False,
4,take medication month,False,
5,feel lot well month,False,
6,m,True,no
7,thank,False,
8,experience ton similar symptom,False,
9,s horrible give severe hypochondria,False,


In [None]:
# Count of negated vs non-negated phrases.
# The tally is kept in negation_counts and printed as plain text, one line
# per distinct is_negated value.
negation_counts = symptom_phrases_exploded_df["is_negated"].value_counts()
print(negation_counts)


is_negated
False    4061
True       47
Name: count, dtype: int64


In [None]:
# Show the first ten phrases flagged as negated, with their comment and cue
symptom_phrases_exploded_df.loc[
    symptom_phrases_exploded_df["is_negated"],
    ["comment_body_clean", "comment_body_clean_phrases", "negation_cue", "comment_id"],
].head(10)


Unnamed: 0,comment_body_clean,comment_body_clean_phrases,negation_cue,comment_id
6,omg you have no idea how much better i feel. i...,m,no,l5k13qf
66,hello everyone. i m currently recovering from ...,change,no,lwwknq7
82,hello everyone. i m currently recovering from ...,give,not,lwwknq7
84,hello everyone. i m currently recovering from ...,tell,not,lwwknq7
88,hello everyone. i m currently recovering from ...,quit,never,lwwknq7
190,my recovery my journey through depersonalizati...,magic pill,no,mars8tv
218,my recovery my journey through depersonalizati...,mankind design,not,mars8tv
355,this is me right now avoiding to be on medicat...,ugh,not,lxsmtec
611,after my mom passed went back to work ya. they...,girlfriend,not,lwbz0lt
755,hi. how did you recover from these? i have had...,sure,not,mocqr4b


In [None]:
# Persist the phrase table, now carrying is_negated / negation_cue, without the index column
symptom_phrases_exploded_df.to_csv("Step3_symptom_phrases_negation.csv", index=False, encoding="utf-8")


End of Section