In [15]:
import helpers
from multinomial_nb import MultinomialNaiveBayes

# --- Configuration ---------------------------------------------------------
DEPRESSION_PATH = "data/depression_dataset_reddit_cleaned.csv"
TDT_SPLIT = "80/10/10"  # passed to helpers.split as `dist`; presumably train/dev/test percentages

# --- Load the corpus -------------------------------------------------------
# helpers.get_data hands back two parallel sequences that are zipped into
# (tokens, label) rows below.
clean_text, is_depression = helpers.get_data(
    datapath=DEPRESSION_PATH, ngram=1, tokenize=True, by_character=False
)

# Pair every tokenised post with its label.
depression_word = [(tokens, label) for tokens, label in zip(clean_text, is_depression)]

print("Example row:", depression_word[0], sep="\n")

# --- Partition -------------------------------------------------------------
# NOTE(review): whether helpers.split shuffles (and needs a seed for
# reproducibility) is not visible from this cell -- confirm.
train, dev, test = helpers.split(depression_word, dist=TDT_SPLIT)

print(f"Train size: {len(train)}, Dev size: {len(dev)}, Test size: {len(test)}")

# Report statistics for each partition, in train -> dev -> test order.
for stats_header, split_rows in (
    ("\n=== Train Stats ===", train),
    ("\n=== Dev Stats ===", dev),
    ("\n=== Test Stats ===", test),
):
    print(stats_header)
    helpers.get_stats(split_rows)

# --- Train -----------------------------------------------------------------
# Fit the Naive Bayes classifier on the training partition only.
nb_model = MultinomialNaiveBayes(alpha=1.0)
nb_model.fit(train)

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Score the fitted model on the held-out `test` split created by helpers.split
# earlier in this cell. This code previously iterated over `test_data`, a name
# the cell never defines: it raised NameError on a fresh kernel, or silently
# evaluated whatever stale `test_data` was left over from another cell.
y_true = [label for (_, label) in test]
y_pred = [nb_model.predict_one(tokens) for (tokens, _) in test]

# All four metrics are computed with scikit-learn's default arguments.
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)

print("\n=== Naive Bayes Final Metrics (Test Set) ===")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Accuracy:  {accuracy:.4f}")

# Full-precision row, ready to copy into the results CSV
# (column order: model, precision, recall, f1, accuracy).
print("\nCSV row:")
print(f"naive_bayes,{precision},{recall},{f1},{accuracy}")


Example row:
(['<s>', 'understand', 'people', 'reply', 'immediately', 'op', 'invitation', 'talk', 'privately', 'mean', 'help', 'type', 'response', 'usually', 'lead', 'either', 'disappointment', 'disaster', 'usually', 'work', 'quite', 'differently', 'say', 'pm', 'anytime', 'casual', 'social', 'context', 'huge', 'admiration', 'appreciation', 'goodwill', 'good', 'citizenship', 'many', 'support', 'others', 'flag', 'inappropriate', 'content', 'know', 'many', 'struggling', 'hard', 'work', 'behind', 'scene', 'information', 'resource', 'make', 'easier', 'give', 'get', 'quality', 'help', 'small', 'start', 'new', 'wiki', 'page', 'explains', 'detail', 'much', 'better', 'respond', 'public', 'comment', 'least', 'gotten', 'know', 'someone', 'maintained', 'r', 'wiki', 'private', 'contact', 'full', 'text', 'current', 'version', 'summary', 'anyone', 'acting', 'helper', 'invite', 'accepts', 'private', 'contact', 'e', 'pm', 'chat', 'kind', 'offsite', 'communication', 'early', 'conversion', 'showing', 'ei

In [19]:
import csv
import os

# One results row: model name followed by four metric values.
# NOTE(review): the numbers are hard-coded; presumably they were pasted from the
# "CSV row" printout of the evaluation cell (precision, recall, f1, accuracy in
# that order) -- confirm they still match the current run before re-executing.
# Re-running this cell appends the same row again.
row = [
    "naive_bayes",
    0.9129213483146067,
    0.8736559139784946,
    0.8928571428571429,
    0.8992248062015504
]

csv_path = "results/results.csv"

# Opening in append mode creates the file but not its parent folder, so make
# sure the folder exists; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# newline="" is what the csv module requires for files it writes to; the explicit
# encoding keeps the file identical across platforms.
with open(csv_path, "a", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(row)

print("Row appended to results.csv")

Row appended to results.csv
