### V2 REDDIT EXTRACTION + KNOWLEDGE BASE PREPARATION


In [19]:
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env if present

# Reddit API settings are read from the process environment (after load_dotenv()).
# The client id and secret stay None when unset; the user agent falls back to a
# fixed default whenever the variable is unset or empty.
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT")
if not REDDIT_USER_AGENT:
    REDDIT_USER_AGENT = "knowledge-collection-service/0.1"

Scoring Functions


In [20]:
import re
import math
from typing import List

# --- Heuristics - Regex---
# Matches text that looks like advice:
#   * a line opening with a bullet or list number ("- ", "* ", "1. ", "2) "), or
#   * a directive verb anywhere in the text.
# The "^" anchor applies to the bullet alternative only, and re.M lets it match
# at the start of every line instead of only the start of the whole text.
STEP_PAT = re.compile(
    r"^\s*[-*\d.)]+\s+|\b(?:try|consider|start|first|next|then|finally|should)\b",
    re.I | re.M,
)


def advice_like(text: str) -> bool:
    """Heuristically decide whether ``text`` reads like actionable advice.

    Args:
        text: Raw comment or sentence text; may be empty or ``None``.

    Returns:
        ``True`` when the stripped text is at least 30 characters long and
        contains a bullet/numbered line or a directive verb, else ``False``.
    """
    if not text:
        return False
    stripped = text.strip()
    if len(stripped) < 30:  # too short to be useful advice
        return False
    return STEP_PAT.search(stripped) is not None

#Not implemented - should be discussed!!!
# -- WILL MOVE TO HAVING LLM GENERATE ADVICE, VIA STEPS [PROVIDED WITH CONTEXT]
def extract_steps(text: str) -> List[str]:
    """Pull up to ten step-like snippets out of free-form text.

    Lines opening with a bullet ("-", "*") or a list number ("1.", "2)") are
    treated as steps and returned without their marker. When no such line
    exists, sentences accepted by ``advice_like`` are used instead.

    Args:
        text: Raw text to scan; empty or ``None`` yields an empty list.

    Returns:
        At most 10 steps in order of first appearance, de-duplicated
        case-insensitively.
    """
    if not text:
        return []

    steps = []
    for line in text.splitlines():
        lt = line.strip()
        if len(lt) < 4:
            continue
        if lt.startswith(("-", "*")):
            step = lt.lstrip("-* ").strip()
        elif re.match(r"\d+[.)]", lt):
            # Remove the list number too ("1. foo" -> "foo"); the lookahead
            # leaves decimals such as "1.5 miles" untouched.
            step = re.sub(r"^\d+[.)](?!\d)\s*", "", lt).strip()
        else:
            continue
        if step:  # marker-only lines ("----", "****") carry no step text
            steps.append(step)

    # fallback: look for sentences with directive verbs
    if not steps:
        for sentence in re.split(r"(?<=[.!?])\s+", text):
            if advice_like(sentence):
                steps.append(sentence.strip())

    # keep unique-ish (case-insensitive), preserving first-seen order
    seen = set()
    uniq = []
    for step in steps:
        key = step.lower()
        if key not in seen:
            seen.add(key)
            uniq.append(step)
    return uniq[:10]

def score_comment(score: int, num_replies: int, awards: int, length: int) -> float:
    """Blend a comment's engagement signals into one heuristic score.

    Negative inputs are clamped to zero, damped with ``log1p`` and weighted
    (score 0.6, replies 0.3, awards 0.1). A flat 0.2 bonus is added when
    ``length`` lies between 60 and 1200 inclusive.
    """
    def damped(value: int, weight: float) -> float:
        return math.log1p(max(value, 0)) * weight

    # reward reasonable length
    length_bonus = 0.2 if 60 <= length <= 1200 else 0.0
    return damped(score, 0.6) + damped(num_replies, 0.3) + damped(awards, 0.1) + length_bonus

def score_submission(score: int, num_comments: int, upvote_ratio: float) -> float:
    """Blend a submission's engagement signals into one heuristic score.

    Args:
        score: Net score; negatives are clamped to zero before log damping.
        num_comments: Comment count; negatives are clamped to zero.
        upvote_ratio: Value weighted by 0.2, or ``None`` when unknown.

    Returns:
        ``log1p(score) * 0.5 + log1p(num_comments) * 0.3 + ratio * 0.2``.
    """
    s = math.log1p(max(score, 0)) * 0.5
    c = math.log1p(max(num_comments, 0)) * 0.3
    # Only a missing ratio falls back to the neutral 0.5; a real 0.0 stays 0.0
    # instead of being silently promoted.
    ratio = 0.5 if upvote_ratio is None else upvote_ratio
    u = ratio * 0.2
    return s + c + u

Extraction


In [21]:
import praw
import uuid

# Stop right away when any of the three Reddit settings is missing or empty.
if not all((REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT)):
    raise SystemExit("Missing Reddit creds. Fill .env first (REDDIT_CLIENT_ID/SECRET/USER_AGENT).")

# PRAW client reused by the fetching cells further down.
reddit = praw.Reddit(
    user_agent=REDDIT_USER_AGENT,
    client_secret=REDDIT_CLIENT_SECRET,
    client_id=REDDIT_CLIENT_ID,
)


LIMIT = 20  # per subreddit
submissions, seen_ids = [], set()  # collected posts / ids already visited

# Fetching for time-insensitive domains
# Each entry: stable domain UUID, short tag, display name, and the subreddits
# that are mined for that domain.
domains = [
    dict(
        domain_id="6ba11411-ff82-4528-89f4-b3193ce7189f",
        domain_tag="FAC",
        domain_name="Family and Caregiving",
        subreddits=["family", "relationship_advice", "Parenting", "caregiving"],
    ),
    dict(
        domain_id="953de4be-d2f0-4ca2-9a89-252ce61ad883",
        domain_tag="PH",
        domain_name="Physical Health",
        subreddits=["fitness", "loseit", "nutrition", "xxfitness"],
    ),
    dict(
        domain_id="ad0ac9aa-f840-4256-ba16-3c4190951655",
        domain_tag="MH",
        domain_name="Mental Health",
        subreddits=["mentalhealth", "GetDisciplined", "DecidingToBeBetter", "adhdwomen", "Anxietyhelp"],
    ),
]

# KB Will be built without queries to avoid bias in the extracted dataset

In [22]:
import json
# Echo every domain configuration, one dict per line, as a quick sanity check.
for domain_cfg in domains:
    print(domain_cfg)

{'domain_id': '6ba11411-ff82-4528-89f4-b3193ce7189f', 'domain_tag': 'FAC', 'domain_name': 'Family and Caregiving', 'subreddits': ['family', 'relationship_advice', 'Parenting', 'caregiving']}
{'domain_id': '953de4be-d2f0-4ca2-9a89-252ce61ad883', 'domain_tag': 'PH', 'domain_name': 'Physical Health', 'subreddits': ['fitness', 'loseit', 'nutrition', 'xxfitness']}
{'domain_id': 'ad0ac9aa-f840-4256-ba16-3c4190951655', 'domain_tag': 'MH', 'domain_name': 'Mental Health', 'subreddits': ['mentalhealth', 'GetDisciplined', 'DecidingToBeBetter', 'adhdwomen', 'Anxietyhelp']}


In [23]:
# Building my submissions object
from loguru import logger

logger.info("üöÄ Collecting submissions across domains...")

# Placeholder bodies shown for moderated or deleted posts; they carry no content.
PLACEHOLDER_BODIES = ("[removed]", "[deleted]")
# Posts with fewer comments than this offer too little discussion to mine.
MIN_COMMENTS = 5

for domain in domains:
    logger.info(f"Fetching Top Submissions from Subreddits for Domain --> [{domain['domain_tag']}: {domain['domain_name']}]")
    for sub_name in domain["subreddits"]:
        subreddit = reddit.subreddit(sub_name)
        logger.info(f"  ‚Ü≥ Fetching from r/{sub_name}")

        # One failing subreddit is logged and skipped so the others still load.
        try:
            for submission in subreddit.top(limit=LIMIT):

                # Skip duplicates (ids are remembered even for posts filtered below)
                if submission.id in seen_ids:
                    continue
                seen_ids.add(submission.id)

                # Filter useless posts
                if submission.num_comments < MIN_COMMENTS:
                    continue

                # The former "empty body AND zero comments" skip could never fire
                # after the filter above. Placeholder bodies are blanked instead,
                # so consumers that use `selftext or title` fall back to the title
                # rather than treating "[removed]" as real post text.
                selftext = submission.selftext or ""
                if selftext.strip() in PLACEHOLDER_BODIES:
                    selftext = ""

                submissions.append({
                    "submission_id": submission.id,
                    "domain_tag": domain["domain_tag"],
                    "domain_name": domain["domain_name"],
                    "subreddit": sub_name,
                    "title": submission.title,
                    "selftext": selftext,
                    "score": submission.score,
                    "upvote_ratio": submission.upvote_ratio,
                    "comment_count": submission.num_comments,
                    "permalink": f"https://www.reddit.com{submission.permalink}",
                    "created_utc": submission.created_utc,
                })

        except Exception as e:
            logger.error(f"Error loading r/{sub_name}: {e}")


[32m2026-01-20 17:18:31.491[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1müöÄ Collecting submissions across domains...[0m
[32m2026-01-20 17:18:31.492[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mFetching Top Submissions from Subreddits for Domain --> [FAC: Family and Caregiving][0m
[32m2026-01-20 17:18:31.494[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m  ‚Ü≥ Fetching from r/family[0m
[32m2026-01-20 17:18:32.752[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m  ‚Ü≥ Fetching from r/relationship_advice[0m
[32m2026-01-20 17:18:33.142[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m  ‚Ü≥ Fetching from r/Parenting[0m
[32m2026-01-20 17:18:33.531[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1m  ‚Ü≥ Fetching from r/caregiving[0m
[32m2026-01-20 17:18:33.904[0m | [1mINFO    [0m | [36m__main__

Exploring what we got


In [24]:
import json
from collections import Counter

print(f"Total submissions: {len(submissions)}")
# domain distribution
domain_counts = Counter(s["domain_tag"] for s in submissions)
print("\nBy Domain:", dict(domain_counts))
# subreddit distribution
sub_counts = Counter(s["subreddit"] for s in submissions)
print("\nBy Subreddit:", dict(sub_counts))
# average comment count; guarded so an empty collection (e.g. every fetch
# failed) reports 0.00 instead of raising ZeroDivisionError
if submissions:
    avg_comments = sum(s["comment_count"] for s in submissions) / len(submissions)
else:
    avg_comments = 0.0
print(f"\nAvg comments per post: {avg_comments:.2f}")

# Basic checkout: dump at most ten r/loseit (Physical Health) submissions
count = 0
for submission in submissions:
    if submission['domain_tag'] == 'PH' and submission['subreddit'] == 'loseit':
        print(json.dumps(submission, indent=5))
        count += 1
        if count >= 10:
            break


Total submissions: 254

By Domain: {'FAC': 74, 'PH': 80, 'MH': 100}

By Subreddit: {'family': 20, 'relationship_advice': 20, 'Parenting': 20, 'caregiving': 14, 'fitness': 20, 'loseit': 20, 'nutrition': 20, 'xxfitness': 20, 'mentalhealth': 20, 'GetDisciplined': 20, 'DecidingToBeBetter': 20, 'adhdwomen': 20, 'Anxietyhelp': 20}

Avg comments per post: 619.32
{
     "submission_id": "83re4p",
     "domain_tag": "PH",
     "domain_name": "Physical Health",
     "subreddit": "loseit",
     "title": "So apparently my husband has a new girlfriend.",
     "selftext": "My husband is a member of a club that meets about twice monthly, and has been going to this club for about 6 years. I usually don't go because they're all talking about eye crossingly booring stuff (electronics and stuff), but every once in a while I go if they're doing something halfway interesting, just enough so that I know most of them at least a little bit. \n\nLast week was the monthly breakfast social at a diner that makes 

Fetching the top-scored comments for each submission (advice-relevance filtering is not applied yet)


In [25]:
import time

MAX_COMMENTS = 20    # comments examined per submission, before junk filtering
REQUEST_DELAY = 1.2  # seconds slept after each submission (rate-limit knob; TODO: revisit)

start_time = time.time()
logger.info(f"üöÄ Starting comments Mining for {len(submissions)} submissions")

for i, submission in enumerate(submissions):
    sid = submission["submission_id"]
    full_title = submission['title']
    # Titles longer than 50 characters are shortened for the log line.
    title_preview = f"{full_title[:50]}..." if len(full_title) > 50 else full_title
    logger.info(f"[{i+1}/{len(submissions)}] Fetching comments for: {sid} ‚Äî {title_preview}")

    try:
        submission_praw = reddit.submission(id=sid)
        submission_praw.comment_sort = "top"
        submission_praw.comments.replace_more(limit=0)

        top_comments = []
        for comment in submission_praw.comments.list()[:MAX_COMMENTS]:
            body = (comment.body or "").strip()
            is_placeholder = body in ("[deleted]", "[removed]")
            # Keep only real bodies of at least 30 characters.
            if body and not is_placeholder and len(body) >= 30:
                author = comment.author
                top_comments.append({
                    "comment_id": comment.id,
                    "body": body,
                    "score": comment.score,
                    "author": str(author) if author else None,
                    "created_utc": comment.created_utc,
                    "replies_count": len(comment.replies),
                })

        submission.update(top_comments=top_comments, num_fetched_comments=len(top_comments))
        logger.success(f"‚úÖ Stored {len(top_comments)} comments for {sid}")

    except Exception as e:
        # A failed fetch leaves the submission with an empty comment list.
        logger.error(f"‚ùå Failed on submission {sid}: {str(e)}")
        submission.update(top_comments=[], num_fetched_comments=0)

    # API cooldown, otherwise everything will blow up
    time.sleep(REQUEST_DELAY)

    # Progress checkpoint every 25 submissions
    if not (i + 1) % 25:
        elapsed = time.time() - start_time
        logger.info(f"Current Progress: {i+1}/{len(submissions)} | Elapsed: {elapsed:.1f}s")

# Summary
total_time = time.time() - start_time
logger.info(f"Completed comment fetching in: {total_time:.2f} seconds")

[32m2026-01-20 17:18:37.194[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1müöÄ Starting comments Mining for 254 submissions[0m
[32m2026-01-20 17:18:37.195[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1m[1/254] Fetching comments for: mldrr6 ‚Äî [UPDATE] I made my dad choose between me and his n...[0m
[32m2026-01-20 17:18:38.058[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m40[0m - [32m[1m‚úÖ Stored 20 comments for mldrr6[0m
[32m2026-01-20 17:18:39.260[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1m[2/254] Fetching comments for: mj358z ‚Äî I made my dad choose between me and his new family...[0m
[32m2026-01-20 17:18:39.913[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m40[0m - [32m[1m‚úÖ Stored 19 comments for mj358z[0m
[32m2026-01-20 17:18:41.115[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1m[3/

Exploring once again


In [26]:
# Preview the stored comments of the first ten r/loseit (Physical Health) submissions.
loseit_preview = [
    post for post in submissions
    if post['domain_tag'] == 'PH' and post['subreddit'] == 'loseit'
][:10]

for submission in loseit_preview:
    print(f"Submission - {submission['submission_id']} | {submission['domain_name']}, ({submission['num_fetched_comments']}) Comments: ")
    for comment in submission['top_comments']:
        print(json.dumps(comment, indent=5))

Submission - 83re4p | Physical Health, (18) Comments: 
{
     "comment_id": "dvjzkoe",
     "body": "I thought this was going a completely different direction!\n\nCongrats! That's so great!",
     "score": 13003,
     "author": null,
     "created_utc": 1520818964.0,
     "replies_count": 22
}
{
     "comment_id": "dvk4kro",
     "body": "My mother had the same thing happen to her. She went an informal high school reunion with her best friend and a few other women, and they there were being really enthusiastically happy to see her friend and would just politely day hi to her. She was getting really mad that everyone was being a bitch to her, until they went around the table saying what they had been up to, and her friend said \"Well X and i have been living in the city\" and they all went \"oh X! How is she, what is she up to?\" And my mom's friend pointed at her and said \"Um, she's right there...\" turned out they just didn't recognize her and were very happy to see her.",
     "scor

### Uploading to KB


In [27]:
import os
import psycopg2
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Postgres settings can be overridden through the environment / .env
# (KB_DB_HOST, KB_DB_PORT, KB_DB_NAME, KB_DB_USER, KB_DB_PASSWORD), so real
# credentials never have to be written into the notebook. The fallbacks are the
# previous local-development values, so an unconfigured setup connects as before.
conn = psycopg2.connect(
    host=os.getenv("KB_DB_HOST") or "localhost",
    port=int(os.getenv("KB_DB_PORT") or 5432),
    dbname=os.getenv("KB_DB_NAME") or "knowledgebase",
    user=os.getenv("KB_DB_USER") or "stiw_user",
    password=os.getenv("KB_DB_PASSWORD") or "stiw_pwd",
)
cursor = conn.cursor()
print("‚úÖ Connected to Postgres and OpenAI")



‚úÖ Connected to Postgres and OpenAI


One idea was to treat an entire discussion thread as a single document. We can come back to that later; for now we embed only each submission's self-text and its comments, without modelling reply threads.


In [28]:
from itertools import islice

# Lookup from a domain's short tag to its UUID.
domain_map = dict((entry["domain_tag"], entry["domain_id"]) for entry in domains)

def batched(iterable, n):
    """Yield consecutive lists of up to ``n`` items taken from ``iterable``.

    The last list may be shorter; nothing is yielded for an empty iterable.
    """
    source = iter(iterable)
    chunk = list(islice(source, n))
    while chunk:
        yield chunk
        chunk = list(islice(source, n))

logger.info("Starting batched embedding and insertion...")

BATCH_SIZE = 50
failed_batches = 0
for batch in batched(submissions, BATCH_SIZE):
    # Keep every submission paired with the exact text embedded for it.
    # Filtering the texts separately from the batch would shift the
    # zip() below and attach embeddings to the wrong rows.
    rows = []
    for s in batch:
        text = (s["selftext"] or "").strip() or (s["title"] or "").strip()
        if text:
            rows.append((s, text))
    if not rows:
        continue  # nothing embeddable in this batch

    try:
        emb_resp = client.embeddings.create(
            model="text-embedding-3-small",
            input=[text for _, text in rows]
        )
        embeddings = [e.embedding for e in emb_resp.data]

        for (s, _), emb in zip(rows, embeddings):
            cursor.execute("""
                INSERT INTO submissions (
                    submission_id, domain_id, subreddit, title, selftext,
                    score, upvote_ratio, comment_count, permalink, created_utc, embedding
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, to_timestamp(%s), %s)
                ON CONFLICT (submission_id) DO NOTHING
            """, (
                s["submission_id"], str(domain_map[s["domain_tag"]]), s["subreddit"],
                s["title"], s["selftext"], s["score"], s["upvote_ratio"],
                s["comment_count"], s["permalink"], s["created_utc"], emb
            ))

        # One transaction per batch: either the whole batch lands or none of it.
        conn.commit()
        logger.info(f"Inserted batch of {len(rows)} submissions.")

    except Exception as e:
        failed_batches += 1
        logger.error(f"Batch failed: {e}")
        conn.rollback()

if failed_batches:
    logger.warning(f"Finished with {failed_batches} failed batch(es); see errors above.")
else:
    logger.success("All batched submissions inserted.")


[32m2026-01-20 17:31:02.001[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mStarting batched embedding and insertion...[0m
[32m2026-01-20 17:31:05.903[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mInserted batch of 50 submissions.[0m
[32m2026-01-20 17:31:10.301[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mInserted batch of 50 submissions.[0m
[32m2026-01-20 17:31:14.282[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mInserted batch of 50 submissions.[0m
[32m2026-01-20 17:31:17.850[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mInserted batch of 50 submissions.[0m
[32m2026-01-20 17:31:21.243[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mInserted batch of 50 submissions.[0m
[32m2026-01-20 17:31:21.869[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m38[0m - [1mInsert

In [29]:
logger.info("Starting comment embedding + insertion...")

BATCH_SIZE = 50
total_inserted = 0

# Need to flatten via submission id, so I can have easy direct uploads
all_comments = []
for s in submissions:
    sid = s["submission_id"]
    for c in s.get("top_comments", []):
        all_comments.append({
            "comment_id": c["comment_id"],
            "submission_id": sid,
            "body": c["body"],
            "score": c.get("score", 0),
            "author": c.get("author"),
            "created_utc": c.get("created_utc", 0),
        })

logger.info(f"Collected {len(all_comments)} total comments to embed.")

for batch_idx, batch in enumerate(batched(all_comments, BATCH_SIZE), start=1):
    # Filter the comments themselves (not just their texts) so each embedding
    # stays aligned with the comment it was computed from in the zip() below.
    embeddable = [
        c for c in batch
        if c["body"].strip() not in ("", "[deleted]", "[removed]")
    ]
    if not embeddable:
        continue

    try:
        emb_resp = client.embeddings.create(
            model="text-embedding-3-small",
            input=[c["body"] for c in embeddable]
        )
        embeddings = [e.embedding for e in emb_resp.data]

        batch_inserted = 0
        for c, emb in zip(embeddable, embeddings):
            cursor.execute("""
                INSERT INTO comments (
                    comment_id, submission_id, author, body, score, created_utc, embedding
                ) VALUES (%s, %s, %s, %s, %s, to_timestamp(%s), %s)
                ON CONFLICT (comment_id) DO NOTHING
            """, (
                c["comment_id"],
                c["submission_id"],
                c.get("author"),
                c["body"],
                c.get("score", 0),
                c.get("created_utc", 0),
                emb
            ))
            # rowcount is 0 when ON CONFLICT skipped the row, so only rows
            # that were actually written are counted.
            batch_inserted += max(cursor.rowcount, 0)

        conn.commit()
        # Count only after a successful commit; a rolled-back batch adds nothing.
        total_inserted += batch_inserted
        logger.info(f"Batch {batch_idx}: inserted {batch_inserted} comments.")

    except Exception as e:
        conn.rollback()
        logger.error(f"Batch {batch_idx} failed: {e}")
        time.sleep(2)  # brief pause before the next batch after a failure

logger.success(f"Inserted total of {total_inserted} comments successfully.")


[32m2026-01-20 17:31:21.879[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mStarting comment embedding + insertion...[0m
[32m2026-01-20 17:31:21.891[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m20[0m - [1mCollected 4207 total comments to embed.[0m
[32m2026-01-20 17:31:25.432[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1mBatch 1: inserted 50 comments.[0m
[32m2026-01-20 17:31:29.158[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1mBatch 2: inserted 50 comments.[0m
[32m2026-01-20 17:31:32.256[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1mBatch 3: inserted 50 comments.[0m
[32m2026-01-20 17:31:35.696[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1mBatch 4: inserted 50 comments.[0m
[32m2026-01-20 17:31:39.249[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1mBatch 5: insert

### LLM Output


In [30]:
import json
from openai import OpenAI

client = OpenAI()

user_input = input("Enter your query or goal: ").strip()

# Embed query
print("Generating embedding for query...")
q_emb = client.embeddings.create(
    model="text-embedding-3-small",
    input=user_input
).data[0].embedding

# Clear any aborted transaction left behind by an earlier failed cell.
try:
    conn.rollback()
    logger.info("Transaction rolled back successfully")
except Exception as e:
    logger.warning(f"Rollback warning: {e}")

# Retrieve top submissions
logger.info("Retrieving top matching submissions...")
try:
    # The query vector is sent as JSON text and cast to ::vector in SQL.
    q_vec = json.dumps(q_emb)

    cursor.execute("""
        SELECT submission_id, domain_id, title, selftext, permalink
        FROM submissions
        ORDER BY embedding <-> %s::vector
        LIMIT 3;
    """, (q_vec,))
    top_subs = cursor.fetchall()

    if not top_subs:
        # Without retrieved context the model would have nothing to ground its
        # answer or its "Sources" list in, so the synthesis step is skipped.
        logger.warning("No relevant submissions found.")
    else:
        logger.info(f"Found {len(top_subs)} relevant submissions.")

        # Collect related comments
        context_blocks = []
        sources = []  # track all permalinks for final output

        for sid, domain_id, title, selftext, permalink in top_subs:
            cursor.execute("""
                SELECT body, score
                FROM comments
                WHERE submission_id = %s
                ORDER BY embedding <-> %s::vector
                LIMIT 5;
            """, (sid, q_vec))
            comments = cursor.fetchall()

            # Permalinks are stored as full URLs by the extraction cell; only a
            # relative path gets the host prepended, so the URL is never doubled.
            if permalink and not permalink.startswith("http"):
                url = f"https://www.reddit.com{permalink}"
            else:
                url = permalink

            post_body = selftext[:1200] if selftext else "(No text body)"
            joined_comments = "\n".join([f"- {c[0][:600]}" for c in comments])
            block = f"""
[POST] "{title}"
URL: {url}

{post_body}

Top Comments:
{joined_comments}
"""
            context_blocks.append(block)
            sources.append(f"- {title}: {url}")

        context_text = "\n\n---\n\n".join(context_blocks)

        print("\n" + "="*80)
        print("RETRIEVED CONTEXT FROM KB")
        print("="*80)
        print(context_text)
        print("="*80 + "\n")

        # LLM synthesis
        print("Generating output...")
        prompt = f"""
You are an empathetic assistant that uses examples from real Reddit users' discussions
to guide someone based on their current life goal.

User Query:
{user_input}

Here are similar cases from Reddit:
{context_text}

Using the experiences and insights quoted above, write an actionable plan
to help the user.
At the END of your response, include a "Sources" section listing all the Reddit discussions you referenced.
Format each source as: "- [Post Title]: [full permalink URL]"
"""

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful, grounded life-strategy coach."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7
        )

        print("\n--- LLM RESPONSE ---\n")
        print(response.choices[0].message.content)

        # === ensure sources are always visible ===
        print("\n" + "="*80)
        print("DISCUSSION SOURCES")
        print("="*80)
        for src in sources:
            print(src)
        print("="*80 + "\n")

    # Close the read-only transaction opened by the SELECTs.
    conn.commit()

except Exception as e:
    logger.error(f"Error during query execution: {e}")
    conn.rollback()
    raise

Generating embedding for query...


[32m2026-01-20 17:38:23.242[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mTransaction rolled back successfully[0m
[32m2026-01-20 17:38:23.244[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mRetrieving top matching submissions...[0m
[32m2026-01-20 17:38:23.298[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m35[0m - [1mFound 3 relevant submissions.[0m



RETRIEVED CONTEXT FROM KB

[POST] "There is an Arabic saying and it goes like this:"
URL: https://www.reddit.com/r/mentalhealth/comments/eo3zu0/there_is_an_arabic_saying_and_it_goes_like_this/

‚ÄúYou want to die? Then throw yourself into the sea and you‚Äôll see yourself fighting to survive. You do not want to kill yourself, rather you want to kill something inside of you‚Äù

Top Comments:
- Kind of a bad analogy because death of drowning would be immensely painful and slow and that‚Äôs why you would fight to survive
- I have another one for you. "Stand at the border of a cliff and you will want to throw yourself into it. That's you wanting to kill yourself."  
Instinct is not a rational way of seeing things. That is as bad an analogy as it can be. Even when suicidal people may try to avoid pain.
- I can relate to this, I am gay and most of my life I was in the closet  and I let others opinion make me hate myself I was drowning. Had sever depression and thought it would be better to 