# Libraries

In [3]:
import contractions
import language_tool_python
import os
import re
import pandas as pd
from fastpunct import FastPunct
from language_tool_python.utils import correct
from punctfix import PunctFixer
from spellchecker import SpellChecker
from symspellpy import SymSpell, Verbosity
from textblob import TextBlob

# Helper Functions

## check_text()

### Contractions + LanguageTool + PySpellChecker

In [13]:
# Set up the Java environment for LanguageTool.
# NOTE: this JDK location is machine-specific; adjust it for your install.
java_home = r"C:\Program Files\Eclipse Adoptium\jdk-21.0.8.9-hotspot"
if os.path.isdir(java_home):
    os.environ["JAVA_HOME"] = java_home
    # Prepend the JDK's bin directory so its `java` is found first.
    # os.pathsep / os.path.join keep this correct on non-Windows hosts too,
    # and .get() avoids a KeyError when PATH is not set at all.
    os.environ["PATH"] = (
        os.path.join(java_home, "bin") + os.pathsep + os.environ.get("PATH", "")
    )
# If the directory does not exist the environment is left untouched, so a
# Java that is already on PATH is not shadowed by a bogus JAVA_HOME.

# Initialize LanguageTool for English
tool = language_tool_python.LanguageTool('en')  # English

# Function to check and correct text
def check_text(text):
    """Expand contractions, apply grammar fixes, then fix spelling in ``text``.

    The pipeline runs in this order:

    1. ``contractions.fix`` expands contractions.
    2. The module-level LanguageTool instance ``tool`` checks the text and its
       suggestions are applied with ``language_tool_python.utils.correct``.
    3. Every purely alphabetic word is passed through PySpellChecker; a
       replacement takes on the capitalisation of the word it replaces.
    4. A space is inserted after ``. , ; : ? !`` when a letter follows
       immediately (e.g. ``"house.i"`` -> ``"house. i"``).

    All other characters, including whitespace, quotes, dashes, brackets and
    line breaks, are left exactly where they were.

    Args:
        text: The text to clean.

    Returns:
        The corrected text. Non-string values (such as ``None`` or a NaN cell)
        and empty or whitespace-only strings are returned unchanged.
    """
    print("This uses Contractions + LanguageTool + PySpellChecker")

    # Nothing to correct: hand the value back instead of crashing downstream.
    if not isinstance(text, str) or not text.strip():
        return text

    # Expand contractions
    text = contractions.fix(text)

    # Grammar pass
    matches = tool.check(text)
    text = language_tool_python.utils.correct(text, matches)

    # Spelling pass: substitute words in place so the surrounding text
    # (spacing, punctuation, newlines) is never rebuilt by hand.
    spell = _get_spell_checker()

    def _fix_word(found):
        word = found.group(0)
        # Leave numbers and mixed tokens such as "55years" alone.
        if not word.isalpha():
            return word
        suggestion = spell.correction(word)
        if not suggestion:
            return word
        return _match_case(word, suggestion)

    text = _WORD_RE.sub(_fix_word, text)

    # Add the missing space after sentence punctuation. Only letters trigger
    # it, so decimals ("3.14"), times ("10:30") and thousands separators
    # ("1,000") are kept intact. Abbreviations and URLs are still split.
    return _MISSING_SPACE_RE.sub(r"\1 ", text)


# A run of word characters; only the purely alphabetic ones are spell-checked.
_WORD_RE = re.compile(r"\w+")

# Sentence punctuation immediately followed by a letter.
_MISSING_SPACE_RE = re.compile(r"([.,;:?!])(?=[^\W\d_])")

# Cached SpellChecker instance, created on first use.
_SPELL_CHECKER = None


def _get_spell_checker():
    """Return a shared SpellChecker, building it on the first call only."""
    global _SPELL_CHECKER
    if _SPELL_CHECKER is None:
        # Constructing a SpellChecker loads its dictionary, which is wasteful
        # to repeat for every text when processing a whole DataFrame.
        _SPELL_CHECKER = SpellChecker()
    return _SPELL_CHECKER


def _match_case(original, replacement):
    """Give ``replacement`` the capitalisation pattern of ``original``.

    Only ever upper-cases characters, so a replacement that already equals
    the original word comes back unchanged.
    """
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[0].isupper():
        return replacement[0].upper() + replacement[1:]
    return replacement

# Read Posts and Comments Data

In [6]:
# Load the stitched posts and comments exports, then preview the posts.
posts_df, comments_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/BeyondBlue/conditions_data_post_stitched.csv',
        './data/BeyondBlue/data_comments_stitched.csv',
    )
)
posts_df.head()

Unnamed: 0,Post_ID,Post_Title,Post_Content,Post_Author,Post_Author_Rank,Post_Date,Post_Category,Number_of_Comments,Post_URL
0,Anxi-1,I’m stuck!,I’ve never written on a forum like this before...,Guest_39557583,Community Member,20-06-2025,Anxiety,6,/t5/anxiety/i-m-stuck/td-p/611578
1,Anxi-2,Back injury anxiety,Hi. I'm new here. I am extremely anxious when ...,Guest42,Community Member,10-07-2025,Anxiety,0,/t5/anxiety/back-injury-anxiety/td-p/612114
2,Anxi-3,"Anxiety, Injustice, and Fear: Workplace Exploi...","Hi everyone,I’m going through one of the most ...",Joker_J,Community Member,09-07-2025,Anxiety,1,/t5/anxiety/anxiety-injustice-and-fear-workpla...
3,Anxi-4,Im lost and wasn't sure what should I do next.,I am international student to Tasmania in 2021...,tevont,Community Member,20-06-2025,Anxiety,2,/t5/anxiety/im-lost-and-wasn-t-sure-what-shoul...
4,Anxi-5,Just broke free from rude friend and I still f...,"Recently, I have left my old friend who would ...",waffle_puppy,Community Member,27-06-2025,Anxiety,3,/t5/anxiety/just-broke-free-from-rude-friend-a...


# Testing the check_text() function

In [14]:
# Draw one post (fixed seed, so the same row every run) and clean its text.
sample = posts_df.sample(n=1, random_state=42).iloc[0]

# Prefer the post body; fall back to the first column if it is missing.
col = posts_df.columns[0] if 'Post_Content' not in posts_df.columns else 'Post_Content'
original = sample[col]
print("Original:\n", original, "\n")

# Only non-blank strings are sent through check_text(); anything else
# (e.g. a missing value) is shown as-is.
corrected = (
    check_text(original)
    if isinstance(original, str) and original.strip()
    else original
)
print("Corrected:\n", corrected)

Original:
 My name is Dennis, I am 77, married to an amazing lady for 55years who has been with me through thick and thin. I have had anxiety even before I knew the word. I try not to blame my condition on my early childhood but it is so hard. My last relapse was triggered when we moved to a rural area and I couldn't handle it so we returned to Adelaide and rented the house.i am at present being treated by The Older Persons Health Team who I cannot speak to highly about. Up to now I have not been one to conform to medication but I relize if I don't I won' t get better. Can anyone suggest more up to date texts on anxiety? 

This uses Contractions + LanguageTool + PySpellChecker
Corrected:
 My name is Dennis, I am 77, married to an amazing lady for 55 years who has been with me through thick and thin. I have had anxiety even before I knew the word. I try not to blame my condition on my early childhood but it is so hard. My last relapse was triggered when we moved to a rural area and I co

# List of all Comment_Author_Rank values

In [4]:
# Distinct, non-null author ranks in sorted order.
rank_column = comments_df['Comment_Author_Rank']
unique_ranks = sorted(rank_column.dropna().unique())
print(unique_ranks)

['Beyond Blue Staff', 'Blue Voices Member', 'Champion Alumni', 'Community Champion', 'Community Member', 'Moderator', 'Valued Contributor']


In [None]:

# Print the full content of every post whose Post_ID equals `post_id`.
post_id = 'Depr-6474'

if 'Post_ID' in posts_df.columns:
    match = posts_df.loc[posts_df['Post_ID'] == post_id, 'Post_Content']
    if not match.empty:
        for i, content in enumerate(match.tolist(), start=1):
            print(f"\n--- Post {i} (Post_ID={post_id}) ---\n")
            # Non-string content (e.g. a missing value) is shown via repr().
            print(content if isinstance(content, str) else repr(content))
    else:
        print(f"No post found with Post_ID {post_id}")
else:
    print("Column 'Post_ID' not found in posts_df")



--- Post 1 (Post_ID=Depr-6474) ---

I broke down again yesterday and wanted to end it all. I would never do it, but its the only thought that I have atm. I think about how bad I feel every day and I just want these feelings to go away. I dont know where to go from here. I feel like I just exist with no purpose what so ever. I have lost the will and motivation for life and have completely lost myself in the process and it scares me that I will never feel happy again. I just feel empty inside. I go to work as a distraction and try to fill up my weekend with things to do, but I dont get any enjoyment out of it. I feel like I have hit rock bottom and I am trying to get out of it, but I dont know where to go from here. I feel so alone, even though I have good friends, I just don't feel they understand. I feel guilty because my family know that I am not being myself but I dont know how to find myself again. I am on anti-depressants and am seeing a psychologist, but I feel like its not enoug

In [18]:
# Smoke test for check_text(): the sample contains an article error
# ("a error") and two sentences run together without punctuation.
a = "This is a sample text with a error I live with my mother she is nice"
# Last expression of the cell, so the notebook displays the returned string.
check_text(a)

'This is a sample text with an error I live with my mother she is nice'

# Cross Encoders

In [None]:
from sentence_transformers import CrossEncoder
import torch

# Load the pretrained MS MARCO MiniLM cross-encoder.
# A Sigmoid is supplied as the activation function; presumably predict()
# applies it to the raw model output so that scores fall between 0 and 1.
# NOTE(review): confirm the `activation_fn` keyword against the installed
# sentence-transformers version.
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', activation_fn=torch.nn.Sigmoid())




[0.9998173 0.0131243]


In [38]:
# Example: score two (query, passage) pairs with the cross-encoder.
queries = [
    "Does the patient want to end their life?",
    "How many people live in Berlin?",
]
passages = [
    "I broke down again yesterday and wanted to end it all.",
    "Berlin is well known for its museums.",
]
# Pair each query with the passage at the same position.
pairs = list(zip(queries, passages))

scores = model.predict(pairs)
print(scores)  # Higher score = more relevant

[0.0005176 0.0131243]
