In [2]:
%pip install -U sentence-transformers

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import sys
print(sys.executable)

c:\Users\manav\AppData\Local\Programs\Python\Python312\python.exe


In [4]:
import pandas as pd
import json
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
json_path = "../data/raw/initial_word_list.json"

# Load JSON
with open(json_path, "r", encoding="utf-8") as f:
    word_data = json.load(f)

# Extract into lists
hypo_words = word_data["hypo"]
hyper_words = word_data["hyper"]
flow_words = word_data["flow"]

print("Hypo words:", hypo_words)
print("Hyper words:", hyper_words)
print("Flow words:", flow_words)


Hypo words: ['numb', 'frozen', 'empty', 'heavy', 'alone', 'lonely', 'disconnected', 'hopeless', 'despair', 'invisible', 'withdrawn', 'dissociated', 'tired', 'faint', 'passive', 'foggy', 'apathetic', 'i can’t…', 'what’s the point?', 'low', 'weak', 'detached', 'spaced-out', 'slow', 'still', 'muted', 'distant', 'blank', 'vacant', 'shut down', 'fatigued', 'unmotivated', 'sluggish', 'dull', 'uninterested', 'silent', 'indifferent', 'lifeless', 'exhausted', 'unresponsive', 'powerless', 'isolated', 'lacking energy', 'collapsed', 'drained', 'flat', 'lack of will', 'checked out', "can't move", 'low-spirited', 'sad', 'unfeeling', 'quiet']
Hyper words: ['anxious', 'angry', 'panicked', 'overwhelmed', 'restless', 'tight', 'racing', 'agitated', 'frustrated', 'tense', 'unsafe', 'defensive', 'rigid', 'chaotic', 'scattered', 'i have to…', 'i’m not safe', 'something bad will happen', 'uneasy', 'jumpy', 'short-tempered', 'over-alert', 'fidgety', 'hot', 'wound up', 'hyper', 'pressured', 'impatient', 'snapp

In [6]:
anchor_words = {
    "hypo": ["numb", "disconnected", "hopeless"],
    "hyper": ["anxious", "agitated", "unsafe"],
    "flow": ["calm", "grounded", "safe"]
}

In [7]:
# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to compute weight
def get_weight(word, state, is_anchor=False):
    if is_anchor:
        return 1.0
    
    # Encode anchors & target word
    anchors_vec = model.encode(anchor_words[state], convert_to_tensor=True)
    word_vec = model.encode([word], convert_to_tensor=True)
    
    # Calculate cosine similarities
    sims = util.cos_sim(word_vec, anchors_vec)[0]
    avg_sim = float(sims.mean())  # average similarity to all anchors
    
    # first scaling formula : to get weights in range 0.75 to 0.95 
    # scaled_weight = 0.75 + (avg_sim * 0.20)
    # Problem with scaling formula 1: not enough variation in weights

    # second scaling formula : assign weight directly to the cosine similarity value
    weight = max(0.3, avg_sim)
    return round(weight, 2)

# Build lexicon with weights
lexicon = []
for w in hypo_words:
    lexicon.append({"word": w.lower(), "state": "hypo", "weight": get_weight(w, "hypo")})
for w in hyper_words:
    lexicon.append({"word": w.lower(), "state": "hyper", "weight": get_weight(w, "hyper")})
for w in flow_words:
    lexicon.append({"word": w.lower(), "state": "flow", "weight": get_weight(w, "flow")})

# Save to CSV
df = pd.DataFrame(lexicon)
df.to_csv("../data/processed/dataset_with_weights2.csv", index=False)
print(df.head())


     word state  weight
0    numb  hypo    0.57
1  frozen  hypo    0.37
2   empty  hypo    0.39
3   heavy  hypo    0.30
4   alone  hypo    0.36
