In [6]:
import pandas as pd

In [7]:
# Load the weighted lexicon; the code below reads its `word`, `state` and
# `weight` columns.
lexicon_df = pd.read_csv("../data/processed/dataset_with_weights.csv")

# Map each lower-cased word to its state and weight so lookups are
# case-insensitive. Columns are zipped directly instead of using iterrows(),
# which builds a Series per row and does not preserve column dtypes.
# Rows whose `word` cell is not a string (e.g. a blank cell read as NaN) are
# skipped; calling .lower() on them would raise AttributeError.
# If a word occurs more than once, the last row wins.
lexicon = {
    word.lower(): {"state": state, "weight": weight}
    for word, state, weight in zip(
        lexicon_df["word"], lexicon_df["state"], lexicon_df["weight"]
    )
    if isinstance(word, str)
}

# Index of every distinct word exactly as spelled in the file (original casing).
vocab = {w: i for i, w in enumerate(lexicon_df["word"].unique())}

In [10]:
def calculate_state_percentages(selected_words_with_intensity, lexicon):
    """Turn weighted word selections into percentage shares per state.

    Every selected word is looked up in ``lexicon`` after trimming surrounding
    whitespace and lower-casing it. A word that is found adds
    ``intensity * weight`` to the score of the state its lexicon entry names.
    A word that is not found is reported on stdout and otherwise ignored.

    Args:
        selected_words_with_intensity: Iterable of ``(word, intensity)`` pairs,
            where ``word`` is a string and ``intensity`` is a number.
        lexicon: Mapping from lower-cased word to a dict with the keys
            ``"state"`` and ``"weight"``.

    Returns:
        A dict with the keys ``"hypo"``, ``"hyper"`` and ``"flow"`` whose
        values are each state's share of the total score, in percent. When the
        total score is zero (for example, nothing matched), every share is
        ``0.0``.

    Raises:
        KeyError: If a matched lexicon entry names a state other than
            ``"hypo"``, ``"hyper"`` or ``"flow"``.
    """
    scores = {"hypo": 0, "hyper": 0, "flow": 0}
    for word, intensity in selected_words_with_intensity:
        # Typed input often carries stray spaces/newlines; normalise before
        # the lookup so "Anxious " still matches the "anxious" entry.
        key = word.strip().lower()
        if key not in lexicon:
            print(f"⚠️ Word '{word}' not found in lexicon")
            continue
        entry = lexicon[key]
        scores[entry["state"]] += intensity * entry["weight"]

    total = sum(scores.values())
    if not total:
        # Avoid dividing by zero and keep the value type consistent (float).
        return {state: 0.0 for state in scores}
    return {state: score / total * 100 for state, score in scores.items()}


In [11]:
# Hard-coded sample selection standing in for a user's choices:
# each entry is a (word, intensity) pair.
user_input = [("Anxious", 4), ("Restless", 3), ("Numb", 2)]

percentages = calculate_state_percentages(user_input, lexicon)

# Report each state's share rounded to one decimal place.
print("State proportions:")
for state in percentages:
    pct = percentages[state]
    print(f"{state}: {pct:.1f}%")


State proportions:
hypo: 22.1%
hyper: 77.9%
flow: 0.0%


In [13]:
# Collect (word, intensity) pairs interactively until the user types 'done'.
# NOTE: entries are appended to the existing `user_input` list, so re-running
# this cell keeps adding to whatever it already contains.
while True:
    word = input("Enter word (or 'done'): ").strip()
    if word.lower() == "done":
        break
    if not word:
        # Nothing was typed; ask again instead of recording an empty word.
        continue

    # Keep asking until the intensity is a whole number in the advertised
    # 1-5 range; a bare int(...) would raise ValueError on a typo and lose
    # everything entered so far.
    while True:
        raw_intensity = input("Enter intensity (1-5): ").strip()
        try:
            intensity = int(raw_intensity)
        except ValueError:
            print(f"'{raw_intensity}' is not a whole number, please try again.")
            continue
        if 1 <= intensity <= 5:
            break
        print("Intensity must be between 1 and 5, please try again.")

    user_input.append((word, intensity))

print(calculate_state_percentages(user_input, lexicon))


⚠️ Word 'lonely' not found in lexicon
{'hypo': 45.66929133858268, 'hyper': 54.330708661417326, 'flow': 0.0}
