# Main Program
Author: Owen Fava

In [None]:
import ast
import matplotlib.pyplot as plt
import networkx as nx
import nltk
import numpy as np
import pandas as pd
import spacy
from empath import Empath
from gensim.models import KeyedVectors, Word2Vec
from nltk.sentiment import SentimentIntensityAnalyzer
from stellargraph import StellarGraph
from stellargraph.mapper import FullBatchNodeGenerator
from tensorflow.keras.models import load_model
from tqdm import tqdm
from stellargraph.layer import GraphConvolution

# General-purpose English pipeline; used below for lemmatisation, stop-word
# detection and dependency parsing.
nlp = spacy.load("en_core_web_md")

# NLTK resources fetched at start-up. "vader_lexicon" backs the
# SentimentIntensityAnalyzer; "words" is not referenced in the visible cells
# (NOTE(review): confirm it is still needed).
for nltk_resource in ("words", "vader_lexicon"):
    nltk.download(nltk_resource)

# Custom spaCy pipeline loaded from disk; its entities are filtered against
# MH_NER in create_psylabel_feature.
psy_ner = spacy.load("./model/psy_ner")

In [None]:
# Read the free-text statement to analyse from standard input.
user_input = input("Enter your input: ")

def clean_sentence(sentence):
    """Return a normalised version of *sentence* for feature extraction.

    The text is run through the ``nlp`` pipeline, stop words are dropped and
    every remaining token is replaced by its lemma. The lemmas are then
    re-split on whitespace and only purely alphabetic words are kept, which
    removes punctuation, numbers and mixed tokens.

    Args:
        sentence: Raw input text.

    Returns:
        The surviving words joined by single spaces (possibly an empty string).
    """
    doc = nlp(sentence)
    lemmas = " ".join(tok.lemma_ for tok in doc if not tok.is_stop)
    # Splitting the joined string (rather than filtering tokens directly)
    # also discards whitespace-only lemmas.
    return " ".join(filter(str.isalpha, lemmas.split()))

# FOR TESTING PASS THIS SENTENCE: "I am in a really dark moment in my life right now. My husband is a controlling pos that believes he’s a doctor and doesn’t allow me to get any kind of treatment for my BPD. I recently moved to his country and I am unable to do anything by myself (I don’t speak Korean), also he keeps the salary of my job so I don’t have money. I can’t move back to my country either, my family stopped to talk to me because I married him, so how can I cope with all the suicide thoughts I have everyday? I also suffer from anger management issues and without medicine, I can’t control myself and we fight a lot. I feel really lonely here, he works 24/7, but I prefer that than being with him honestly. He’s controlling and even violent sometimes. What do you recommend to treat my BPD?"
# Clean the entered text and print it next to the original for comparison.
cleaned_sentence = clean_sentence(user_input)
print(f"Original sentence: {user_input}")
print(f"Cleaned sentence: {cleaned_sentence}")

In [None]:
# Empath lexicon used to score the emotional categories listed in EMPATH_CATS.
lexicon = Empath()
# NLTK VADER analyzer used for the sentence-level sentiment score.
sentiment_intensity_analyzer = SentimentIntensityAnalyzer()

# Feature table: one row per analysed sentence, filled in once the feature
# builder functions below are defined.
data = pd.DataFrame(columns=["sentence", "psy_labels", "semantic_relationships", "sentiment", "emotional_categories"])

# Entity labels that create_psylabel_feature keeps from the psy_ner output;
# entities carrying any other label are ignored.
MH_NER = [
    "ANXIETY DISORDERS",
    "BIPOLAR DISORDERS",
    "DEPRESSIVE DISORDERS",
    "DISRUPTIVE IMPULSE-CONTROL, AND CONDUCT DISORDERS",
    "DISSOCIATIVE DISORDERS",
    "EATING DISORDERS",
    "NEURO-COGNITIVE DISORDERS",
    "NEURO-DEVELOPMENTAL DISORDERS",
    "OBSESSIVE-COMPULSIVE AND RELATED DISORDERS",
    "PERSONALITY DISORDERS",
    "PSYCHEDELIC DRUGS",
    "SCHIZOPHRENIA SPECTRUM AND OTHER PSYCHOTIC DISORDERS",
    "SEXUAL DYSFUNCTIONS",
    "SLEEP-WAKE DISORDERS",
    "SOMATIC SYMPTOM RELATED DISORDERS",
    "SUBSTANCE-RELATED DISORDERS",
    "SYMPTOMS",
    "TRAUMA AND STRESS RELATED DISORDERS",
]

# Empath category names passed to lexicon.analyze() in
# create_emotional_categories_scores_feature; only these categories are scored.
EMPATH_CATS = [
    "help",
    "violence",
    "sleep",
    "medical_emergency",
    "cold",
    "hate",
    "cheerfulness",
    "aggression",
    "envy",
    "anticipation",
    "health",
    "pride",
    "nervousness",
    "weakness",
    "horror",
    "swearing_terms",
    "suffering",
    "sexual",
    "fear",
    "monster",
    "irritability",
    "exasperation",
    "ridicule",
    "neglect",
    "fight",
    "dominant_personality",
    "injury",
    "rage",
    "science",
    "work",
    "optimism",
    "warmth",
    "sadness",
    "emotional",
    "joy",
    "shame",
    "torment",
    "anger",
    "strength",
    "ugliness",
    "pain",
    "negative_emotion",
    "positive_emotion",
]

def create_rel_feature(text):
    """Extract subject and direct-object dependency relations from *text*.

    The text is parsed with the ``nlp`` pipeline and every token whose
    dependency label is ``nsubj`` or ``dobj`` contributes one triple.

    Args:
        text: Sentence(s) to parse. Anything that is not a ``str`` yields an
            empty result without invoking the parser.

    Returns:
        A list of ``(head_text, dependency_label, token_text)`` tuples in
        document order.
    """
    if not isinstance(text, str):
        return []

    parsed = nlp(text)
    return [
        (tok.head.text, tok.dep_, tok.text)
        for sentence in parsed.sents
        for tok in sentence
        if tok.dep_ in ("nsubj", "dobj")
    ]

def create_psylabel_feature(text):
    """Group the mental-health entities found in *text* by their label.

    The text is run through the ``psy_ner`` pipeline; entities whose label is
    listed in ``MH_NER`` are collected, with duplicate mentions of the same
    text under one label stored only once.

    Args:
        text: Sentence(s) to analyse. Anything that is not a ``str`` yields an
            empty result without invoking the model.

    Returns:
        A dict mapping each matched label to a list of distinct entity texts.
        The lists are built from sets, so their element order is not
        guaranteed.
    """
    if not isinstance(text, str):
        return {}

    mentions_by_label = {}
    for entity in psy_ner(text).ents:
        if entity.label_ in MH_NER:
            mentions_by_label.setdefault(entity.label_, set()).add(entity.text)

    # Callers receive plain lists rather than sets.
    return {label: list(mentions) for label, mentions in mentions_by_label.items()}

def create_sentiment_feature(text):
    """Return the VADER compound sentiment score for *text*.

    Args:
        text: Sentence(s) to score. Anything that is not a ``str`` is treated
            as missing, in line with the other feature builders, which return
            an empty value for non-string input instead of failing.

    Returns:
        The ``"compound"`` value reported by ``sentiment_intensity_analyzer``,
        or ``numpy.nan`` when the input is not a string or no score is
        available.
    """
    if not isinstance(text, str):
        return np.nan

    scores = sentiment_intensity_analyzer.polarity_scores(text)

    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0 and
    # would raise AttributeError the first time this fallback is reached.
    return scores["compound"] if scores is not None else np.nan

def create_emotional_categories_scores_feature(text):
    """Score *text* against the Empath categories listed in ``EMPATH_CATS``.

    Args:
        text: Sentence(s) to analyse.

    Returns:
        A dict mapping each category name to its normalised score rounded to
        two decimals, or an empty dict when the lexicon returns ``None``
        (presumably for text without any tokens; confirm against Empath).
    """
    raw_scores = lexicon.analyze(text, categories=EMPATH_CATS, normalize=True)
    if raw_scores is None:
        return {}
    return {name: round(value, 2) for name, value in raw_scores.items()}

# Append one feature row for the cleaned sentence. Values are positional and
# follow the column order of `data`: sentence, psy_labels,
# semantic_relationships, sentiment, emotional_categories.
data.loc[len(data)] = [
    cleaned_sentence,
    create_psylabel_feature(text=cleaned_sentence),
    create_rel_feature(text=cleaned_sentence),
    create_sentiment_feature(text=cleaned_sentence),
    create_emotional_categories_scores_feature(cleaned_sentence),
]
print(data)

In [None]:
# Flatten the relation triples into one record per relation.
clean_data = []

data = data.reset_index(drop=True)
# Labels and sentence text are taken from the first row only and attached to
# every relation record, whichever row the relation came from.
labels = data.iloc[0]["psy_labels"]
relationships = data["semantic_relationships"]
sentence = data.iloc[0]["sentence"]

for relationship in relationships:
    for rel in relationship:
        # Anything that is not a (head, dependency, token) triple is reported
        # and skipped.
        if len(rel) != 3:
            print(f"Issue with relationship: {rel}")
            continue

        word1, dep, word2 = rel
        record = {
            "Word1": word1,
            "Dependency": dep,
            "Word2": word2,
            "MHlabels": labels,
            "sentence": sentence,
        }
        clean_data.append(record)

clean_data_frame = pd.DataFrame(clean_data)
print(clean_data_frame)

In [None]:
# Build an undirected word graph: one node per lower-cased word and one edge
# per subject / direct-object relation.
graph = nx.Graph()

# Dependency labels that produce an edge; rows with any other label still
# contribute their two nodes but no edge.
edge_dependencies = ("dobj", "nsubj")

progress_rows = tqdm(
    clean_data_frame.itertuples(),
    total=len(clean_data_frame),
    desc="Processing Rows",
    position=0,
)
for row in progress_rows:
    word1_lowercase = row.Word1.lower()
    graph.add_node(word1_lowercase)

    word2_lowercase = row.Word2.lower()
    graph.add_node(word2_lowercase)

    # Round-trip through the string form so that label dicts stored as text
    # are parsed back into a dict as well.
    mh_labels_dict = ast.literal_eval(str(row.MHlabels))

    # Both dependency kinds add the same head -> token edge; the label is kept
    # on the edge and each edge gets its own copy of the label dict.
    if row.Dependency in edge_dependencies:
        graph.add_edge(
            word1_lowercase,
            word2_lowercase,
            dependency=row.Dependency,
            label=dict(mh_labels_dict),
        )

nx.draw(graph, with_labels=True)
plt.show()

In [None]:
# Attach a word-embedding vector to every graph node and drop the nodes that
# have no embedding.
word_embeddings = KeyedVectors.load("word2vec_embeddings.bin")

# ``load`` restores whatever object was saved. A full Word2Vec model exposes
# its vectors as ``.wv``, whereas a bare KeyedVectors has no such attribute in
# gensim 4 (the deprecated alias was removed). Resolve the lookup table once so
# that either kind of file works.
embedding_vectors = getattr(word_embeddings, "wv", word_embeddings)

node_to_remove = []

for node in graph.nodes:
    if node in embedding_vectors:
        graph.nodes[node]["embedding"] = embedding_vectors[node]
    else:
        # Collected and removed after the loop: the node set must not change
        # while it is being iterated.
        node_to_remove.append(node)

# Removing a node also removes every edge attached to it.
graph.remove_nodes_from(node_to_remove)

print(graph.nodes.data(True))
nx.draw(graph, with_labels=True)
plt.show()

In [None]:
# Convert the NetworkX graph into the node / edge tables StellarGraph expects.

# Node name -> attribute dict for every node still in the graph.
node_data = dict(graph.nodes(data=True))

# Create a mapping between string and numeric identifiers; the integers
# replace the node names in both tables below.
node_mapping = {node: idx for idx, node in enumerate(node_data)}

embedding_by_id = {
    node_mapping[node]: values["embedding"] for node, values in node_data.items()
}
# NOTE(review): reset_index() keeps the previous index as an extra "index"
# column, so the numeric node id becomes part of the node feature table.
# Confirm the saved model was trained with that additional column.
node_df = pd.DataFrame.from_dict(embedding_by_id, orient="index").reset_index()

edge_data = list(graph.edges(data=True))
edge_df = pd.DataFrame(edge_data, columns=["source", "target", "edge_attribute"])
source_nodes = edge_df["source"]
target_nodes = edge_df["target"]
# Keep only the two endpoint columns; the edge attributes are not passed on.
edge_df = pd.concat([source_nodes, target_nodes], axis=1)
edge_df.replace(node_mapping, inplace=True)

stellar_graph = StellarGraph(nodes=node_df, edges=edge_df)
print(stellar_graph.info())

In [None]:
# GraphConvolution is a StellarGraph layer, so it is passed via custom_objects
# for Keras to resolve when restoring the saved model.
# NOTE(review): this reads from "models/" while the NER pipeline is loaded
# from "./model/" - confirm both directories are intended.
model = load_model(
    "models/model_fold_1.h5",
    custom_objects={"GraphConvolution": GraphConvolution},
)

print("Model is loaded" if model is not None else "Model is not loaded")

In [None]:
# Run the loaded model over every node of the input graph in one full batch,
# using the generator's "gcn" method.
generator = FullBatchNodeGenerator(stellar_graph, method="gcn")
new_data_gen = generator.flow(stellar_graph.nodes())

# Print the raw model output as-is; no decoding into class names happens here.
predictions = model.predict(new_data_gen)
print(predictions)