# Network Visualization

In [4]:
import os
from bertopic import BERTopic
import numpy as np
import networkx as nx
from pyvis.network import Network
from sklearn.metrics.pairwise import cosine_similarity
import pickle


def network_vis(platform, label, custom_topic_names, include_outlier=True, threshold=0.8):
    """Build and save an interactive topic-similarity network for one BERTopic model.

    The model is loaded from
    ``<cwd>/data/{platform}_data/berttopic_label/{label}/{label}_berttopic``.
    Every topic becomes a node sized by its document count, and two topics are
    linked when the cosine similarity of their topic embeddings is strictly
    greater than ``threshold``. The result is written next to the model as
    ``{platform}_{label}_topic_network.html``.

    Args:
        platform: Data source name used to build the data directory path.
        label: Label sub-directory / model name prefix.
        custom_topic_names: Mapping of topic id -> display name. Topics missing
            from the mapping are shown as ``Topic_{id}``.
        include_outlier: When True, topic ``-1`` is kept in the network;
            when False it is dropped.
        threshold: Minimum (exclusive) cosine similarity for drawing an edge.
            Lower values give a denser graph.

    Returns:
        None. A message is printed and the function returns early when the
        model has no topic info, no topic embeddings, or no topics left after
        filtering.
    """
    target_path = os.path.join(os.getcwd(), 'data', f'{platform}_data', 'berttopic_label', f'{label}')

    # Load the saved BERTopic model
    topic_model = BERTopic.load(os.path.join(target_path, f'{label}_berttopic'))

    topic_info = topic_model.get_topic_info()
    if topic_info.empty:
        print(f"No topic information available for label '{label}'.")
        return

    topic_embeddings = topic_model.topic_embeddings_
    if topic_embeddings is None:
        # Guard instead of failing later with an opaque TypeError on indexing.
        print(f"No topic embeddings available for label '{label}'.")
        return

    # Native ints/floats are used throughout: pyvis serialises the graph with
    # json, which rejects some NumPy scalar types (e.g. np.float32, np.int64).
    all_topics = [int(topic) for topic in topic_info['Topic']]
    all_counts = [int(count) for count in topic_info['Count']]

    # Row positions to keep (include or exclude outlier topic -1).
    # NOTE(review): this assumes the rows of topic_embeddings_ are in the same
    # order as the rows of get_topic_info() -- confirm for the BERTopic version in use.
    kept_rows = [row for row, topic in enumerate(all_topics) if include_outlier or topic != -1]
    if not kept_rows:
        print(f"No valid topics found for label '{label}'.")
        return
    valid_topics = [all_topics[row] for row in kept_rows]
    topic_sizes = {all_topics[row]: all_counts[row] for row in kept_rows}

    # Compute cosine similarity between the embeddings of the kept topics;
    # row/column k of the matrix corresponds to valid_topics[k].
    similarity_matrix = cosine_similarity(topic_embeddings[kept_rows])

    # Create a networkx graph
    G = nx.Graph()

    # Add nodes with custom names, document count as size
    for topic_id in valid_topics:
        custom_name = custom_topic_names.get(topic_id, f"Topic_{topic_id}")  # Use custom name or fallback
        G.add_node(topic_id, label=custom_name, size=topic_sizes[topic_id])

    # Add one edge per unordered topic pair whose similarity exceeds the threshold
    for i, topic_i in enumerate(valid_topics):
        for j in range(i + 1, len(valid_topics)):
            similarity = float(similarity_matrix[i, j])
            if similarity > threshold:
                G.add_edge(topic_i, valid_topics[j], weight=similarity)

    net = Network(height="750px", width="100%", notebook=False, directed=False)

    # Add nodes and edges to pyvis network
    for node, node_data in G.nodes(data=True):
        new_label = node_data.get('label', f"Topic_{node}")  # Fallback if label is missing
        size = float(node_data.get('size', 10) * 0.3)  # Default size if missing
        print(f"Node ID: {node}, Label: {new_label}, Size: {size}")  # Debugging print
        net.add_node(int(node), label=new_label, size=size, title=new_label, color='lightblue')

    # Edge width is a linear map of similarity from [min_weight, max_weight]
    # onto [min_width, max_width], clamped so that similarities outside that
    # range (possible when threshold < min_weight) never yield a negative or
    # oversized width.
    min_weight, max_weight = 0.6, 1.0
    min_width, max_width = 0.5, 5.0
    for source, target, weight in G.edges(data='weight', default=0.1):
        weight = float(weight)
        scaled_width = min_width + (max_width - min_width) * (weight - min_weight) / (max_weight - min_weight)
        scaled_width = min(max_width, max(min_width, scaled_width))
        net.add_edge(int(source), int(target), value=weight, title=f"Similarity: {weight:.2f}",
                     width=scaled_width, color='gray')

    # Customize visualization (spaced-out layout)
    net.set_options("""
    {
        "nodes": {
            "shape": "dot",
            "scaling": {
                "min": 10,
                "max": 20
            },
            "font": {
                "size": 35,
                "face": "arial"
            }
        },
        "edges": {
            "color": {
                "inherit": false
            },
            "smooth": {
                "enabled": true,
                "type": "dynamic"
            },
            "width": 0.5
        },
        "physics": {
            "forceAtlas2Based": {
                "gravitationalConstant": -200,
                "centralGravity": 0.005,
                "springLength": 300
            },
            "minVelocity": 0.2,
            "solver": "forceAtlas2Based"
        }
    }
    """)

    # Save the network visualization
    net.save_graph(os.path.join(target_path, f'{platform}_{label}_topic_network.html'))
    print("Network visualization saved ")
    
    

In [10]:
platform = "reddit"

# Hand-written display names for the reddit models, keyed first by label and
# then by BERTopic topic id (-1 is the outlier topic). Keeping every label in
# one table means switching the visualised label is a one-line change to
# `label` below instead of commenting/uncommenting whole blocks.
reddit_topic_names = {
    "anxiety": {
        -1: "Physical Anxiety and Medication",
        0: "Social Anxiety and Friendships",
        1: "Anxiety Medication Options",
        2: "Health Anxiety and Pain",
        3: "Workplace and Interview Anxiety",
        4: "Fear of Death and Illness",
        5: "Panic and Indecision Anxiety",
        6: "Nighttime Panic and Weed Use",
        7: "Morning Anxiety and Sleep Issues",
        8: "Social Discomfort and Anxiety",
        9: "Global News Anxiety",
    },
    "depression": {
        -1: "Family and Therapy Depression",
        0: "Depression and Memory Struggles",
        1: "Exhaustion and Existential Depression",
        2: "Family Relationship Depression",
        3: "Sleep and Routine Depression",
        4: "Antidepressant and Intrusive Thoughts",
        5: "Loneliness and Social Isolation",
        6: "Mental and Physical Exhaustion",
        7: "College and Academic Pressure",
        8: "Anger and Avoidance Depression",
    },
    "ptsd": {
        -1: "PTSD and Undecided Anxiety",
        0: "Persistent Unease and PTSD",
        1: "PTSD from Relationship Abuse",
        2: "Trauma and Nightmares",
        3: "Social and Family PTSD Triggers",
        4: "War and Trauma PTSD",
        5: "Car Accident Nightmares",
        6: "PTSD Trigger Sensitivity",
    },
}

# Label to visualise; must be one of the keys of reddit_topic_names.
label = "anxiety"
custom_topic_names = reddit_topic_names[label]

network_vis(platform=platform, label=label, custom_topic_names=custom_topic_names)

Node ID: -1, Label: Physical Anxiety and Medication, Size: 307.8
Node ID: 0, Label: Social Anxiety and Friendships, Size: 287.7
Node ID: 1, Label: Anxiety Medication Options, Size: 79.2
Node ID: 2, Label: Health Anxiety and Pain, Size: 77.7
Node ID: 3, Label: Workplace and Interview Anxiety, Size: 67.8
Node ID: 4, Label: Fear of Death and Illness, Size: 28.5
Node ID: 5, Label: Panic and Indecision Anxiety, Size: 20.7
Node ID: 6, Label: Nighttime Panic and Weed Use, Size: 17.4
Node ID: 7, Label: Morning Anxiety and Sleep Issues, Size: 9.0
Node ID: 8, Label: Social Discomfort and Anxiety, Size: 9.0
Node ID: 9, Label: Global News Anxiety, Size: 7.5
Network visualization saved 


In [13]:
platform = "beyondblue"

# Hand-written display names for the beyondblue models, keyed first by label
# and then by BERTopic topic id (-1 is the outlier topic). Keeping every label
# in one table means switching the visualised label is a one-line change to
# `label` below instead of commenting/uncommenting whole blocks.
beyondblue_topic_names = {
    "Anxiety": {
        -1: "Medication and Nighttime Fear",  # Inferred from words list
        0: "OCD and Anxiety Experiences",
        1: "Pet-Related Anxiety at Home",
        2: "Workplace Stress and Anxiety",
        3: "Overcoming Workweek Anxiety",
        4: "Childhood Insomnia and Symptoms",
        5: "Daily Stress and Pain Anxiety",
        6: "Financial and Parenting Anxiety",
    },
    "Depression": {
        -1: "Depression and Relationship Anger",
        0: "Family Anxiety and Kids",
        1: "Mental Health Medication Side Effects",
        2: "Childhood Fears and Uncertainty",
        3: "Morning Depression Struggles",
    },
}

# Label to visualise; must be one of the keys of beyondblue_topic_names.
label = "Depression"
custom_topic_names = beyondblue_topic_names[label]

network_vis(platform=platform, label=label, custom_topic_names=custom_topic_names)

Node ID: -1, Label: Depression and Relationship Anger, Size: 34.8
Node ID: 0, Label: Family Anxiety and Kids, Size: 46.5
Node ID: 1, Label: Mental Health Medication Side Effects, Size: 22.5
Node ID: 2, Label: Childhood Fears and Uncertainty, Size: 12.299999999999999
Node ID: 3, Label: Morning Depression Struggles, Size: 10.799999999999999
Network visualization saved 
