In [None]:
# run the setup notebook containing the necessary libraries and functions
%run 'notebooks/scripts/setup.ipynb'

# import libraries to enable network operations
import networkx as nx
import ast

## Functions to construct Network 1. Narrative topic - Narrative topic Relation

In [None]:
def build_network_1(df):
  """Build the topic-topic network from a table of per-topic word probabilities.

  Every row of ``df`` describes one topic: the ``topic_theme`` column holds the
  topic name and each remaining non-null cell is a string that
  ``ast.literal_eval`` turns into a ``(word, probability)`` pair.

  Args:
    df: DataFrame with a ``topic_theme`` column plus the word/probability
      columns.

  Returns:
    An undirected ``nx.Graph`` with one node per unique ``topic_theme`` and one
    edge per pair of topics. The edge weight is the sum of the probabilities
    (taken from both topics) of the words the two topics share, rounded to four
    decimals; pairs without shared words still get an edge with weight 0.
  """
  # parse every row exactly once: topic name -> parallel word / probability lists
  topics_data = {}
  for _, row in df.iterrows():
      # select the word columns by label instead of relying on column position
      cells = row.drop(labels='topic_theme')
      parsed = [ast.literal_eval(cell) for cell in cells if pd.notna(cell)]
      topics_data[row['topic_theme']] = {
          'words': [entry[0] for entry in parsed],
          'probabilities': [float(entry[1]) for entry in parsed],
      }

  # create graph; nodes first so that every topic is present in the graph
  G = nx.Graph()
  G.add_nodes_from(df['topic_theme'].unique())

  # one edge per unordered pair of topics, weighted by the shared-word probabilities
  for topic_a, topic_b in combinations(list(topics_data), 2):
      data_a = topics_data[topic_a]
      data_b = topics_data[topic_b]
      shared_words = set(data_a['words']) & set(data_b['words'])

      shared_prob_a = sum(
          prob for word, prob in zip(data_a['words'], data_a['probabilities'])
          if word in shared_words
      )
      shared_prob_b = sum(
          prob for word, prob in zip(data_b['words'], data_b['probabilities'])
          if word in shared_words
      )

      G.add_edge(topic_a, topic_b, weight=round(shared_prob_a + shared_prob_b, 4))

  return G

## Functions to construct Network 2. Class style - Class style Relation

In [None]:
def build_network_2(df):
  """Build the style-style network from shared dominant topics.

  Args:
    df: DataFrame with ``class_style`` and ``dominant_topic_theme`` columns.

  Returns:
    An undirected ``nx.Graph`` with one node per class style found in the
    grouped data. Two styles are connected when they have at least one
    dominant topic in common; the edge weight is the number of shared topics.
  """
  # distinct (style, topic) combinations that occur in the data
  style_topic_pairs = (
      df[['class_style', 'dominant_topic_theme']]
      .groupby(['class_style', 'dominant_topic_theme'])
      .size()
      .reset_index(name='count')
  )[['class_style', 'dominant_topic_theme']]

  # one row per style, holding the list of its topics
  style_by_topic_df = (
      style_topic_pairs
      .groupby('class_style')['dominant_topic_theme']
      .apply(list)
      .reset_index()
  )

  # style -> set of topics, so that each pair needs only a set intersection
  topics_of = {
      style: set(topic_list)
      for style, topic_list in zip(style_by_topic_df['class_style'],
                                   style_by_topic_df['dominant_topic_theme'])
  }

  # create the graph
  network = nx.Graph()
  styles = style_by_topic_df['class_style'].unique()
  network.add_nodes_from(styles)

  for first, second in itertools.combinations(styles, 2):
    shared_count = len(topics_of[first] & topics_of[second])
    if shared_count > 0:
      network.add_edge(first, second, weight=shared_count)

  return network

## Functions to construct Network 3. Narrative topic - Class Style Relation

In [None]:
def build_network_3(df, styles, topics):
  """Build the bipartite topic-style network weighted by mean topic score.

  Args:
    df: DataFrame with a ``class_style`` column and one numeric column per
      entry of ``topics``.
    styles: iterable of style names; added as nodes with ``bipartite='styles'``.
    topics: iterable of topic names; added as nodes with ``bipartite='topics'``
      and used as the column names that are averaged per style.

  Returns:
    An undirected ``nx.Graph`` in which every topic is connected to every
    ``class_style`` value found in ``df``; the edge weight is the mean of that
    topic's column over the rows of that style.
  """
  topics = list(topics)

  # Select the columns from `topics` itself so that the positional lookup below
  # always pairs a weight with the topic it belongs to, whatever order the
  # caller passes the topics in.
  by_style_mean = df[['class_style', *topics]].groupby(['class_style'], as_index=False).mean()
  np_by_style_mean = by_style_mean.to_numpy()

  # create the graph
  G = nx.Graph()
  G.add_nodes_from(styles, bipartite='styles')
  G.add_nodes_from(topics, bipartite='topics')

  # each row is [style, mean(topics[0]), mean(topics[1]), ...]
  for style_row in np_by_style_mean:
    for offset, topic_name in enumerate(topics, start=1):
      G.add_edge(topic_name, style_row[0], weight=style_row[offset])

  return G

In [None]:
def node_projection(graph, nodes):
  """Project a bipartite graph onto one of its two node sets.

  Args:
    graph: graph whose nodes carry a ``bipartite`` attribute equal to either
      ``'topics'`` or ``'styles'``.
    nodes: which node set to project onto, ``'topics'`` or ``'styles'``.

  Returns:
    The result of ``nx.bipartite.generic_weighted_projected_graph`` for the
    selected nodes, using ``my_weight_sum`` as the weight function.

  Raises:
    ValueError: if ``nodes`` is neither ``'topics'`` nor ``'styles'``.
  """

  def my_weight_sum(G, u, v, weight='weight'):
      # sum, over every common neighbour, of the weights of both connecting
      # edges (an edge without the attribute counts as 1)
      w = 0.0
      for nbr in set(G[u]) & set(G[v]):
        w += G[u][nbr].get(weight, 1) + G[v][nbr].get(weight, 1)
      return w

  # fail early with a clear message instead of hitting an unbound local later
  if nodes not in ('topics', 'styles'):
    raise ValueError(f"nodes must be 'topics' or 'styles', got {nodes!r}")

  selected = [n for n, d in graph.nodes(data=True) if d['bipartite'] == nodes]
  return nx.bipartite.generic_weighted_projected_graph(graph, selected, my_weight_sum)

def draw_projection(graph):
  """Draw ``graph`` with node labels on a new (15, 20) figure and show it."""
  canvas_size = (15, 20)
  plt.figure(figsize=canvas_size)
  nx.draw_networkx(graph, with_labels=True)
  plt.show()

## Functions to calculate Network measures


For unipartite graphs


In [None]:
def calculate_centralities(graph):
  """Collect per-node centrality measures of ``graph`` in a DataFrame.

  Returns:
    DataFrame with one row per node and the columns ``node``,
    ``degree_centrality`` (the weighted degree reported by ``graph.degree``),
    ``betweenness_centrality``, ``eigenvector_centrality`` and
    ``closeness_centrality``; every value is rounded to six decimals.
  """
  weighted_degree = dict(graph.degree(weight='weight'))

  # NOTE(review): closeness receives the edge weight as a *distance*, so larger
  # weights presumably mean "farther apart" to networkx -- confirm this matches
  # the intended "the more weight the closer the nodes" reading.
  other_measures = (
      ('betweenness_centrality', nx.betweenness_centrality(graph, weight='weight')),
      ('eigenvector_centrality', nx.eigenvector_centrality(graph, weight='weight')),
      ('closeness_centrality', dict(nx.closeness_centrality(graph, distance='weight'))),
  )

  # node -> {measure name -> rounded value}; the degree pass creates the entries
  per_node = {
      node: {'degree_centrality': round(value, 6)}
      for node, value in weighted_degree.items()
  }
  for measure_name, scores in other_measures:
    for node, value in scores.items():
      per_node[node][measure_name] = round(value, 6)

  return (
      pd.DataFrame(per_node).T
      .reset_index()
      .rename(columns={'index': 'node'})
  )

def plot_centralities_distributions(df):
  """Plot one histogram per measure column of ``df``, stacked vertically.

  Args:
    df: DataFrame whose first column is skipped (presumably the ``node``
      identifier) and whose remaining columns are each drawn as a 10-bin
      histogram.
  """
  measure_columns = df.columns[1:]  # every column except the first one

  # squeeze=False keeps `axes` a 2-D array even when there is a single measure
  # column; otherwise a lone Axes object is returned and cannot be indexed
  fig, axes = plt.subplots(nrows=len(measure_columns), ncols=1, figsize=(8, 12), squeeze=False)
  plt.subplots_adjust(hspace=0.5)

  for ax, column in zip(axes[:, 0], measure_columns):
      ax.hist(df[column], bins=10, edgecolor='k')
      ax.set_title(f'Distribution of {column}')
      ax.set_xlabel(column)
      ax.set_ylabel('Frequency')
      ax.grid(True)

  plt.show()



def calculate_clusters(graph):
    """Run the Girvan-Newman algorithm on ``graph`` and tabulate its output.

    Returns:
      DataFrame with the columns ``Cluster`` (1-based position of the item in
      the algorithm's output) and ``Members`` (the item itself, one row per
      item yielded by ``girvan_newman``).
    """
    # materialise everything the algorithm yields, in order
    yielded = list(nx.algorithms.community.girvan_newman(graph))

    df = pd.DataFrame(columns=['Cluster', 'Members'])
    df['Cluster'] = list(range(1, len(yielded) + 1))
    df['Members'] = yielded

    return df

def detect_communities(graph):
    """Detect communities of ``graph`` with greedy modularity maximisation.

    Returns:
      DataFrame with the columns ``Community`` (1-based number) and ``Members``
      (the community as returned by ``greedy_modularity_communities``), one row
      per community.
    """
    # apply the greedy modularity communities algorithm for community detection
    found = list(nx.algorithms.community.greedy_modularity_communities(graph))

    df = pd.DataFrame(columns=['Community', 'Members'])
    df['Community'] = list(range(1, len(found) + 1))
    df['Members'] = found

    return df


def calculate_clustering_coefficient(graph):
    """Return the weighted average clustering coefficient of ``graph``, rounded to six decimals."""
    return round(nx.average_clustering(graph, weight='weight'), 6)

def calculate_average_shortest_path(graph):
    """Return the weighted average shortest path length of ``graph``, rounded to six decimals."""
    return round(nx.average_shortest_path_length(graph, weight='weight'), 6)

def calculate_density(graph):
    """Return the density of ``graph`` as computed by ``nx.density``, rounded to six decimals."""
    return round(nx.density(graph), 6)

def calculate_transitivity(graph):
    """Return the transitivity of ``graph`` as computed by ``nx.transitivity``, rounded to six decimals."""
    return round(nx.transitivity(graph), 6)

def general_graph_statistics(graph):
  """Summarise whole-graph statistics of ``graph`` in a one-row DataFrame.

  Returns:
    DataFrame indexed by ``'score'`` with the columns
    ``clustering_coefficient``, ``average_shortest_path``, ``network_density``
    and ``network_transitivity``.
  """
  # column name -> function computing that statistic, in output column order
  statistics = {
      'clustering_coefficient': calculate_clustering_coefficient,
      'average_shortest_path': calculate_average_shortest_path,
      'network_density': calculate_density,
      'network_transitivity': calculate_transitivity,
  }

  df = pd.DataFrame(columns=list(statistics), index=['score'])
  for column, compute in statistics.items():
    df[column] = compute(graph)

  return df

For bipartite graphs

In [None]:
def calculate_centralities_bipartite(graph, nodes):
  """Collect per-node bipartite centrality measures of ``graph`` in a DataFrame.

  Args:
    graph: the bipartite graph.
    nodes: the nodes of one bipartite set, passed through to the networkx
      bipartite functions.

  Returns:
    DataFrame with one row per node and the columns ``node``,
    ``degree_centrality`` (weighted degree from ``bipartite.degrees``),
    ``betweenness_centrality`` and ``closeness_centrality``; every value is
    rounded to six decimals.
  """
  bipartite = nx.algorithms.bipartite

  first_set_degrees, second_set_degrees = bipartite.degrees(graph, nodes, weight='weight')
  other_measures = (
      ('betweenness_centrality', bipartite.betweenness_centrality(graph, nodes)),
      ('closeness_centrality', bipartite.closeness_centrality(graph, nodes)),
  )

  # node -> {measure name -> rounded value}; the degree passes create the entries
  per_node = {}
  for degree_view in (first_set_degrees, second_set_degrees):
    for node, value in dict(degree_view).items():
      per_node[node] = {'degree_centrality': round(value, 6)}
  for measure_name, scores in other_measures:
    for node, value in scores.items():
      per_node[node][measure_name] = round(value, 6)

  return (
      pd.DataFrame(per_node).T
      .reset_index()
      .rename(columns={'index': 'node'})
  )

def calculate_clustering_coefficient_bipartite(graph):
    """Return the bipartite average clustering coefficient of ``graph``, rounded to six decimals."""
    return round(nx.algorithms.bipartite.average_clustering(graph), 6)

def calculate_density_bipartite(graph, nodes):
    """Return the bipartite density of ``graph`` for the node set ``nodes``, rounded to six decimals."""
    return round(nx.algorithms.bipartite.density(graph, nodes), 6)


# nx.algorithms.bipartite.minimum_weight_full_matching(graph_3, weight='weight')
# or maximum matching
# https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.bipartite.matching.minimum_weight_full_matching.html#networkx.algorithms.bipartite.matching.minimum_weight_full_matching

## Functions to save data

In [None]:
def graph_to_csv(graph, filepath, filename):
  """Write the weighted edge list of ``graph`` to ``{filepath}{filename}.csv``.

  The file has the columns ``source``, ``target`` and ``weight`` (plus the
  DataFrame index written by ``to_csv``). ``filepath`` and ``filename`` are
  concatenated as-is, so ``filepath`` must already end with a path separator.

  Raises:
    KeyError: if an edge has no ``weight`` attribute.
  """
  edge_list = list(graph.edges(data=True))

  df_graph = pd.DataFrame(columns=['source', 'target', 'weight'])
  df_graph['source'] = [source for source, _, _ in edge_list]
  df_graph['target'] = [target for _, target, _ in edge_list]
  df_graph['weight'] = [attributes['weight'] for _, _, attributes in edge_list]

  df_graph.to_csv(f'{filepath}{filename}.csv')