In [19]:
import sys
sys.path.append("..")

import multiprocessing as mp
from dynetworkx import dynetworkx as dnx
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [20]:
# This list all of the edge orders for the motifs in the table above
motifs = [[((1,2),(3,2),(1,2)), ((1,2),(3,2),(2,1)), ((1,2),(3,2),(1,3)), ((1,2),(3,2),(3,1)), ((1,2),(3,2),(2,3)), ((1,2),(3,2),(3,2))],
          [((1,2),(2,3),(1,2)), ((1,2),(2,3),(2,1)), ((1,2),(2,3),(1,3)), ((1,2),(2,3),(3,1)), ((1,2),(2,3),(2,3)), ((1,2),(2,3),(3,2))],
          [((1,2),(3,1),(1,2)), ((1,2),(3,1),(2,1)), ((1,2),(3,1),(1,3)), ((1,2),(3,1),(3,1)), ((1,2),(3,1),(2,3)), ((1,2),(3,1),(3,2))],
          [((1,2),(1,3),(1,2)), ((1,2),(1,3),(2,1)), ((1,2),(1,3),(1,3)), ((1,2),(1,3),(3,1)), ((1,2),(1,3),(2,3)), ((1,2),(1,3),(3,2))],
          [((1,2),(2,1),(1,2)), ((1,2),(2,1),(2,1)), ((1,2),(2,1),(1,3)), ((1,2),(2,1),(3,1)), ((1,2),(2,1),(2,3)), ((1,2),(2,1),(3,2))],
          [((1,2),(1,2),(1,2)), ((1,2),(1,2),(2,1)), ((1,2),(1,2),(1,3)), ((1,2),(1,2),(3,1)), ((1,2),(1,2),(2,3)), ((1,2),(1,2),(3,2))]]

mot = [ seq for row in motifs for seq in row]

In [21]:
# create toy graph

graph = dnx.ImpulseDiGraph()
graph.add_edge('A', 'B', 1)
graph.add_edge('A', 'C', 2)
graph.add_edge('B', 'A', 3)
graph.add_edge('A', 'B', 4)

In [22]:
# stores all of the (pandas dataframes that tells you how many times each node appears in each motif) for each of the (10 graphs) in count_table

table = list()
    
pool = mp.Pool(mp.cpu_count())
results = pool.starmap_async(dnx.count_temporal_motif, [(graph, seq, 7*86400) for seq in mot]).get()
pool.close()

count = list()
for d in results:
    #count.append(d) # if using older version of dynetworkx
    count.append(d[2])
table.append(count)

count_table = pd.DataFrame(table, columns=mot)

In [23]:
df = count_table # saves count_table as df so we can manipulate count_table but also retain the original copy

# count_table_multiindex is a list that stores each multiindex dataframe for each of the 10 datasets
count_table_multiindex = []

for current_dataset in range(len(df.index)):

    # there are three (or two) positions in each motif
    positions = [1, 2, 3]

    # generates list of all countries by compiling all countries in all 10 datasets in all 36 motifs
    countries = []
    for i in range(len(df.index)):
        for k in range(len(df.columns)):
            countries += list(df.iloc[i][k].index)
    countries = sorted(list(set(countries)))

    # generates the index for the multiindex by taking the product of mot and positions
    multiindex_vals = pd.MultiIndex.from_product([mot, positions], names=["motifs", "positions"])

    # creates the dataframe with indices from multiindex_vals and columns from countries
    dataset_df = pd.DataFrame(columns=countries, index=multiindex_vals)

    # iterates through all positions in the multiindex dataframe (through all motifs, position in each motif, and each country) and assigns the appropriate value based on df
    for motif in mot:
        for pos in positions:
            for country in countries:

                # checks if country and position is present in df and then assigns it
                if country in df.loc[current_dataset][motif].index and pos in df.loc[current_dataset][motif].columns:
                    dataset_df[country].loc[(motif, pos)] = df.loc[current_dataset][motif].loc[country][pos]

    # adds the dataframe to the list
    count_table_multiindex.append(dataset_df)


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

In [24]:
# changes all 'nan' to 0, normalizes vectors, deletes countries with too few edges

count_table0 = count_table_multiindex[0].copy()

count_table0 = count_table0.fillna(0)

countries = list(count_table0.columns)

# remove countries with less than some number of edges; normalization
for column in count_table0.columns:
    sum = count_table0[column].sum()
    count_table0[column] = count_table0[column].apply(lambda x: x / sum) # normalize

# gets updated list of countries in dataframe
countries = list(count_table0.columns)


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [25]:
# appends a heatmap to fig
def insert_heatmap(fig, data, max_val, row, col):
    fig.append_trace(go.Heatmap(
                     z=data,
                     x=[1,2,3,4,5,6],
                     y=[6,5,4,3,2,1],
                     #colorscale='greens',
                     #colorbar=dict(x=0.21),
                     zmax=1, #max_val,
                     zmid=0.5,
                     zmin=0,
                     #coloraxis='coloraxis',
                     colorscale = [[0, 'rgb(256,256,256)'], [0.25, 'rgb(198,233,191)'], [0.5, 'rgb(116,196,118)'], [0.75, 'rgb(32,137,68)'], [1, 'rgb(0,68,27)']],
                  ),
                  row, col)
    
def create_centroid_heatmaps(centroids):
    num_clusters = len(centroids)

    all_the_data = [] # stores all of the data for the heatmaps: list of n centroids, each centroid has list of 3 positions, each position is a 6x6 list

    for centroid in range(num_clusters):
        # fetches the data for each position
        pos1 = []
        pos2 = []
        pos3 = []
        for i in range(36):
            pos1.append(float(centroids[centroid][3 * i]))
            pos2.append(float(centroids[centroid][3 * i + 1]))
            pos3.append(float(centroids[centroid][3 * i + 2]))

        # adjusts the data so it fits into the 6x6 heatmaps
        pos1_adjusted = []
        pos2_adjusted = []
        pos3_adjusted = []
        for i in range(6):
            i = 5-i
            pos1_adjusted.append(pos1[i*6:i*6+6])
            pos2_adjusted.append(pos2[i*6:i*6+6])
            pos3_adjusted.append(pos3[i*6:i*6+6])

        data = [pos1_adjusted, pos2_adjusted, pos3_adjusted]
        all_the_data.append(data)
            
    # makes the subplot titles
    subplot_titles = []
    for i in range(num_clusters):
        subplot_titles.append('Position 1')
        subplot_titles.append('Position 2')
        subplot_titles.append('Position 3')

    # makes the fig
    fig = make_subplots(rows=num_clusters, cols=3, subplot_titles=subplot_titles)

    # inserts heatmaps into fig
    for k in range(len(all_the_data)):

        # calculates the maximum value of all the data
        max_val = 0
        for i in range(len(all_the_data[k])):
            for j in range(len(all_the_data[k][i])):
                max_val = max([max_val] + all_the_data[k][i][j])

        # inserts heatmaps
        for i in range(len(all_the_data[k])):
            insert_heatmap(fig, all_the_data[k][i], max_val, k+1, i+1)

    fig.update_layout(title=f'Centroid Data - {num_clusters} Clusters',
                    height=500*num_clusters,
                    width=1300,
                    #coloraxis = {'colorscale':'greens'},
                    )
    fig.update_yaxes(autorange="reversed")
    fig.update_xaxes(tickvals = [1,2,3,4,5,6])
    
    return fig

In [26]:
# heatmap for node A

fig = create_centroid_heatmaps([list(count_table0['A'])])
fig.update_layout(title=f'TMPP Heatmap for Node A')
fig.show()