In [1]:
import json
import re

import networkx as nx
import pandas as pd

## Loading data

#### Main

In [2]:
# NOTE(review): `eval` runs whatever Python expression is stored in these CSV cells.
# It is kept as-is here; only use it on files you trust, or swap in a safe parser.
main_df = pd.read_csv(
    'data/out/rep_main_structure.csv',
    converters=dict.fromkeys(['Aka', 'Pieces', 'Larger_works'], eval),
).fillna('')  # missing cells become empty strings

In [3]:
main_df.shape

(2, 8)

In [4]:
main_df.head()

Unnamed: 0,Work,Aka,Category,Pieces,Larger_works,Sources (collection/box/folder),Number of events documented as Piece,Number of events documented as Container
0,Americana,{America},Main,{Veracruzana},{n/a},SIU Programs Box 85-86; Rhoda Winter Russell P...,0,52
1,Veracruzana,{},Main,{},"{n/a, Americana}",SIU Programs Box 85-86; Rhoda Winter Russell P...,65,0


#### Dances

In [5]:
# NOTE(review): `eval` runs whatever Python expression is stored in the 'Pieces' cells.
# It is kept as-is here; only use it on files you trust, or swap in a safe parser.
dances_df = pd.read_csv(
    'data/out/rep_dances_structure.csv',
    converters={'Pieces': eval},
).fillna('')  # missing cells become empty strings

In [6]:
dances_df.shape

(1, 5)

In [7]:
dances_df.head()

Unnamed: 0,Dances in Dances,Pieces,Sources (collection/box/folder),Number of events,Category
0,La Bamba,{Veracruzana},SIU Programs Box 85-86,15,Dance


#### Shows

In [8]:
# NOTE(review): `eval` runs whatever Python expression is stored in these CSV cells.
# It is kept as-is here; only use it on files you trust, or swap in a safe parser.
shows_df = pd.read_csv(
    'data/out/rep_shows_structure.csv',
    converters=dict.fromkeys(['Aka', 'Pieces'], eval),
).fillna('')  # missing cells become empty strings

In [9]:
shows_df.shape

(2, 6)

In [10]:
shows_df.head()

Unnamed: 0,Show,Aka,Pieces,Sources (collection/box/folder),Number of events,Category
0,Caribbean Rhapsody,"{Rapsodia Caribe, A Caribbean Rhapsody, Rhapso...","{Veracruzana, Americana}",SIU Programs Box 85-86,20,Show
1,Tropical Carnival,{},"{Veracruzana, Americana}",SIU Programs Box 85-86,4,Show


## Processing data

In [11]:
def str2set(x):
    """Split a ';'-separated string into a set of its stripped, non-empty items."""
    trimmed = (chunk.strip() for chunk in x.split(';'))
    return {item for item in trimmed if item}

def set2str(x):
    """Render the items of ``x`` as one string: sorted, then joined with '; '."""
    ordered = list(x)
    ordered.sort()
    return '; '.join(ordered)

In [12]:
def join_values(vs1, vs2):
    """Merge two ';'-separated strings into one sorted, de-duplicated '; '-joined string."""
    merged = str2set(vs1) | str2set(vs2)
    return set2str(merged)

In [13]:
def clean_akas(s):
    """Return ``s`` with every ' [[...]]' annotation removed.

    An annotation is a single space followed by double-bracketed text; the match is
    non-greedy, so several annotations in one string are removed independently.
    Bracketed text that is not preceded by a space is left untouched.
    """
    # Raw string: '\[' and '\]' are regex escapes, not valid Python string escapes.
    return re.sub(r' \[\[.+?\]\]', '', s)

In [14]:
# 'n/a' in Larger_works means the work can also be performed as a piece on its own,
# not always as part of a larger work. That makes sense when 'n/a' appears alongside
# other works in the column, but not when it is the only entry. Either way the work
# is a piece; per the original note, 'n/a' does not affect the node's type or color.
# In this cell it only decides between the 'Standalone' and 'Other' style.

g = nx.DiGraph()

# Main works: edges run piece -> work and work -> larger work.
for _, main_row in main_df.iterrows():
    work_name = main_row['Work']
    larger_works = main_row['Larger_works']

    # Disabled attribute:
    # years = join_values(row['Years Performed'], row['Years Performed as Container (TMU)'])
    g.add_node(
        work_name,
        style='Standalone' if 'n/a' in larger_works else 'Other',
        aka=clean_akas(set2str(main_row['Aka'])),
        size=(main_row['Number of events documented as Piece']
              + main_row['Number of events documented as Container']),
    )

    for piece in main_row['Pieces']:
        g.add_node(piece)
        g.add_edge(piece, work_name)

    for parent in larger_works - {'n/a'}:
        g.add_node(parent)
        g.add_edge(work_name, parent)

# Dances: edges run dance -> piece. Dances carry no alternative names.
for _, dance_row in dances_df.iterrows():
    dance_name = dance_row['Dances in Dances']

    # Disabled attribute: years = row['Years performed']
    g.add_node(dance_name, aka='', size=dance_row['Number of events'])

    for piece in dance_row['Pieces']:
        g.add_edge(dance_name, piece)

# Shows: edges run piece -> show.
for _, show_row in shows_df.iterrows():
    show_name = show_row['Show']

    # Disabled attribute: years = row['Years Performed']
    g.add_node(
        show_name,
        aka=clean_akas(set2str(show_row['Aka'])),
        size=show_row['Number of events'],
    )

    for piece in show_row['Pieces']:
        g.add_edge(piece, show_name)

In [15]:
def is_disconnected(g, n):
    """Tell whether node ``n`` has no edges at all in ``g`` (its degree is zero)."""
    total_degree = g.degree(n)
    return total_degree == 0

def has_only_outgoing_edges(g, n):
    """Tell whether ``n`` has at least one outgoing edge and no incoming edge."""
    if g.in_degree(n) != 0:
        return False
    return g.out_degree(n) > 0

def has_only_incoming_edges(g, n):
    """Tell whether ``n`` has at least one incoming edge and no outgoing edge."""
    if g.in_degree(n) > 0:
        return g.out_degree(n) == 0
    return False

def has_both_edges(g, n):
    """Tell whether ``n`` has at least one incoming and at least one outgoing edge."""
    if g.in_degree(n) > 0:
        return g.out_degree(n) > 0
    return False

# First pass: tag every node as 'Dance', 'Show' or the placeholder 'Other'.
# The name sets are built once, outside the loop, so each membership test is O(1)
# instead of re-running `.unique()` on both columns for every node.
dance_names = set(dances_df['Dances in Dances'])
show_names = set(shows_df.Show)

for n in g:
    if n in dance_names:
        g.nodes[n]['type'] = 'Dance'
    elif n in show_names:
        g.nodes[n]['type'] = 'Show'
    else:
        # Placeholder; replaced by Piece/Container/Mix in the classification pass.
        g.nodes[n]['type'] = 'Other'

def all_incoming_dances(g, n):
    """Tell whether every node with an edge into ``n`` has type 'Dance'.

    Vacuously True when ``n`` has no incoming edges.
    """
    return all(g.nodes[source]['type'] == 'Dance' for source, _ in g.in_edges(n))

def all_outgoing_shows(g, n):
    """Tell whether every node that ``n`` has an edge to has type 'Show'.

    Vacuously True when ``n`` has no outgoing edges.
    """
    return all(g.nodes[target]['type'] == 'Show' for _, target in g.out_edges(n))

# Second pass: refine every node that is neither a dance nor a show into
# 'Piece', 'Container' or 'Mix', based on the direction and type of its edges.
# The name sets are built once so membership tests do not re-run `.unique()` per node.
dance_names = set(dances_df['Dances in Dances'])
show_names = set(shows_df.Show)

for n in g:
    if n in dance_names or n in show_names:
        continue  # dances and shows keep the type assigned in the first pass

    if is_disconnected(g, n):
        node_type = 'Piece'
    elif has_only_incoming_edges(g, n):
        # Fed only by dances -> still a piece; anything else makes it a container.
        node_type = 'Piece' if all_incoming_dances(g, n) else 'Container'
    elif has_only_outgoing_edges(g, n):
        node_type = 'Piece'
    # From here on the node has both incoming and outgoing edges.
    elif all_incoming_dances(g, n):
        node_type = 'Piece'
    elif all_outgoing_shows(g, n):
        node_type = 'Container'
    else:
        node_type = 'Mix'
    g.nodes[n]['type'] = node_type

# Sanity check: the 'Other' placeholder must not survive classification.
unclassified = [n for n in g if g.nodes[n]['type'] == 'Other']
assert not unclassified, 'nodes left with placeholder type: {}'.format(unclassified)

# Weight given to every edge whose source node is not 'Standalone'.
W = 10
for n1 in g:
    node_attrs = g.nodes[n1]
    # Nodes with no style, or with the placeholder 'Other', take their type as style.
    if node_attrs.get('style', 'Other') == 'Other':
        node_attrs['style'] = node_attrs['type']
    style = node_attrs['style']
    edge_weight = 1 if style == 'Standalone' else W
    for n2 in g.neighbors(n1):
        edge_attrs = g[n1][n2]
        # Red edge from Container to Show; otherwise the edge takes its source's style.
        if node_attrs['type'] == 'Container' and g.nodes[n2]['type'] == 'Show':
            edge_attrs['style'] = 'Container'
        else:
            edge_attrs['style'] = style
        # The weight is stored under both spellings of the key.
        edge_attrs['Weight'] = edge_weight
        edge_attrs['weight'] = edge_weight

In [16]:
g.number_of_nodes()

5

In [17]:
g.number_of_edges()

6

In [18]:
nx.number_of_selfloops(g)

0

#### Comparison to older graph

In [19]:
# h = nx.read_gexf('data2/out/repertory_2022-05-18.gexf')

In [20]:
# for n in g:
#     if n in h:
#         if g.nodes[n]['type'] != h.nodes[n]['type']:
#             print(n, '---', h.nodes[n]['type'], '=>', g.nodes[n]['type'])

In [21]:
# for n in h:
#     if n in g:
#         if g.nodes[n]['type'] != h.nodes[n]['type']:
#             print(n, '---', h.nodes[n]['type'], '=>', g.nodes[n]['type'])

#### Comparison to column 'Category (AJM to add)'

In [22]:
# for n, data in g.nodes(data=True):
#     typ = data['type']
#     if typ == 'Piece' or typ == 'Mix' or typ == 'Container':
#         aux_df = main_df[main_df.Work == n]
#         categ = aux_df.iloc[0]['Category (AJM to add)']
#     elif typ == 'Show':
#         aux_df = shows_df[shows_df.Show == n]
#         categ = aux_df.iloc[0]['Category']
#     elif typ == 'Dance':
#         aux_df = dances_df[dances_df.Dance == n]
#         categ = aux_df.iloc[0]['Category']
#     assert(len(aux_df) == 1)
#     assert(typ == categ or typ == 'Piece' and categ == 'Piece/Standalone')

#### Filtering 1947-60

In [23]:
# def in_range(years):
#     return any(map(lambda x: 1947 <= int(x) <= 1960, years))

In [24]:
# g2 = g.copy()
# for n, data in g.nodes(data=True):
#     years = str2set(data['years'])
#     if not in_range(years):
#         g2.remove_node(n)

In [25]:
# g2.number_of_nodes()

In [26]:
# g2.number_of_edges()

In [27]:
# nx.number_of_selfloops(g2)

### Other data

#### Shortest distance matrix

In [28]:
# Undirected copy of the directed graph `g`; the shortest-distance matrix below is
# computed on it, so distances ignore edge direction.
g3 = nx.Graph(g)

In [29]:
# Print only the first edge of the directed graph. `a` and `b` stay bound after the
# loop and are reused by the edge-membership checks in the following cells.
for a, b in g.edges():
    print(a, '-->', b)
    break

Americana --> Caribbean Rhapsody


In [30]:
(a, b) in g.edges()

True

In [31]:
(b, a) in g.edges()

False

In [32]:
(a, b) in g3.edges()

True

In [33]:
(b, a) in g3.edges()

True

In [34]:
# All-pairs shortest path lengths on the undirected graph, as a nested mapping
# {source: {target: length}}. No weight argument is passed, so lengths count edges.
shortest_distance_matrix = dict(nx.shortest_path_length(g3))

In [35]:
# Distinct path lengths that occur anywhere in the distance matrix.
distances = {
    length
    for lengths_from_source in shortest_distance_matrix.values()
    for length in lengths_from_source.values()
}

In [36]:
distances

{0, 1, 2}

#### Sub and super hierarchy

In [37]:
# Loops: report every ordered pair of nodes joined by edges in both directions
# (a self-loop is reported once, a two-node cycle once per ordering).
for n in g:
    for m in g:
        if g.has_edge(m, n) and g.has_edge(n, m):
            print('{} <--> {}'.format(n, m))

In [38]:
def direct_super_neighbors(g, x):
    """Return the set of nodes reported by ``g.neighbors(x)``."""
    return set(g.neighbors(x))

def all_super_neighbors_aux(g, x, visited):
    """Collect every node reachable from ``x`` by following ``g.neighbors`` repeatedly.

    ``x`` is always expanded. Any other node is expanded at most once, and never if
    it is in ``visited``; such nodes are still included in the result when they are
    a direct neighbor of an expanded node. ``x`` itself is included only if a cycle
    leads back to it. ``visited`` is not modified.

    The traversal is iterative with a single shared set of expanded nodes, so shared
    descendants are not re-explored and deep graphs cannot hit the recursion limit.
    """
    expanded = set(visited)  # copy so the caller's set is left untouched
    reached = set()
    pending = [x]
    while pending:
        current = pending.pop()
        for successor in direct_super_neighbors(g, current):
            reached.add(successor)
            if successor not in expanded:
                expanded.add(successor)
                pending.append(successor)
    return reached

def all_super_neighbors(g, x):
    """Return every node reachable from ``x`` through one or more edges."""
    return all_super_neighbors_aux(g, x, {x})

def direct_sub_neighbors(g, x):
    """Return the set of nodes ``n`` in ``g`` for which ``x`` is in ``g.neighbors(n)``."""
    return {n for n in g if x in g.neighbors(n)}

def all_sub_neighbors_aux(g, x, visited):
    """Collect every node from which ``x`` can be reached by following ``g.neighbors``.

    ``x`` is always expanded. Any other node is expanded at most once, and never if
    it is in ``visited``; such nodes are still included in the result when they
    point directly at an expanded node. ``x`` itself is included only if a cycle
    leads back to it. ``visited`` is not modified.

    The traversal is iterative with a single shared set of expanded nodes, so shared
    ancestors are not re-explored and deep graphs cannot hit the recursion limit.
    """
    expanded = set(visited)  # copy so the caller's set is left untouched
    reached = set()
    pending = [x]
    while pending:
        current = pending.pop()
        for predecessor in direct_sub_neighbors(g, current):
            reached.add(predecessor)
            if predecessor not in expanded:
                expanded.add(predecessor)
                pending.append(predecessor)
    return reached

def all_sub_neighbors(g, x):
    """Return every node that reaches ``x`` through one or more edges."""
    return all_sub_neighbors_aux(g, x, {x})

def hierarchy(g, x):
    """Return ``x`` together with all of its super neighbors and all of its sub neighbors."""
    return {x} | all_super_neighbors(g, x) | all_sub_neighbors(g, x)

In [39]:
# For each node, a mapping with one key per member of its hierarchy; every value is 0.
hierarchy_dict = {
    n: {member: 0 for member in hierarchy(g, n)}
    for n in g
}

In [40]:
# COLORS
# dances     - light blue - #00b7da
# pieces     - blue       - #5c89ff
# containers - red        - #ff4533
# mix        - purple     - #bd7aff
# standalone - light gray - #cecece
# shows      - pink       - #ff0064

## Saving data

In [41]:
# Export the directed graph, with the node and edge attributes set above, as GEXF.
nx.write_gexf(g, 'web/dash-network/repertory.gexf')

In [42]:
# Compact JSON copy of the distance matrix.
with open('web/dash-network/assets/shortest_distance_matrix.json', 'w') as json_file:
    json.dump(shortest_distance_matrix, json_file)

# The same data as a JavaScript source file: one assignment to `shortestDistanceMatrix`,
# pretty-printed with sorted keys.
matrix_literal = json.dumps(shortest_distance_matrix, indent=4, sort_keys=True)
with open('web/dash-network/assets/shortest_distance_matrix.js', 'w') as js_file:
    js_file.write('shortestDistanceMatrix = ' + matrix_literal)

In [43]:
# Compact JSON copy of the per-node hierarchies.
with open('web/dash-network/assets/hierarchies.json', 'w') as json_file:
    json.dump(hierarchy_dict, json_file)

# The same data as a JavaScript source file: one assignment to `hierarchies`,
# pretty-printed with sorted keys.
hierarchies_literal = json.dumps(hierarchy_dict, indent=4, sort_keys=True)
with open('web/dash-network/assets/hierarchies.js', 'w') as js_file:
    js_file.write('hierarchies = ' + hierarchies_literal)