# Inputs & outputs
- **Inputs:** co-occurrence artifacts in `../final_data/s7_datasets/` (e.g., `temp_lib_co/` objects) and `datasets/library_community_network_20_compact.csv`.
- **Outputs:** library co-occurrence graph and topic labeling for Supplementary Figure S7.

In [ ]:
import importlib
import library_cooccurrence #import the module here, so that it can be reloaded.
from library_cooccurrence import *
from collections import Counter
import matplotlib
import datamapplot

# Plot Supplementary Figure S7

In [None]:
# Root directory of the Supplementary Figure S7 inputs.
# The trailing slash is required: sub-directories are appended by plain string
# concatenation (data_path + 'temp_lib_co/'), so without it the two path
# components would be fused into "s7_datasetstemp_lib_co/".
data_path = "../final_data/s7_datasets/"

In [None]:
# Pick up edits made to library_cooccurrence.py since the kernel started.
# importlib.reload() only refreshes the module object itself; names that were
# bound earlier via `from library_cooccurrence import *` keep pointing at the
# pre-reload objects. Re-executing the star import rebinds them so the helpers
# called below (presumably load_obj and friends come from that module) are the
# reloaded versions.
importlib.reload(library_cooccurrence)
from library_cooccurrence import *

# Cached co-occurrence artifacts (the "_124" suffix is part of the stored
# object names). NOTE: the directory is built by plain concatenation, so
# data_path must end with a path separator.
node_community_dict = load_obj("node_community_dict_124", data_path + 'temp_lib_co/')
node_color_dict = load_obj("node_color_dict_124", data_path + 'temp_lib_co/')
lib_matrix = load_obj("lib_matrix_124", data_path + 'temp_lib_co/')
lib_std = load_obj("lib_std_124", data_path + 'temp_lib_co/')


# Fix the RNG state before the PMI computation so reruns are repeatable
# (presumably build_library_pmi_zscore draws random numbers -- confirm in
# library_cooccurrence).
random.seed(43)

# NOTE(review): the meaning of the 0.05 argument is defined by
# build_library_pmi_zscore and is not visible from this cell.
cc_pmi, cc_pmi_matrix, cc_std, cc_std_matrix, Q = build_library_pmi_zscore(
    lib_matrix, lib_std, 0.05
)

# Map each (library, library) pair to the third field of its cc_pmi entry
# divided by the third field of the positionally matching cc_std entry
# (presumably PMI / standard deviation, i.e. a z-score).
zscore_dict = dict(
    ((pmi_rec[0], pmi_rec[1]), pmi_rec[2] / std_rec[2])
    for pmi_rec, std_rec in zip(cc_pmi, cc_std)
)

# Only library pairs whose z-score exceeds this value become edges.
zscore_threshold = 20

# Start from one node per library that has a community assignment
# (iterating the dict yields its keys).
G_library_4998 = nx.Graph()
G_library_4998.add_nodes_from(node_community_dict)

# Keep the (u, v, value) triples that pass the z-score cut and store the value
# under the 'library_pmi' edge attribute.
significant_edges = [
    triple for triple in cc_pmi
    if zscore_dict[(triple[0], triple[1])] > zscore_threshold
]
G_library_4998.add_weighted_edges_from(significant_edges, weight='library_pmi')

# Connected components of the thresholded graph, in the order networkx yields
# them, together with their sizes.
component_list = list(nx.connected_components(G_library_4998))
len_list = [len(component) for component in component_list]

# Rank components from largest to smallest. Sorting on the size alone avoids
# falling back to set comparison (a subset test, not a total order) when two
# components have the same size; ties keep their original relative order.
# list1 / list2 remain tuples of sizes / components, as before.
list2 = tuple(sorted(component_list, key=len, reverse=True))
list1 = tuple(len(component) for component in list2)

# Subgraph view restricted to the largest connected component.
G_4998_major_component = G_library_4998.subgraph(list2[0])

# Short alias for the major component (kept as a global; other cells may use it).
G = G_4998_major_component

# Attach the community colour to every node, both as the raw colour string and
# as a 'viz' colour record with integer r/g/b channels and alpha 1.
# The slicing below expects colour strings shaped like '#RRGGBB'.
for node in G.nodes():
    hex_color = node_color_dict[node]
    node_attrs = G.nodes[node]
    node_attrs['community_pmi'] = hex_color
    node_attrs['viz'] = {
        'color': {
            'r': int(hex_color[1:3], 16),
            'g': int(hex_color[3:5], 16),
            'b': int(hex_color[5:7], 16),
            'a': 1,
        }
    }

# Mirror each edge's 'library_pmi' value into the conventional 'weight'
# attribute.
major_edges = G_4998_major_component.edges
for edge_key in major_edges:
    edge_attrs = major_edges[edge_key]
    edge_attrs['weight'] = edge_attrs['library_pmi']

# Optional export of the annotated major component (left disabled):
#nx.write_gexf(G_4998_major_component, f'G_4998_major_component_{zscore_threshold}.gexf')

# Node table for the network layout (the X, Y and Label columns are used when
# plotting).
df = pd.read_csv('datasets/library_community_network_20_compact.csv')

# Reload fresh copies of the community and colour mappings; node_color_dict is
# modified in place just below.
lib_co_dir = data_path + 'temp_lib_co/'
node_community_dict = load_obj("node_community_dict_124", lib_co_dir)
node_color_dict = load_obj("node_color_dict_124", lib_co_dir)

# Libraries whose community id is above 10 are drawn in a neutral gray.
cluster_list_gray = [
    library
    for library, community in node_community_dict.items()
    if int(community) > 10
]
for library in cluster_list_gray:
    node_color_dict[library] = '#D9DADB'

# One paragraph per topic, formatted as "<topic label>: <description>" and
# separated by blank lines. The paragraph order defines the label order.
description_str = '''Operating System Services and Process Management: Libraries like os, sys, subprocess, shutil, and contextlib are fundamental for operating system services, process management, and file/directory operations.

Deep Learning and Advanced AI Models: Core libraries such as torch, transformers, pytorch_lightning, and tqdm are primarily used for building, training, and deploying deep learning models.

Modern Web Services and Asynchronous APIs: Including typing, asyncio, fastapi, openai, grpc, and kubernetes, this domain covers asynchronous programming, API creation, and building LLM/AI applications.

Scientific Computing and Data Analysis: Dominated by numpy, pandas, scipy, matplotlib, and sklearn, these are the foundation for advanced mathematical computation and general data analysis.

Data Acquisition and Text Processing: Contains libraries like requests, csv, streamlit, nltk, and selenium, focusing on web scraping, natural language processing, and application interfacing.

Web Backends and Workflow Orchestration: Features airflow, sqlalchemy, flask, pymongo, and jwt, indicating a focus on API development, automated job scheduling, and database access layers.

Enterprise Web Frameworks and Content Management: Libraries such as django, rest_framework, celery, and wagtail are core to developing complex, feature-rich web applications and managing their content.

TensorFlow and JAX Ecosystems and HPC: Centered around tensorflow, jax, keras, and flax, this covers specialized Google/DeepMind-centric deep learning and high-performance computing (HPC).

Embedded Systems and IoT Integration: With libraries like homeassistant, unittest, voluptuous, and zigpy, the domain is integrating and testing smart home systems and IoT devices.

Geospatial and Scientific Data Modeling: Includes dask, xarray, osgeo, shapely, and netCDF4, essential for handling large-scale multidimensional scientific and spatial data.'''

# Break the text into lines (blank separator lines included).
description_list = description_str.split('\n')

# The label is everything before the first colon; blank lines yield ''.
description_label_temp = [line.partition(':')[0] for line in description_list]

# Drop the empty entries produced by the blank separator lines.
description_labels = list(filter(None, description_label_temp))

# Topic label per library: community ids up to 10 index into description_labels
# (id k -> entry k - 1); every other library is labelled "No Topic".
# NOTE(review): assumes community ids start at 1 -- an id of 0 would index the
# last label; verify against how node_community_dict is produced.
label_dict = {
    library: (
        description_labels[int(community) - 1]
        if int(community) <= 10
        else "No Topic"
    )
    for library, community in node_community_dict.items()
}

matplotlib.rcParams["figure.dpi"] = 72

# Scale the layout coordinates by 1/500 and rotate them 90 degrees
# counter-clockwise ((x, y) -> (-y, x)); the result has one row per node.
rotation = np.array([[0, -1], [1, 0]])
scaled_xy = np.array([df.X / 500, df.Y / 500])
map_coords = np.dot(rotation, scaled_xy).T

# One colour per row of df, looked up by the node's Label.
marker_colors = [node_color_dict[name] for name in df.Label]

# An empty label list is passed, exactly as before.
fig, ax = datamapplot.create_plot(
    map_coords,
    [],
    marker_color_array=marker_colors,
)

# Optional export of the figure (left disabled):
#fig.savefig("datamapplot-library_network_combine_10.svg")