In [17]:
import networkx as nx
import pandas as pd

# Paths of the preprocessed CSV files to analyse. Every path has the form
# 'datacleaning/<dataset>_preprocessed.csv', so only the dataset names are
# spelled out here.
file_paths = [
    f'datacleaning/{dataset}_preprocessed.csv'
    for dataset in (
        'aishihik_lake',
        'AkatoreA',
        'AkatoreB',
        'cold_lake',
        'lake_of_the_woods',
        'mcgregor_river',
        'parsnip_river',
        'sbay_lake_huron',
        'smallwood_reservoir',
        'Venlaw',
    )
]

def calculate_network_properties_and_links(file_path, top_k=5):
    """Print basic network statistics and common-neighbor link predictions.

    Reads an edge list from ``file_path``, builds a graph from its ``source``
    and ``target`` columns, and prints the node count, the edge count and the
    ``top_k`` currently unconnected node pairs that share the most neighbors.

    Args:
        file_path: Path of the CSV file to read. It must contain ``source``
            and ``target`` columns.
        top_k: Number of highest-scoring candidate links to print. Defaults
            to 5, the value that used to be hard-coded.

    Returns:
        None. All results are written to standard output.

    Any exception raised while loading or analysing the file is caught and
    reported as a one-line message instead of propagating, so a single bad
    file does not abort a loop over many files.
    """
    import heapq  # function-scope import: leaves the cell's import block untouched

    try:
        # Load preprocessed data
        edge_list = pd.read_csv(file_path)

        # Report absent columns by name; otherwise the caught error would be a
        # bare KeyError whose message is just the column name.
        missing_columns = [
            column for column in ("source", "target") if column not in edge_list.columns
        ]
        if missing_columns:
            raise ValueError(
                f"missing required column(s): {', '.join(missing_columns)}"
            )

        G = nx.from_pandas_edgelist(edge_list, source="source", target="target")

        # Network properties
        num_nodes = G.number_of_nodes()
        num_edges = G.number_of_edges()

        # Link prediction (Common Neighbors). Scores are produced lazily so the
        # full list of non-edges never has to be held in memory or sorted.
        candidate_scores = (
            (u, v, sum(1 for _ in nx.common_neighbors(G, u, v)))
            for u, v in nx.non_edges(G)
        )
        # heapq.nlargest is documented as equivalent to
        # sorted(iterable, key=key, reverse=True)[:n], so ties keep the same order.
        top_links = heapq.nlargest(top_k, candidate_scores, key=lambda item: item[2])

        # Print results
        print(f"Results for {file_path}:")
        print(f"  Number of nodes: {num_nodes}")
        print(f"  Number of edges: {num_edges}")
        print(f"  Top {top_k} predicted links (Common Neighbors):")
        for u, v, score in top_links:
            print(f"    ({u}, {v}) - Score: {score}")
        print("-" * 50)

    except Exception as e:
        # Deliberately broad: this is a best-effort batch report, so the error
        # is shown for this file and the caller can move on to the next one.
        print(f"An error occurred with '{file_path}': {e}")

# Loop through all files and calculate properties, printing the results.
# NOTE(review): this calls `calculate_network_properties`, which is not defined
# in this cell; the function defined above is
# `calculate_network_properties_and_links`. Presumably the callee comes from
# another cell and returns printable text - confirm it is defined before this
# cell runs on a fresh kernel, or switch to the function defined above.
for file_path in file_paths:
    result = calculate_network_properties(file_path)
    print(result)



    Results for datacleaning/aishihik_lake_preprocessed.csv:
    ------------------------------
      Number of nodes (species): 39
      Number of edges (interactions): 248
    
Top 5 Predicted Links (based on Common Neighbors):
      (Salvelinus namaycush, Esox lucius) - Prediction Score: 31
      (Salvelinus namaycush, Thymallus arcticus) - Prediction Score: 31
      (Salvelinus namaycush, Catostomus catostomus) - Prediction Score: 31
      (Salvelinus namaycush, Coregonus clupeaformis) - Prediction Score: 31
      (Salvelinus namaycush, Cottus cognatus) - Prediction Score: 31
--------------------------------------------------


    Results for datacleaning/AkatoreA_preprocessed.csv:
    ------------------------------
      Number of nodes (species): 126
      Number of edges (interactions): 6279
    
Top 5 Predicted Links (based on Common Neighbors):
      (Hydrobiosis parumbripennis, Neozephlebia scita) - Prediction Score: 85
      (Hydrobiosis parumbripennis, Polypedellum) - Pre

In [6]:
import pandas as pd
import networkx as nx

def calculate_network_properties_and_links(file_path, top_k=5):
    """Print basic network statistics and common-neighbor link predictions.

    Reads an edge list from ``file_path``, builds a graph from its ``source``
    and ``target`` columns, and prints the node count, the edge count and the
    ``top_k`` currently unconnected node pairs that share the most neighbors.

    Args:
        file_path: Path of the CSV file to read. It must contain ``source``
            and ``target`` columns.
        top_k: Number of highest-scoring candidate links to print. Defaults
            to 5, the value that used to be hard-coded.

    Returns:
        None. All results are written to standard output.

    Any exception raised while loading or analysing the file is caught and
    reported as a one-line message instead of propagating, so a single bad
    file does not abort a loop over many files.
    """
    import heapq  # function-scope import: leaves the cell's import block untouched

    try:
        # Load preprocessed data
        edge_list = pd.read_csv(file_path)

        # Report absent columns by name; otherwise the caught error would be a
        # bare KeyError whose message is just the column name.
        missing_columns = [
            column for column in ("source", "target") if column not in edge_list.columns
        ]
        if missing_columns:
            raise ValueError(
                f"missing required column(s): {', '.join(missing_columns)}"
            )

        G = nx.from_pandas_edgelist(edge_list, source="source", target="target")

        # Network properties
        num_nodes = G.number_of_nodes()
        num_edges = G.number_of_edges()

        # Link prediction (Common Neighbors). Scores are produced lazily so the
        # full list of non-edges never has to be held in memory or sorted.
        candidate_scores = (
            (u, v, sum(1 for _ in nx.common_neighbors(G, u, v)))
            for u, v in nx.non_edges(G)
        )
        # heapq.nlargest is documented as equivalent to
        # sorted(iterable, key=key, reverse=True)[:n], so ties keep the same order.
        top_links = heapq.nlargest(top_k, candidate_scores, key=lambda item: item[2])

        # Print results
        print(f"Results for {file_path}:")
        print(f"  Number of nodes: {num_nodes}")
        print(f"  Number of edges: {num_edges}")
        print(f"  Top {top_k} predicted links (Common Neighbors):")
        for u, v, score in top_links:
            print(f"    ({u}, {v}) - Score: {score}")
        print("-" * 50)

    except Exception as e:
        # Deliberately broad: this is a best-effort batch report, so the error
        # is shown for this file and the caller can move on to the next one.
        print(f"An error occurred with '{file_path}': {e}")

# Preprocessed CSV files handled by this cell. Every path has the form
# "datacleaning/<dataset>_preprocessed.csv", so only the dataset names are
# spelled out here.
file_paths = [
    f"datacleaning/{dataset}_preprocessed.csv"
    for dataset in (
        "aishihik_lake",
        "cold_lake",
        "lake_of_the_woods",
        "mcgregor_river",
        "parsnip_river",
        "sbay_lake_huron",
        "smallwood_reservoir",
    )
]

# Process each file in order. The function prints its report (or a one-line
# error message) for the given path and has no return statement, so nothing
# is captured here.
for file_path in file_paths:
    calculate_network_properties_and_links(file_path)


Results for datacleaning/aishihik_lake_preprocessed.csv:
  Number of nodes: 39
  Number of edges: 248
  Top 5 predicted links (Common Neighbors):
    (Salvelinus namaycush, Esox lucius) - Score: 31
    (Salvelinus namaycush, Thymallus arcticus) - Score: 31
    (Salvelinus namaycush, Catostomus catostomus) - Score: 31
    (Salvelinus namaycush, Coregonus clupeaformis) - Score: 31
    (Salvelinus namaycush, Cottus cognatus) - Score: 31
--------------------------------------------------
Results for datacleaning/cold_lake_preprocessed.csv:
  Number of nodes: 54
  Number of edges: 473
  Top 5 predicted links (Common Neighbors):
    (Stizostedion vitreum, Esox lucius) - Score: 43
    (Stizostedion vitreum, Coregonus artedii) - Score: 43
    (Stizostedion vitreum, Salvelinus namaycush) - Score: 43
    (Stizostedion vitreum, Catostomus catostomus) - Score: 43
    (Stizostedion vitreum, Lota lota) - Score: 43
--------------------------------------------------
Results for datacleaning/lake_of_th