In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
from typing import List, Dict, Tuple
import io
import sys
import pickle
import itertools
import datetime
import copy
from tqdm import tqdm
import random
import csv
import json
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math

import torch

import torch.optim.lr_scheduler as lr_scheduler
import torch_geometric
import torch_geometric.transforms as T
import torch_geometric.transforms
import torch_geometric.datasets
import torch_geometric.nn
from torch_geometric.utils import to_networkx

from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore', module='sklearn')

In [None]:
from env_variables import *
from utils_helpers import *

In [None]:
# Root directory under which train_bipartite writes its per-graph result files.
results_output_dir = "./results"
# exist_ok=True removes the check-then-create race and makes re-running this cell a no-op;
# it also fails loudly if the path already exists as a regular file.
os.makedirs(results_output_dir, exist_ok=True)

# Bipartite Matching Algorithms

In [None]:
def get_job_params_bipartite(debug = False):
    """Build the list of job configurations for the bipartite matching baselines.

    Every job is one element of the Cartesian product of the option lists in
    the search space below, extended with a derived ``scale_features`` flag
    that is True whenever the training data type name contains "Real".

    Args:
        debug: When truthy, print the number of jobs and the job list as JSON.

    Returns:
        A list of dicts (one per job) keyed by option name.
    """
    # Disabled synthetic data types (re-add to the list below to include them):
    #   "Synthetic_random_walk_2D_z_normal_distribution"
    #   "Synthetic_lin_1D_y_random_walk_z_fixed"
    #   "Synthetic_lin_1D_y_fixed_z_fixed"
    training_data_types = ["Real_automatic", "Real"]

    node_feature_names = ['Y', 'X', 'Z', 'node_type', 'ID']
    edge_feature_names = [
        'source', 'target', 'edge_label',
        'delta_x', 'delta_y', 'delta_z',
        'weight', 'edge_type',
        'angle_orientation_theta', 'angle_orientation_phi',
    ]

    # Insertion order matters: it fixes the key order of every job dict and the
    # order in which itertools.product varies the options (last varies fastest).
    search_space = {
        "data_type_train": training_data_types,
        "model_type": ["Minimum Weight"],   # disabled alternative: "Hopcroft_Karp"
        "knn_inter_nodes": [10],            # disabled alternatives: "min", 7
        "knn_inter_nodes_max": [7],
        "knn_intra_nodes": [0],
        "normalize": [True, False],
        "node_feats": [node_feature_names],
        "edge_feats": [edge_feature_names],
    }
    option_names = list(search_space)

    job_parameters = []
    for option_values in itertools.product(*search_space.values()):
        job = {name: value for name, value in zip(option_names, option_values)}
        # Derived flag: any data type whose name contains "Real" gets scaled features.
        job["scale_features"] = "Real" in job["data_type_train"]
        job_parameters.append(job)

    if debug:
        print("Total Number of jobs is:", len(job_parameters))
        print(json.dumps(job_parameters, indent = 1))
    return job_parameters

In [None]:
def get_graph_list_bipartite(job_parameters, debug = False):
    """Build the graph list for every distinct graph configuration among the jobs.

    Jobs that share the same graph-related parameters share one graph list:
    the list is built by ``get_graph_list`` the first time its key is seen and
    reused afterwards.

    Args:
        job_parameters: Iterable of job dicts; each must provide
            "data_type_train", "knn_inter_nodes", "knn_intra_nodes",
            "knn_inter_nodes_max", "normalize", "scale_features",
            "node_feats" and "edge_feats".
        debug: When truthy, dump the first graph of the current job's graph
            list (attributes, PyG graph, node/edge frames, feature tensors).

    Returns:
        Dict mapping the "_"-joined parameter key to the graph list returned
        by ``get_graph_list``.
    """
    graph_list_dict_bipartite = {}

    for params in tqdm(job_parameters):
        # The key is built the same way in train_bipartite, which looks the lists up again.
        key_parts = [params["data_type_train"], params["knn_inter_nodes"], params["knn_intra_nodes"],
                     params["knn_inter_nodes_max"], params["normalize"],
                     params["scale_features"], str(params["node_feats"]), str(params["edge_feats"])]
        graph_key = "_".join(str(part) for part in key_parts)

        if graph_key not in graph_list_dict_bipartite:
            graph_list_dict_bipartite[graph_key] = get_graph_list(
                params["data_type_train"], params["knn_inter_nodes"], params["knn_intra_nodes"],
                params["knn_inter_nodes_max"], normalize = params["normalize"],
                scale_feats = params["scale_features"],
                node_feats = params["node_feats"], edge_feats = params["edge_feats"],
                shuffle = False)

        # Inspection is debug-only: previously it ran on every iteration, so an empty
        # graph list raised IndexError even with debug off, and it always looked at the
        # first cached list instead of the one belonging to the current job.
        if debug:
            graph_list = graph_list_dict_bipartite[graph_key]
            if len(graph_list) == 0:
                print("No graphs available for key:", graph_key)
                continue

            graph_instance = graph_list[0]
            print(vars(graph_instance).keys())

            print(graph_instance.pyg_graph)

            display(graph_instance.nodes_df)

            display(graph_instance.edges_df_knn)

            print(graph_instance.pyg_graph.x)

            print(graph_instance.pyg_graph.edge_attr)

    return graph_list_dict_bipartite

In [None]:
def train_bipartite(graph_list_dict_bipartite, job_parameters, debug = False, make_plots = True):
    """Run a bipartite matching baseline for every job and collect aggregated metrics.

    For each job the matching graph lists are looked up by their parameter key,
    a matching is computed per graph with the algorithm named in
    ``params["model_type"]``, the predicted edges are evaluated against
    ``graph.edges_df``, and the predictions are written below
    ``results_output_dir``.

    Args:
        graph_list_dict_bipartite: Dict produced by ``get_graph_list_bipartite``
            (parameter key -> list of graph objects).
        job_parameters: Iterable of job dicts. They are not modified; each
            result carries its own copy.
        debug: Accepted for interface compatibility; not read by this function.
        make_plots: When truthy, build and show one figure per graph.

    Returns:
        A list with one dict per job containing "aggregated_metrics" and
        "job_parameters" (the job's parameters plus "angle_features" and
        "constraints", both set to "NA").

    Raises:
        ValueError: If a job names a model type other than "Hopcroft_Karp",
            "Eppstein" or "Minimum Weight" (raised while processing a graph).
        KeyError: If no graph list exists for a job's parameter key.
    """
    results_list_mf = []

    for params in job_parameters:
        print("\n\n\n####################################")
        metrics_list_bipartite = []

        # Must stay in sync with the key construction in get_graph_list_bipartite.
        key_parts = [params["data_type_train"], params["knn_inter_nodes"], params["knn_intra_nodes"],
                     params["knn_inter_nodes_max"], params["normalize"],
                     params["scale_features"], str(params["node_feats"]), str(params["edge_feats"])]
        graph_key = "_".join(str(part) for part in key_parts)
        graph_list = graph_list_dict_bipartite[graph_key]

        for graph in graph_list:
            graph_id, nodes_df, edges_df, edges_df_knn = graph.graph_id, graph.nodes_df, graph.edges_df, graph.edges_df_knn
            print("Graph_ID:",graph_id,"K_INTRA:",graph.k_intra, "K_INTER:",graph.k_inter)

            # Build the graph from a copy so the helper cannot alter edges_df_knn,
            # which is reused below for labelling.
            nx_G_knn = nx_build_graph(nodes_df, edges_df_knn.copy())

            model_type = params["model_type"]

            # NOTE(review): `nx` is not imported in the visible import cell; presumably
            # it is provided by one of the star imports - confirm.
            # NOTE(review): networkx's bipartite matching functions need `top_nodes`
            # when the graph is disconnected - confirm the kNN graphs are connected.
            if(model_type=="Hopcroft_Karp"):
                nx_bipartite_edges_list = nx.bipartite.maximum_matching(nx_G_knn)
            elif(model_type=="Eppstein"):
                nx_bipartite_edges_list = nx.bipartite.eppstein_matching(nx_G_knn)
            elif(model_type=="Minimum Weight"):
                nx_bipartite_edges_list = nx.bipartite.minimum_weight_full_matching(nx_G_knn)
            else:
                raise ValueError("Model not implemented!")

            # Convert the matching to an edge frame; every matched edge is a positive prediction.
            edges_df_bipartite = nx_convert_dict_to_edges_df(nx_bipartite_edges_list)
            edges_df_bipartite["edge_label"] = 1
            edges_df_bipartite = apply_edges_df_label(edges_df_bipartite, edges_df_knn)

            metrics_bipartite, edge_labels_string_bipartite = eval_edges_df(edges_df, edges_df_bipartite)
            metrics_list_bipartite.append(metrics_bipartite)

            print(json.dumps(metrics_bipartite, indent = 1))

            # Save results to file.
            # NOTE(review): the directory name does not include `normalize`, so jobs that
            # differ only in that flag write to the same files - confirm this is intended.
            bipartite_results_array = pred_df_to_csv(edges_df_bipartite, graph.nodes_df_original)
            output_file_dir =  results_output_dir+"/"+params["data_type_train"]+"_"+params["model_type"]+"_" +\
                                    str(params["knn_inter_nodes"])+"_"+str(params["knn_intra_nodes"])+"/"
            # exist_ok=True replaces the racy exists()/makedirs() pair.
            os.makedirs(output_file_dir, exist_ok=True)
            output_file_path = os.path.join(output_file_dir, graph_id)
            array_to_csv(bipartite_results_array, output_file_path)

            if(make_plots):
                df_make_plot(nodes_df, edges_df_bipartite, edge_labels_string_bipartite, "Bipartite")
                plt.show()

        ####################################
        ### Aggregate Metrics at the end  ##
        ####################################

        metrics_bipartite_aggregated = aggregate_metrics(metrics_list_bipartite)
        print(json.dumps(metrics_bipartite_aggregated,indent = 1))

        # Store a copy: the caller's job dict must not gain result-only keys.
        job_params_for_result = dict(params)
        job_params_for_result["angle_features"] = "NA"
        job_params_for_result["constraints"] = "NA"

        result_bipartite = {}
        result_bipartite["aggregated_metrics"] = metrics_bipartite_aggregated
        result_bipartite["job_parameters"] = job_params_for_result
        results_list_mf.append(result_bipartite)
    return results_list_mf

In [None]:
def plot_results_bipartite(results_list_bipartite):
    """Render the bipartite results as tables and print a LaTeX version.

    The table built by ``plot_table`` is sorted, displayed in full, then
    displayed again without the "Data Train", "Data Test" and "Scale" columns;
    that reduced table is also printed via ``plot_df_to_latex``.

    Args:
        results_list_bipartite: Result dicts as returned by ``train_bipartite``.

    Returns:
        The sorted table without the three dropped columns.
    """
    sort_columns = ["K Inter", 'Algorithm', 'Data Train', 'Data Test']
    hidden_columns = ["Data Train", "Data Test", "Scale"]

    full_table = plot_table(results_list_bipartite, metrics_dict_entries = [None]).sort_values(by=sort_columns)
    display(full_table)

    compact_table = full_table.drop(columns=hidden_columns)
    display(compact_table)

    latex_source = plot_df_to_latex(compact_table)
    print(latex_source)
    return compact_table

In [None]:
# Build the job grid; debug = True prints the number of jobs and the job list as JSON.
job_parameters_bipartite = get_job_params_bipartite(debug = True)

In [None]:
# Build the graph lists for every distinct graph configuration among the jobs (no debug dumps).
graph_list_dict_bipartite = get_graph_list_bipartite(job_parameters_bipartite, debug = False)

In [None]:
# Run the matching baseline for every job; make_plots = True shows one figure per graph.
# Note: train_bipartite accepts the debug flag but does not read it in its body.
results_list_bipartite = train_bipartite(graph_list_dict_bipartite, job_parameters_bipartite, debug = True, make_plots = True)

In [None]:
# Display the result tables and print their LaTeX form. The call is the cell's last
# expression, so the returned table is also rendered as the cell output.
plot_results_bipartite(results_list_bipartite) 