In [None]:
import pandas as pd
import torch
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from math import exp
import sys
import os
import networkx as nx

import biological_fuzzy_logic_networks.biofuzznet as biofuzznet
import biological_fuzzy_logic_networks.biomixnet as biomixnet
import biological_fuzzy_logic_networks.utils as utils

In [None]:
# Tolerance used when classifying a mixed gate: get_results labels a gate as AND
# when sigmoid(AND_param) is within this distance of 1, and as OR when it is
# within this distance of 0; anything in between is reported as "Unknown".
ZERO_THRESHOLD = 0.05

In [None]:
def get_results(true_networks, optimized_networks, num_gates, threshold = ZERO_THRESHOLD):
    """
    Compare the gate type learned for every mixed gate with its true gate type.

    Parameters:
        true_networks: list containing the true network structure for each fold
        optimized_networks: list containing the optimized biomixnet structure for
            each fold, in the same order as true_networks
        num_gates: int, number of gates that were optimized (stored in the
            "Num gates" column of the result)
        threshold: zero threshold for comparing sigmoid(AND_param) to the ideal
            gate value (within threshold of 1 -> AND, within threshold of 0 -> OR)

    Returns:
        pandas DataFrame with one row per (fold, mixed gate) and the columns
        "Fold", "Num gates", "Gate", "True type", "Predicted type",
        "Gate AND param" and "Correctly predicted".

    Folds are paired positionally; every fold present in both lists is processed
    (the number of folds is no longer assumed to be 10).
    """
    columns = [
        "Fold",
        "Num gates",
        "Gate",
        "True type",
        "Predicted type",
        "Gate AND param",
        "Correctly predicted",
    ]
    records = []
    for fold, (true_network, optimized_network) in enumerate(zip(true_networks, optimized_networks)):
        for mixed in optimized_network.mixed_gates:
            true_type = true_network.nodes()[mixed]["node_type"]
            # AND_param is stored unconstrained; the sigmoid maps it to (0, 1).
            param = torch.sigmoid(optimized_network.nodes()[mixed]["gate"].AND_param)
            if (1-param) <= threshold:
                predicted_type = "logic_gate_AND"
            elif (param) <= threshold:
                predicted_type = "logic_gate_OR"
            else:
                predicted_type = "Unknown"
                print(f"Unable to decide gate at threshold {threshold}, AND_param has value {param}")
            records.append(
                {
                    "Fold": fold,
                    "Num gates": num_gates,
                    "Gate": mixed,
                    "True type": true_type,
                    "Predicted type": predicted_type,
                    "Gate AND param": float(param.data),
                    "Correctly predicted": predicted_type == true_type,
                }
            )
    # Build the frame once from all records instead of concatenating one
    # single-row frame per gate; passing `columns` keeps the schema stable even
    # when no mixed gate was found at all.
    return pd.DataFrame(records, columns=columns)


In [None]:
def compute_test_set_RMSE(optimized_biomixnet, test_ground_truth):
    """
    Simulate an optimized network on a pickled test set and return its RMSE.

    Parameters:
        optimized_biomixnet: optimized network; it is modified in place (its node
            states are re-initialised and the root nodes are overwritten with the
            test-set values before the network is updated)
        test_ground_truth: path to the pickle file holding the test set. The
            loaded object must support len() and indexing by node name
            (presumably a dict of tensors or a DataFrame - confirm against the
            data-generation code)

    Returns:
        The value returned by utils.compute_RMSE_outputs for the updated network
        and the test set.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # The context manager closes the file handle, which the bare
    # pickle.load(open(...)) form left open.
    with open(test_ground_truth, 'rb') as handle:
        test_set = pickle.load(handle)
    optimized_biomixnet.initialise_random_truth_and_output(len(test_set))
    # Clamp the root nodes to the test-set inputs before propagating.
    for node in optimized_biomixnet.root_nodes:
        optimized_biomixnet.nodes()[node]["output_state"] = test_set[node]
        optimized_biomixnet.nodes()[node]["ground_truth"] = test_set[node]
    optimized_biomixnet.sequential_update(optimized_biomixnet.root_nodes)
    rmse = utils.compute_RMSE_outputs(optimized_biomixnet, test_set)
    return(rmse)

In [None]:
def _load_pickle(path):
    """Load one pickled object from path, closing the file afterwards."""
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def get_all_results(data_folder, CV_folder, gate_folder, num_gates_max, num_folds = 10):
    """
    Collect gate-prediction results and test-set RMSEs for every number of mixed gates.

    Parameters:
        data_folder: path to directory with data
        CV_folder: name template of the folder (inside data_folder) containing the
            generated data for the folds. It is assumed to be labeled as
            CV_folder_{i} for fold i
        gate_folder: name template of the folder (inside data_folder) containing the
            result data for the different numbers of mixed gates. It is assumed to
            be labeled as gate_folder_{n} for number of mixed gates n
        num_gates_max: maximum number of introduced mixed gates, so that all
            solutions from 1 mixed gate to num_gates_max mixed gates are enumerated
        num_folds: number of cross-validation folds to load (folds 0 .. num_folds-1)

    Returns:
        Tuple (solutions, rmses) of pandas DataFrames: the concatenated output of
        get_results for every number of gates, and one row per (number of gates,
        fold) built from compute_test_set_RMSE with added "Fold" and "Num gates"
        columns.
    """
    all_solutions = []
    RMSEs = []
    for num_gates in range(1, num_gates_max+1):
        true_networks = [
            _load_pickle(f"{data_folder}/{CV_folder}_{i}/model_structure_data_{num_gates}.p")
            for i in range(num_folds)
        ]
        optimized_networks = [
            _load_pickle(f"{data_folder}/{gate_folder}_{num_gates}/model_{i}.p")
            for i in range(num_folds)
        ]
        all_solutions.append(get_results(true_networks, optimized_networks, num_gates, threshold = ZERO_THRESHOLD))
        rmse_gate = []
        for fold, optimized_network in enumerate(optimized_networks):
            test_path = f"{data_folder}/{CV_folder}_{fold}/test_ground_truth_{num_gates}.p"
            df = pd.DataFrame.from_dict([compute_test_set_RMSE(optimized_network, test_path)])
            df["Fold"] = fold
            df["Num gates"] = num_gates
            rmse_gate.append(df)
        RMSEs.append(pd.concat(rmse_gate, axis = 0))
    return(pd.concat(all_solutions, axis = 0), pd.concat(RMSEs, axis = 0))

In [None]:
# Gather gate predictions and test-set RMSEs for 1..9 mixed gates over 10 folds.
# "yourdatafolder" is a placeholder: point it at the directory holding the data.
results_df, RMSEs = get_all_results(
    data_folder="yourdatafolder",
    CV_folder="CV_experiment",
    gate_folder="gates",
    num_gates_max=9,
    num_folds=10,
)

In [None]:
# Percentage of mixed gates whose type was recovered, pooled over every fold and
# every network size, then split by the true gate type.
print("CORRECT GATE PREDICTIONS SUMMARY")
overall_pct = results_df['Correctly predicted'].sum()/len(results_df)*100
print(f"Correctly predicted gates overall: {overall_pct} %")

is_true_and = results_df["True type"].eq("logic_gate_AND")
AND_gates = results_df[is_true_and]
and_pct = AND_gates['Correctly predicted'].sum()/len(AND_gates)*100
print(f"Correctly predicted AND gates overall: {and_pct} %")

is_true_or = results_df["True type"].eq("logic_gate_OR")
OR_gates = results_df[is_true_or]
or_pct = OR_gates['Correctly predicted'].sum()/len(OR_gates)*100
print(f"Correctly predicted OR gates overall: {or_pct} %")

In [None]:
print("CORRECT PREDICTIONS PER NUMBER OF MIXED GATES IN NETWORK")

# Take the largest gate count from the results themselves rather than repeating
# a literal, so this cell stays in sync with whatever was passed to
# get_all_results.
num_gates_max = int(results_df["Num gates"].max())
for num_gates in range(1, num_gates_max+1):
    print(f"\nNUMBER OF MIXED GATES: {num_gates}")
    fold_df = results_df[results_df["Num gates"] == num_gates]
    # The mean of the boolean column is the fraction of correct predictions.
    # Unlike sum()/len(), it returns NaN without a divide-by-zero RuntimeWarning
    # when a subset is empty (e.g. no OR gate among the networks with one gate).
    print(f"Correctly predicted gates overall: {fold_df['Correctly predicted'].mean()*100} %")
    AND_gates = fold_df[fold_df["True type"] == "logic_gate_AND"]
    print(f"Correctly predicted AND gates overall: {AND_gates['Correctly predicted'].mean()*100} %")
    OR_gates = fold_df[fold_df["True type"] == "logic_gate_OR"]
    print(f"Correctly predicted OR gates overall: {OR_gates['Correctly predicted'].mean()*100} %")


In [None]:
print("RMSEs")
# Take the largest gate count from the RMSE table itself rather than repeating a
# literal, so this cell stays in sync with whatever was passed to
# get_all_results.
num_gates_max = int(RMSEs["Num gates"].max())
for num_gates in range(1, num_gates_max+1):
    print(f"\nRMSEs for Num gates: {num_gates}")
    # Rows of every fold for this number of mixed gates.
    fold_df = RMSEs[RMSEs["Num gates"] == num_gates]
    print(fold_df.describe())