In [None]:
import os
import sys

from pathlib import Path

import numpy as np
import pandas as pd

# Put the current working directory first on the import search path.
sys.path.insert(0, str(Path.cwd()))

In [None]:
import seaborn as sns

from matplotlib import pyplot as plt

In [None]:
# Colour palette used for the figures below (the plotting cells pass `p[1:]`,
# i.e. everything except the leading black).
# Entries are hex colour strings; the 8-digit ones are #RRGGBBAA with alpha FF
# (fully opaque).
p = [
    "#000000",  # black
    "#E69F00",  # orange
    "#56B4E9",  # sky blue
    "#009E73",  # bluish green
    "#FB6467FF",  # light red (opaque)
    "#808282",  # grey
    "#F0E442",  # yellow
    "#440154FF",  # dark purple (opaque)
    "#0072B2",  # blue
    "#D55E00",  # vermillion
    "#CC79A7",  # reddish purple
    "#C2CD23",  # yellow-green
    "#918BC3",  # lavender
    "#FFFFFF",  # white
]

In [None]:
# Placeholder location: point ROOT_DIR at the folder that holds the "labels"
# sub-folder. The figures produced further down are written into ROOT_DIR.
ROOT_DIR = Path(
    "/path/to/a/folder/containing/another/folder/named/labels/with/all/the/labels/of/all/synthetic/data/"
)
# Folder the label files are read from.
LABELS_DIR = ROOT_DIR.joinpath("labels")

In [None]:
# Read every label file in LABELS_DIR into its own DataFrame.
data_frames_list = []

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the combined table (and everything derived from it) reproducible across runs.
for file in sorted(os.listdir(LABELS_DIR)):
    # Skip SVG files stored next to the labels, and anything that is not a
    # regular file (e.g. a ".ipynb_checkpoints" directory), which read_csv
    # cannot parse.
    if ".svg" in file or not (LABELS_DIR / file).is_file():
        continue
    data_frames_list.append(pd.read_csv(LABELS_DIR / file))

# Fail here with a clear message instead of later with pandas'
# "No objects to concatenate".
if not data_frames_list:
    raise FileNotFoundError(f"No label files found in {LABELS_DIR}")

In [None]:
# Stack the per-file tables into one table and discard the "OldTarget" column.
# ignore_index=True gives every row a unique label; without it each file's own
# 0..n-1 index is kept, so the combined index contains duplicates.
# The drop is not done in place, so re-running this cell always rebuilds
# `every_label` from `data_frames_list`.
every_label = pd.concat(data_frames_list, ignore_index=True).drop(columns=["OldTarget"])

In [None]:
# Boolean row masks: True where the Subgraph0 / Subgraph1 value lies within
# 1e-5 of +0.70710678 ("plus") or of -0.70710678 ("minus").
# 0.70710678 is numerically ~ 1/sqrt(2).
sub0_plus_07_mask = (every_label["Subgraph0"] - 0.70710678).abs() < 0.00001
sub0_minus_07_mask = (every_label["Subgraph0"] + 0.70710678).abs() < 0.00001

sub1_plus_07_mask = (every_label["Subgraph1"] - 0.70710678).abs() < 0.00001
sub1_minus_07_mask = (every_label["Subgraph1"] + 0.70710678).abs() < 0.00001

In [None]:
# Rows whose Subgraph1 value is +/-0.70710678, and rows where that holds for
# Subgraph0 as well.
sub1_pm07_mask = sub1_minus_07_mask | sub1_plus_07_mask
both_pm07_mask = sub1_pm07_mask & (sub0_plus_07_mask | sub0_minus_07_mask)

l1 = int(both_pm07_mask.sum())  # rows passing both conditions
l2 = int(sub1_pm07_mask.sum())  # rows passing the Subgraph1 condition alone

# Sanity check: the Subgraph1 condition is expected to imply the Subgraph0 one.
assert l1 == l2, (
    f"{l2 - l1} row(s) have Subgraph1 at +/-0.70710678 while Subgraph0 is not"
)

# According to our theory just one of the two conditions should do the trick;
# both are applied anyway. `.copy()` makes the result an independent frame so
# that adding columns to it later does not raise SettingWithCopyWarning.
every_label = every_label[both_pm07_mask].copy()

In [None]:
def pos_or_neg(x, y):
    """Return "Pos<y>" if x is within 1e-5 of 0.70710678, otherwise "Neg<y>".

    Any value that is not close to +0.70710678 is labelled "Neg"; no separate
    check against -0.70710678 is made here.
    """
    prefix = "Pos" if np.abs(x - 0.70710678) < 0.00001 else "Neg"
    return prefix + str(y)


# Tag every row with the sign label of its Subgraph0 and Subgraph1 values.
every_label["ID0"] = every_label["Subgraph0"].apply(lambda score: pos_or_neg(score, 0))
every_label["ID1"] = every_label["Subgraph1"].apply(lambda score: pos_or_neg(score, 1))

In [None]:
# The raw Subgraph0/Subgraph1 values are now encoded in ID0/ID1, so remove them
# and renumber the rows 0..n-1.
every_label = every_label.drop(columns=["Subgraph0", "Subgraph1"]).reset_index(drop=True)

In [None]:
# Discard rows in which every one of Subgraph2..Subgraph7 is exactly zero.
# The columns are selected by name rather than by position so the filter does
# not depend on the column order of the label files.
zscore_columns = [f"Subgraph{k}" for k in range(2, 8)]
every_label = every_label[~(every_label[zscore_columns] == 0).all(axis=1)]

In [None]:
# Wide -> long, step 1: one row per (graph, subgraph column), with the column
# name in "Subgraph Type" and its value in "Z-Score".
every_label_melted = every_label.melt(
    id_vars=["GraphName", "ID0", "ID1"],
    value_vars=[
        "Subgraph2", "Subgraph3", "Subgraph4",
        "Subgraph5", "Subgraph6", "Subgraph7",
    ],
    var_name="Subgraph Type",
    value_name="Z-Score",
)

# Step 2: also unpivot the two label columns, so every row from step 1 appears
# twice - once carrying its ID0 label and once its ID1 label ("ID Size3").
every_label_melted_ready = every_label_melted.melt(
    id_vars=["GraphName", "Subgraph Type", "Z-Score"],
    value_vars=["ID0", "ID1"],
    var_name="ID",
    value_name="ID Size3",
)

In [None]:
# Display names for the subgraph columns.
subgraph_name = dict(
    Subgraph2="4-path",
    Subgraph3="4-cycle",
    Subgraph4="4-star",
    Subgraph5="tri-pan",
    Subgraph6="bi-fan",
    Subgraph7="4-clique",
)

# Display names for the Pos/Neg labels; the trailing digit is the index of the
# column the label came from (Subgraph0 -> "3-path", Subgraph1 -> "3-clique").
id_name = dict(
    Neg0="3-path - SP",
    Pos0="3-path + SP",
    Neg1="3-clique - SP",
    Pos1="3-clique + SP",
)

In [None]:
# Swap the raw identifiers for their display names. A value missing from the
# lookup dictionaries raises KeyError rather than being silently turned into NaN.
every_label_melted_ready["Subgraph Type"] = (
    every_label_melted_ready["Subgraph Type"].map(lambda key: subgraph_name[key])
)

every_label_melted_ready["ID Size3"] = (
    every_label_melted_ready["ID Size3"].map(lambda key: id_name[key])
)

In [None]:
sns.set_context("paper", font_scale=1.1)

# Rows carrying the two "3-path" labels only.
path_rows = every_label_melted_ready[
    every_label_melted_ready["ID Size3"].isin(["3-path + SP", "3-path - SP"])
]

# One panel per subgraph type (3 per row); the two labels are overlaid in each
# panel as semi-transparent histograms of the Z-Score, each panel with its own
# y-axis.
g = sns.FacetGrid(
    path_rows,
    col="Subgraph Type",
    col_wrap=3,
    hue="ID Size3",
    palette=p[1:],
    sharey=False,
    aspect=19 / 11,
)
g.map_dataframe(sns.histplot, "Z-Score", alpha=0.5, linewidth=0)
g.add_legend()
g.set_titles("{col_name}")
g.tight_layout()

# Write the figure next to the data, then release it.
plt.savefig(ROOT_DIR / "size3_vs_subgraph0.pdf", dpi=1200, bbox_inches="tight")
plt.close()

In [None]:
sns.set_context("paper", font_scale=1.1)

# Rows carrying the two "3-clique" labels only.
clique_rows = every_label_melted_ready[
    every_label_melted_ready["ID Size3"].isin(["3-clique + SP", "3-clique - SP"])
]

# One panel per subgraph type (3 per row); the two labels are overlaid in each
# panel as semi-transparent histograms of the Z-Score, each panel with its own
# y-axis.
g = sns.FacetGrid(
    clique_rows,
    col="Subgraph Type",
    col_wrap=3,
    hue="ID Size3",
    palette=p[1:],
    sharey=False,
    aspect=19 / 11,
)
g.map_dataframe(sns.histplot, "Z-Score", alpha=0.5, linewidth=0)
g.add_legend()
g.set_titles("{col_name}")
g.tight_layout()

# Write the figure next to the data, then release it.
plt.savefig(ROOT_DIR / "size3_vs_subgraph1.pdf", dpi=1200, bbox_inches="tight")
plt.close()

In [None]:
# Z-scores of the "bi-fan" subgraph for rows labelled "3-path + SP", as a NumPy array.
zs = every_label_melted_ready.loc[
    (every_label_melted_ready["ID Size3"] == "3-path + SP")
    & (every_label_melted_ready["Subgraph Type"] == "bi-fan"),
    "Z-Score",
].to_numpy()

In [None]:
# Fraction of the selected z-scores whose absolute value is at most 0.1.
np.count_nonzero(np.abs(zs) - 0.1 <= 0) / zs.shape[0]