In [43]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import subprocess
import json
import os

In [44]:
# Build `size`: for every test project, run the Java parser on the project's
# single-file source and count the entries of the JSON file read back from
# `json_path` (presumably one entry per parsed class -- confirm against Parser.java).
size = {}
projects = ["JPetStore", "DayTrader", "AcmeAir", "Plants"]
# Classpath for the parser: both jars joined with the platform path separator.
libs = "../Mo2oM/JavaParser/lib/javaparser-core-3.25.5-SNAPSHOT.jar"+os.pathsep+"../Mo2oM/JavaParser/lib/json-20230618.jar"
for proj in projects:
	json_path = f"../.data/{proj}.json"
	# check=True makes a failing parser run raise CalledProcessError here,
	# instead of silently continuing and reading a stale or missing JSON file below.
	subprocess.run(
		['java', '-cp', libs, '../Mo2oM/JavaParser/Parser.java', f"../test_projects/{proj}/OneFileSource.java", json_path],
		check=True,
	)
	with open(json_path, "rt") as classes_file:
		classes_info = json.load(classes_file)
	size[proj] = len(classes_info)
# Display the per-project counts as the cell output.
size

{'JPetStore': 73, 'DayTrader': 108, 'AcmeAir': 68, 'Plants': 37}

In [108]:
# Baseline rows for the "MEM" model, entered by hand as fractions.
# NOTE(review): the denominators differ from the class counts computed into
# `size` earlier in the notebook -- presumably they come from an external
# source; confirm before comparing against the other models.
mem_fractions = (
	("JPetStore", 18, 37),
	("DayTrader", 36, 74),
	("AcmeAir", 13, 28),
	("Plants", 9, 26),
)
# One record per dataset; later cells append the other models' rows to this list.
results = [
	{"Dataset": dataset, "Model": "MEM", "Coverage": numerator / denominator}
	for dataset, numerator, denominator in mem_fractions
]

In [109]:
# HDBSCAN coverage: for each project, load the stored HDBSCAN runs, count per run
# how many entries of "microservices" are something other than [-1], and record
# the median of those counts divided by the project's size.
for proj in projects:
	hdbscan_path = f"results/HDBSCAN/HDBSCAN_{proj}.json"
	with open(hdbscan_path) as f:
		data = json.load(f)
	df = pd.DataFrame(data)
	# Entries equal to [-1] are excluded from the count
	# (presumably HDBSCAN's noise label -- confirm against the result files).
	df["cov"] = df["microservices"].apply(
		lambda assignments: sum(1 for assignment in assignments if assignment != [-1])
	)
	median_cov = df["cov"].median()
	results.append({"Dataset": proj, "Model": "HDBSCAN", "Coverage": median_cov/size[proj]})

In [110]:
# Mo2oM coverage: for each project, score every stored run, keep the best-scoring
# run for each distinct n_clusters value, count per kept run how many entries of
# "microservices" are something other than [-1], and record the median of those
# counts divided by the project's size.
for proj in projects:
	with open(f"results/Mo2oM/Mo2oM_{proj}.json") as f:
		data = json.load(f)
	df = pd.DataFrame(data)
	df_no_ms = df.drop(columns="microservices")
	# Min-max scale every remaining column. The scaled frame carries df's own index so
	# the score assignment below aligns row-for-row even if the index is not a default
	# RangeIndex (a mismatched index would silently yield NaN scores).
	normalized_df = pd.DataFrame(
		MinMaxScaler().fit_transform(df_no_ms.values),
		columns=df_no_ms.columns,
		index=df_no_ms.index,
	)
	# Score: normalized SM minus normalized ICP, IFN and NED; the highest score wins.
	df['score'] = normalized_df["SM"] - normalized_df["ICP"] - normalized_df["IFN"] - normalized_df["NED"]
	# Select the best-scoring row of every n_clusters group in one pass instead of
	# growing a frame with pd.concat inside a loop. sort=False keeps the groups in
	# order of first appearance, and idxmax returns the first maximum on ties.
	best_row_labels = df.groupby("n_clusters", sort=False)["score"].idxmax()
	final_df = df.loc[best_row_labels].drop(columns=["alpha", "threshold", "score", "SM", "ICP", "IFN", "NED"])
	df = final_df.reset_index(drop=True).drop(columns=["n_clusters"])
	# Entries equal to [-1] are excluded from the count
	# (presumably unassigned classes -- confirm against the result files).
	df["cov"] = df["microservices"].apply(lambda x: len([_ for _ in x if _!=[-1]]))
	results.append({"Dataset": proj, "Model": "Mo2oM", "Coverage": df["cov"].median()/size[proj]})

In [114]:
# Assemble the final coverage table: rows sorted by a fixed dataset order and
# indexed by (Dataset, Model), leaving "Coverage" as the only column.
order = {name: rank for rank, name in enumerate(["JPetStore", "DayTrader", "AcmeAir", "Plants"])}
# The sort key maps each dataset name to its rank; an unknown name raises KeyError.
df = pd.DataFrame(results).sort_values(
	by="Dataset",
	key=lambda names: [order[name] for name in names],
)
index_columns = ["Dataset", "Model"]
index = pd.MultiIndex.from_frame(df[index_columns])
table = df.drop(columns=index_columns).set_index(index)
# Display the table as the cell output.
table

Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage
Dataset,Model,Unnamed: 2_level_1
JPetStore,MEM,0.486486
JPetStore,HDBSCAN,0.821918
JPetStore,Mo2oM,0.60274
DayTrader,MEM,0.486486
DayTrader,HDBSCAN,0.712963
DayTrader,Mo2oM,0.305556
AcmeAir,MEM,0.464286
AcmeAir,HDBSCAN,0.705882
AcmeAir,Mo2oM,0.617647
Plants,MEM,0.346154


In [121]:
# Render the coverage table as LaTeX with three decimals, then patch the generated
# text in order: \cline separators become \midrule, "[t]" becomes "[c]", and a
# \centering line is inserted right after the caption.
latex = table.to_latex(float_format="%.3f", caption="Coverage")
for old, new in (
	(r"\cline{1-3}", r"\midrule"),
	("[t]", "[c]"),
	("\\caption{Coverage}\n", "\\caption{Coverage}\n\\centering\n"),
):
	latex = latex.replace(old, new)
print(latex)

\begin{table}
\caption{Coverage}
\centering
\begin{tabular}{llr}
\toprule
 &  & Coverage \\
Dataset & Model &  \\
\midrule
\multirow[c]{3}{*}{JPetStore} & MEM & 0.486 \\
 & HDBSCAN & 0.822 \\
 & Mo2oM & 0.603 \\
\midrule
\multirow[c]{3}{*}{DayTrader} & MEM & 0.486 \\
 & HDBSCAN & 0.713 \\
 & Mo2oM & 0.306 \\
\midrule
\multirow[c]{3}{*}{AcmeAir} & MEM & 0.464 \\
 & HDBSCAN & 0.706 \\
 & Mo2oM & 0.618 \\
\midrule
\multirow[c]{3}{*}{Plants} & MEM & 0.346 \\
 & HDBSCAN & 0.703 \\
 & Mo2oM & 0.892 \\
\midrule
\bottomrule
\end{tabular}
\end{table}

