In [1]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import json

In [2]:
# Benchmark applications and decomposition variants whose result files are summarised.
projects = ["JPetStore", "DayTrader", "AcmeAir", "Plants"]
methods = ['Mo2oM_TFIDF', 'Mo2oM_HardClustering', 'Mo2oM']
# Non-metric columns per method; they are dropped before the metrics are aggregated.
# "threshold" is absent for Mo2oM_HardClustering because it is removed right after loading.
extra_columns = {
	"Mo2oM": ["alpha", "threshold", "score"],
	"Mo2oM_TFIDF": ["alpha", "threshold", "score"],
	"Mo2oM_HardClustering": ["alpha", "score"],
}
# Display label for each method. An explicit mapping avoids relying on the order of
# chained str.replace calls ("Mo2oM" is a prefix of the other two method names).
model_labels = {
	"Mo2oM_TFIDF": "TF-IDF + Soft",
	"Mo2oM_HardClustering": "UniXcoder + Hard",
	"Mo2oM": "UniXcoder + Soft (ours)",
}

results = []
for proj in projects:
	for method in methods:
		with open(f"results/{method}/{method}_{proj}.json") as f:
			runs = pd.DataFrame(json.load(f)).drop(columns=["microservices"])
		if method == "Mo2oM_HardClustering":
			runs = runs.drop(columns=["threshold"])

		# Min-max scale every column so the four metrics are comparable, then combine them
		# into one score: SM is rewarded, ICP / IFN / NED are penalised.
		# index=runs.index keeps the scaled values aligned with `runs` on assignment.
		scaled = pd.DataFrame(
			MinMaxScaler().fit_transform(runs.values),
			columns=runs.columns,
			index=runs.index,
		)
		runs["score"] = scaled["SM"] - scaled["ICP"] - scaled["IFN"] - scaled["NED"]

		# For each distinct n_clusters keep the single best-scoring run (first one on ties),
		# selected in one pass instead of growing a frame with pd.concat inside a loop.
		# NOTE(review): assumes every n_clusters group has at least one non-NaN score.
		best_idx = runs.groupby("n_clusters", sort=False)["score"].idxmax()
		best_runs = runs.loc[best_idx].drop(columns=extra_columns[method] + ["n_clusters"])

		# Median of each remaining metric column over the per-n_clusters winners.
		summary = best_runs.median().to_dict()
		summary["Project"] = proj
		summary["Model"] = model_labels[method]
		results.append(summary)

# One row per (project, model); `df` is reused by the LaTeX-export cells below.
df = pd.DataFrame(results)[["Model", "Project", "SM", "ICP", "IFN", "NED"]]
table = df.pivot(index="Model", columns="Project", values=["SM", "ICP", "IFN", "NED"])
table

Unnamed: 0_level_0,SM,SM,SM,SM,ICP,ICP,ICP,ICP,IFN,IFN,IFN,IFN,NED,NED,NED,NED
Project,AcmeAir,DayTrader,JPetStore,Plants,AcmeAir,DayTrader,JPetStore,Plants,AcmeAir,DayTrader,JPetStore,Plants,AcmeAir,DayTrader,JPetStore,Plants
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
TF-IDF + Soft,0.250615,0.211237,0.531081,0.792064,0.086608,0.031451,0.121324,0.087486,1.142857,1.2,1.532051,0.8,0.054054,0.157895,0.13216,0.026667
UniXcoder + Hard,0.180309,0.112244,0.0967,0.276971,0.360547,0.442366,0.407441,0.453413,2.111111,1.352941,1.733333,3.041667,0.044118,0.583333,0.184932,0.094595
UniXcoder + Soft (ours),0.303537,0.454908,0.288919,0.628973,0.093228,0.082416,0.120284,0.079953,1.0,1.0,1.470085,0.9375,0.058824,0.159091,0.025097,0.0


In [3]:
# Re-index the per-(project, model) medians so to_latex groups the rows by project.
index = pd.MultiIndex.from_frame(df[["Project", "Model"]])
new_df = df.drop(columns=["Project", "Model"]).set_index(index)

# Styled LaTeX headers. They are applied by renaming the columns / index levels before
# rendering, rather than by substring replacement on the finished LaTeX, so only the
# header cells can be affected (never a data cell or the caption).
header_labels = {
	"SM": "\\textbf{SM $\\uparrow$}",
	"ICP": "\\textbf{ICP $\\downarrow$}",
	"IFN": "\\textbf{IFN $\\downarrow$}",
	"NED": "\\textbf{NED $\\downarrow$}",
}
index_labels = {"Project": "\\textbf{Dataset}", "Model": "\\textbf{Model}"}
labelled_df = new_df.rename(columns=header_labels).rename_axis(index=index_labels)

# escape=False keeps the LaTeX markup in the headers verbatim.
new_tex = labelled_df.to_latex(float_format="%.3f", escape=False, caption="Results")
# Drop any "[t]" placement specifier and turn the partial rules that separate the
# project groups into full-width \midrule lines.
new_tex = new_tex.replace("[t]", "").replace("\\cline{1-6}", "\\midrule")
print(new_tex)

\begin{table}
\caption{Results}
\begin{tabular}{llrrrr}
\toprule
 &  & \textbf{SM $\uparrow$} & \textbf{ICP $\downarrow$} & \textbf{IFN $\downarrow$} & \textbf{NED $\downarrow$} \\
\textbf{Dataset} & \textbf{Model} &  &  &  &  \\
\midrule
\multirow{3}{*}{JPetStore} & TF-IDF + Soft & 0.531 & 0.121 & 1.532 & 0.132 \\
 & UniXcoder + Hard & 0.097 & 0.407 & 1.733 & 0.185 \\
 & UniXcoder + Soft (ours) & 0.289 & 0.120 & 1.470 & 0.025 \\
\midrule
\multirow{3}{*}{DayTrader} & TF-IDF + Soft & 0.211 & 0.031 & 1.200 & 0.158 \\
 & UniXcoder + Hard & 0.112 & 0.442 & 1.353 & 0.583 \\
 & UniXcoder + Soft (ours) & 0.455 & 0.082 & 1.000 & 0.159 \\
\midrule
\multirow{3}{*}{AcmeAir} & TF-IDF + Soft & 0.251 & 0.087 & 1.143 & 0.054 \\
 & UniXcoder + Hard & 0.180 & 0.361 & 2.111 & 0.044 \\
 & UniXcoder + Soft (ours) & 0.304 & 0.093 & 1.000 & 0.059 \\
\midrule
\multirow{3}{*}{Plants} & TF-IDF + Soft & 0.792 & 0.087 & 0.800 & 0.027 \\
 & UniXcoder + Hard & 0.277 & 0.453 & 3.042 & 0.095 \\
 & UniXcoder + Soft (

In [4]:
# Hand-assembled LaTeX table: one shared header, then a centred sub-heading and the
# per-model rows for each project.
print(r"\begin{table}")
print(r"\caption{Ablation}")
print(r"\centering")
print(r"\begin{tabular}{lrrrr}")
print(r"\toprule")
print(r"\textbf{Model} & \textbf{SM $\uparrow$} & \textbf{ICP $\downarrow$} & \textbf{IFN $\downarrow$} & \textbf{NED $\downarrow$} \\")
for proj in projects:
	print(r"\midrule")
	print(r"\multicolumn{5}{c}{\textbf{" + proj + r"}}\\")
	print(r"\midrule")
	# Let pandas format the numbers, but keep only the data rows: every wrapper line it
	# emits (\begin{tabular}, \toprule, \midrule, \bottomrule, \end{tabular}) starts with
	# a backslash, and header=False suppresses the column-name row. This does not depend
	# on the exact layout of to_latex output and leaves no trailing blank line before the
	# next \midrule / \bottomrule.
	tex = df[df["Project"] == proj].drop(columns="Project").to_latex(float_format="%.3f", index=False, header=False)
	rows = [line for line in tex.splitlines() if line.strip() and not line.lstrip().startswith("\\")]
	for line in rows:
		print(line)
print(r"\bottomrule")
print(r"\end{tabular}")
print(r"\end{table}")

\begin{table}
\caption{Ablation}
\centering
\begin{tabular}{lrrrr}
\toprule
\textbf{Model} & \textbf{SM $\uparrow$} & \textbf{ICP $\downarrow$} & \textbf{IFN $\downarrow$} & \textbf{NED $\downarrow$} \\
\midrule
\multicolumn{5}{c}{\textbf{JPetStore}}\\
\midrule
TF-IDF + Soft & 0.531 & 0.121 & 1.532 & 0.132 \\
UniXcoder + Hard & 0.097 & 0.407 & 1.733 & 0.185 \\
UniXcoder + Soft (ours) & 0.289 & 0.120 & 1.470 & 0.025 \\

\midrule
\multicolumn{5}{c}{\textbf{DayTrader}}\\
\midrule
TF-IDF + Soft & 0.211 & 0.031 & 1.200 & 0.158 \\
UniXcoder + Hard & 0.112 & 0.442 & 1.353 & 0.583 \\
UniXcoder + Soft (ours) & 0.455 & 0.082 & 1.000 & 0.159 \\

\midrule
\multicolumn{5}{c}{\textbf{AcmeAir}}\\
\midrule
TF-IDF + Soft & 0.251 & 0.087 & 1.143 & 0.054 \\
UniXcoder + Hard & 0.180 & 0.361 & 2.111 & 0.044 \\
UniXcoder + Soft (ours) & 0.304 & 0.093 & 1.000 & 0.059 \\

\midrule
\multicolumn{5}{c}{\textbf{Plants}}\\
\midrule
TF-IDF + Soft & 0.792 & 0.087 & 0.800 & 0.027 \\
UniXcoder + Hard & 0.277 & 0.453 &