In [54]:
from sklearn.preprocessing import normalize
import pandas as pd
import numpy as np
import json

In [68]:
# Which quartile of the available cluster counts to report on; used as a key
# into the `quartiles` mapping ("low", "medium" or "high").
cluster_sizes = "high"

In [69]:
# Benchmarks and decomposition methods to compare. Each (method, project) pair
# is read from ../results/<method>/<method>_<project>.json.
projects = ["JPetStore", "DayTrader", "AcmeAir", "Plants"]
methods = ["Mo2oM_Full", "Mono2Multi_Full", "Mo2oM_HardClustering", "Mo2oM_TFIDF_Full"]
# Display names used for the rows of the LaTeX table.
method_names = {"Mo2oM_Full": r"\textbf{UniXcoder + GNN Soft (ours)}",
				"Mono2Multi_Full": "UniXcoder + Distance Soft",
				"Mo2oM_HardClustering": "UniXcoder + GNN Hard",
				"Mo2oM_TFIDF_Full": "TF-IDF + GNN Soft"}
metrics = ["SM", "ICP", "IFN", "NED"]
# +1: higher is better, -1: lower is better.
metrics_sign = {"SM": 1, "ICP": -1, "IFN": -1, "NED": -1}
# Percentile of the n_clusters distribution associated with each size setting.
quartiles = {"low": 25, "medium": 50, "high": 75}
q = quartiles[cluster_sizes]
# results[project][method][metric] -> metric value of the selected run.
results = {}

for project in projects:
	results[project] = {}
	for method in methods:
		results[project][method] = {}
		# Only the JSON parsing needs the file handle; release it before the
		# DataFrame work starts.
		with open(f"../results/{method}/{method}_{project}.json", "r") as f:
			data = json.load(f)
		df = pd.DataFrame(data).drop(["microservices"], axis=1)

		# Pick the cluster count that actually occurs in the runs and is
		# closest to the requested percentile. argmin yields a *position*, so
		# the lookup is done positionally on the array rather than through the
		# Series' label-based `[]`, which is only equivalent for a default
		# RangeIndex.
		cluster_counts = df["n_clusters"].to_numpy()
		quartile = np.percentile(cluster_counts, q)
		n_clusters = cluster_counts[np.argmin(np.abs(cluster_counts - quartile))]
		df = df[df["n_clusters"] == n_clusters]

		# Combined score over the remaining runs: each metric column is
		# L2-normalised and added with the sign given by `metrics_sign`, so
		# metrics to maximise raise the score and metrics to minimise lower it.
		s = np.zeros((1, len(df)))
		for metric in metrics:
			s += metrics_sign[metric] * normalize([df[metric].to_numpy()])
		idx = np.argmax(s)

		# Keep the metric values of the best-scoring run.
		for metric in metrics:
			results[project][method][metric] = df[metric].iloc[idx]

In [70]:
# Collect, for every (project, metric) pair, the method with the best value.
# Values are multiplied by the metric's sign so that "best" is always the
# largest signed score; on ties the method listed first in `methods` wins.
bests = []
for project, per_method in results.items():
	for metric in metrics:
		direction = metrics_sign[metric]
		winner = None
		top_score = float('-inf')
		for method in methods:
			signed_score = direction * per_method[method][metric]
			if signed_score > top_score:
				top_score = signed_score
				winner = method
		bests.append((project, metric, winner))

In [71]:
# Show the winning method for each (project, metric) pair.
bests

[('JPetStore', 'SM', 'Mono2Multi_Full'),
 ('JPetStore', 'ICP', 'Mo2oM_Full'),
 ('JPetStore', 'IFN', 'Mo2oM_Full'),
 ('JPetStore', 'NED', 'Mono2Multi_Full'),
 ('DayTrader', 'SM', 'Mono2Multi_Full'),
 ('DayTrader', 'ICP', 'Mo2oM_Full'),
 ('DayTrader', 'IFN', 'Mo2oM_Full'),
 ('DayTrader', 'NED', 'Mo2oM_HardClustering'),
 ('AcmeAir', 'SM', 'Mono2Multi_Full'),
 ('AcmeAir', 'ICP', 'Mo2oM_Full'),
 ('AcmeAir', 'IFN', 'Mo2oM_Full'),
 ('AcmeAir', 'NED', 'Mono2Multi_Full'),
 ('Plants', 'SM', 'Mono2Multi_Full'),
 ('Plants', 'ICP', 'Mo2oM_Full'),
 ('Plants', 'IFN', 'Mo2oM_Full'),
 ('Plants', 'NED', 'Mono2Multi_Full')]

In [72]:
# Static LaTeX wrapper for the results table. The arrows in the header row
# state the preferred direction of each metric and must agree with
# `metrics_sign`: SM is maximised, while ICP, IFN and NED are minimised.
table_head = r'''	\begin{table}[ht]
		\centering
		\begin{tabular}{|l|c|c|c|c|}
			\hline
			\rowcolor{gray!20} % Now light gray (previously our method's color)
			\rule{0pt}{14pt}
			\textbf{Method} & \textbf{SM $\uparrow$} & \textbf{ICP $\downarrow$} & \textbf{IFN $\downarrow$} & \textbf{NED $\downarrow$} \\'''
# Closes the environments opened by `table_head`.
table_tail = r'''		\end{tabular}
	\end{table}'''

In [74]:
# Emit the full LaTeX table: header, then one section per project with a
# two-row centred project banner followed by one row per method.
print(table_head)
for project in projects:
	print(r"\hline")
	print(f"\\multicolumn{{5}}{{|c|}}{{\\multirow{{2}}{{*}}{{\\textbf{{{project}}}}}}} \\\\")
	print(r"\multicolumn{5}{|c|}{} \\")
	print(r"\hline")
	# Methods are listed in reverse so that "Mo2oM_Full" ends up last.
	for method in reversed(methods):
		cells = []
		for metric in metrics:
			# The best value per (project, metric) is typeset in bold.
			if (project, metric, method) in bests:
				cells.append(f" & \\textbf{{{results[project][method][metric]:.3f}}}")
			else:
				cells.append(f" & {results[project][method][metric]:.3f}")
		line = method_names[method] + "".join(cells) + r"\\"
		# The "Mo2oM_Full" row is framed by rules and given a coloured background.
		highlighted = method == "Mo2oM_Full"
		if highlighted:
			print(r"\hline")
			print(r"\rowcolor{yellow!15}")
		print(line)
		if highlighted:
			print(r"\hline")
print(table_tail)

	\begin{table}[ht]
		\centering
		\begin{tabular}{|l|c|c|c|c|}
			\hline
			\rowcolor{gray!20} % Now light gray (previously our method's color)
			\rule{0pt}{14pt}
			\textbf{Method} & \textbf{SM $\uparrow$} & \textbf{ICP $\downarrow$} & \textbf{IFN $\uparrow$} & \textbf{NED $\downarrow$} \\
\hline
\multicolumn{5}{|c|}{\multirow{2}{*}{\textbf{JPetStore}}} \\
\multicolumn{5}{|c|}{} \\
\hline
TF-IDF + GNN Soft & 0.136 & 0.132 & 0.036 & 0.379\\
UniXcoder + GNN Hard & 0.056 & 0.482 & 0.643 & 0.205\\
UniXcoder + Distance Soft & \textbf{0.293} & 0.402 & 0.500 & \textbf{0.055}\\
\hline
\rowcolor{yellow!15}
\textbf{UniXcoder + GNN Soft (ours)} & 0.259 & \textbf{0.043} & \textbf{0.036} & 0.550\\
\hline
\hline
\multicolumn{5}{|c|}{\multirow{2}{*}{\textbf{DayTrader}}} \\
\multicolumn{5}{|c|}{} \\
\hline
TF-IDF + GNN Soft & 0.204 & 0.148 & 0.071 & 0.458\\
UniXcoder + GNN Hard & 0.103 & 0.451 & 0.600 & \textbf{0.407}\\
UniXcoder + Distance Soft & \textbf{0.860} & 0.592 & 0.595 & 0.450\\
\hline
\row