In [1]:
import pandas as pd
import json

In [2]:
# Benchmark applications and the decomposition approaches being compared.
projects = ["JPetStore", "DayTrader", "AcmeAir", "Plants"]
methods = ["Mo2oM", "Mono2Multi", "Mo2oM_HardClustering", "Mo2oM_TFIDF"]

# Human-readable row label for each approach in the generated LaTeX tables.
method_names = {
	"Mo2oM": "UniXcoder with GNN-based Soft Clustering",
	"Mono2Multi": "UniXcoder with Distance-Based Soft Clustering",
	"Mo2oM_HardClustering": "UniXcoder with GNN-based Hard Clustering",
	"Mo2oM_TFIDF": "TF-IDF with GNN-based Soft Clustering",
}

# Metric columns read from every result file, and the direction in which each
# one is compared later on: 1 -> the larger mean wins, -1 -> the smaller wins.
metrics = ["SM", "ICP", "IFN", "NED"]
metrics_sign = {"SM": 1, "ICP": -1, "IFN": -1, "NED": -1}

# results[project][method][metric] holds the mean of that metric's column in
# ../results/<method>/<method>_<project>.json.
results = {}
for project in projects:
	per_method = results[project] = {}
	for method in methods:
		method_means = per_method[method] = {}
		with open(f"../results/{method}/{method}_{project}.json", "r") as f:
			data = json.load(f)
		# The parsed JSON is handed to pandas only to average each metric column.
		df = pd.DataFrame(data)
		method_means.update((metric, df[metric].mean()) for metric in metrics)

In [3]:
# Collect one (project, metric, winning_method) triple per project/metric pair.
# A metric whose sign is 1 is won by the largest mean; any other sign is won by
# the smallest mean. Ties keep the method that appears first in `methods`.
bests = []
for project in results:
	for metric in metrics:
		higher_wins = metrics_sign[metric] == 1
		# Seed with the worst possible value for the comparison direction:
		# -inf when maximising, +inf when minimising.
		best_value = float('-inf') * metrics_sign[metric]
		best_method = None
		for method in methods:
			value = results[project][method][metric]
			improved = value > best_value if higher_wins else value < best_value
			if improved:
				best_value, best_method = value, method
		bests.append((project, metric, best_method))

In [4]:
# Show the (project, metric, best_method) triples collected in the previous cell.
bests

[('JPetStore', 'SM', 'Mo2oM_TFIDF'),
 ('JPetStore', 'ICP', 'Mo2oM'),
 ('JPetStore', 'IFN', 'Mo2oM'),
 ('JPetStore', 'NED', 'Mo2oM_HardClustering'),
 ('DayTrader', 'SM', 'Mo2oM_TFIDF'),
 ('DayTrader', 'ICP', 'Mo2oM_TFIDF'),
 ('DayTrader', 'IFN', 'Mo2oM_TFIDF'),
 ('DayTrader', 'NED', 'Mono2Multi'),
 ('AcmeAir', 'SM', 'Mono2Multi'),
 ('AcmeAir', 'ICP', 'Mo2oM'),
 ('AcmeAir', 'IFN', 'Mo2oM'),
 ('AcmeAir', 'NED', 'Mo2oM_HardClustering'),
 ('Plants', 'SM', 'Mo2oM'),
 ('Plants', 'ICP', 'Mo2oM'),
 ('Plants', 'IFN', 'Mo2oM_TFIDF'),
 ('Plants', 'NED', 'Mono2Multi')]

In [33]:
# LaTeX opening for the combined results table: a "Model" label column followed
# by four project groups (JPetStore, DayTrader, AcmeAir, Plants), each split
# into SM / ICP / IFN / NED, i.e. 16 data columns.
# NOTE(review): confirm the tabular column spec declares exactly the 1 + 16
# columns that the header rows use.
table_head = r'''	\begin{table}[ht]
		\centering
		\begin{tabular}{lcccccccccccccccccccc}
			\hline
			& \multicolumn{16}{c}{\textbf{Projects}} \\
			\cline{2-17}
			& \multicolumn{4}{c}{\textbf{JPetStore}} & \multicolumn{4}{c}{\textbf{DayTrader}} & \multicolumn{4}{c}{\textbf{AcmeAir}} & \multicolumn{4}{c}{\textbf{Plants}} \\
			\cline{2-17}
			\textbf{Model} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} \\
			\hline'''
# Closes the tabular and table environments opened by table_head.
table_tail = r'''		\end{tabular}
	\end{table}
'''

In [35]:
# Print the combined LaTeX table: one row per method, one cell per
# (project, metric) pair, with the winners listed in `bests` set in bold.
print(table_head)
for method in methods:
	cells = [method_names[method]]
	for project in projects:
		for metric in metrics:
			value = f"{results[project][method][metric]:.3f}"
			is_best = (project, metric, method) in bests
			cells.append(f"\\textbf{{{value}}}" if is_best else value)
	# Cells are separated by " & " and the row is terminated with a LaTeX "\\".
	print(" & ".join(cells) + r"\\")
print(table_tail)

	\begin{table}[ht]
		\centering
		\begin{tabular}{lcccccccccccccccccccc}
			\hline
			& \multicolumn{16}{c}{\textbf{Projects}} \\
			\cline{2-17}
			& \multicolumn{4}{c}{\textbf{JPetStore}} & \multicolumn{4}{c}{\textbf{DayTrader}} & \multicolumn{4}{c}{\textbf{AcmeAir}} & \multicolumn{4}{c}{\textbf{Plants}} \\
			\cline{2-17}
			\textbf{Model} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} & \textbf{SM} & \textbf{ICP} & \textbf{IFN} & \textbf{NED} \\
			\hline
UniXcoder with GNN-based Soft Clustering & 0.138 & \textbf{0.102} & \textbf{0.224} & 0.556 & 0.328 & 0.101 & \textbf{0.295} & 0.650 & 0.154 & \textbf{0.077} & \textbf{0.125} & 0.404 & \textbf{0.589} & \textbf{0.076} & 0.760 & 0.344\\
UniXcoder with Distance-Based Soft Clustering & 0.124 & 0.380 & 0.783 & 0.321 & 0.159 & 0.444 & 0.857 & \textbf{0.321} & \textbf{0.178} & 0.236 & 0.838 & 0.325 & 0.562 & 0.

In [6]:
# Suffix appended to each metric name in the per-metric table headings.
# Every value starts with a space so it can be concatenated directly.
explanation = dict(
	SM=" (higher is better)",
	ICP=" (lower is better)",
	IFN=" (lower is better)",
	NED=" (lower is better)",
)

In [7]:
# LaTeX opening for the per-metric tables: a "Model" label column followed by
# one column per project.
table_head = r'''\begin{table}[ht]
	\centering
	\begin{tabular}{lcccc}
		\hline
		& \multicolumn{4}{c}{\textbf{Projects}} \\
		\cline{2-5}
		\textbf{Model} & \textbf{JPetStore} & \textbf{DayTrader} & \textbf{AcmeAir} & \textbf{Plants} \\
		\hline'''
# LaTeX closing for the per-metric tables. The word "metric" in the label is a
# placeholder that the printing cell substitutes with the metric name, so the
# caption must not contain that word.
# The caption advertises only the mean: that is the only statistic computed
# from the result files and the only value printed in each cell.
table_tail = r'''		\hline
	\end{tabular}
	\caption{Model performance on different projects (mean).}
	\label{tab:model-performance-metric}
\end{table}'''

In [8]:
# Print one LaTeX table per metric: rows are the methods (in reverse order of
# `methods`), columns are the projects, and the winners listed in `bests` are
# set in bold.
for metric in metrics:
	# Heading above the table. explanation[metric] already begins with a space,
	# so it is appended directly (no extra comma or space). A real center
	# environment is used because \center is not a one-argument command and
	# would leave the environment unclosed.
	print(f"\\begin{{center}}\\textbf{{{metric}{explanation[metric]}}}\\end{{center}}")
	print()
	print(table_head)
	for method in reversed(methods):
		cells = [method_names[method]]
		for project in projects:
			value = f"{results[project][method][metric]:.3f}"
			is_best = (project, metric, method) in bests
			cells.append(f"\\textbf{{{value}}}" if is_best else value)
		# Cells are separated by " & " and the row is terminated with a LaTeX "\\".
		print(" & ".join(cells) + r"\\")
	# Substitute only the label placeholder so other text in table_tail is
	# never rewritten by accident.
	print(table_tail.replace("tab:model-performance-metric", f"tab:model-performance-{metric}"))
	print()

\center{\textbf{SM,  (higher is better)}}

\begin{table}[ht]
	\centering
	\begin{tabular}{lcccc}
		\hline
		& \multicolumn{4}{c}{\textbf{Projects}} \\
		\cline{2-5}
		\textbf{Model} & \textbf{JPetStore} & \textbf{DayTrader} & \textbf{AcmeAir} & \textbf{Plants} \\
		\hline
TF-IDF with GNN-based Soft Clustering & \textbf{0.159} & \textbf{0.353} & 0.152 & 0.451\\
UniXcoder with GNN-based Hard Clustering & 0.063 & 0.087 & 0.125 & -0.022\\
UniXcoder with Distance-Based Soft Clustering & 0.124 & 0.159 & \textbf{0.178} & 0.562\\
UniXcoder with GNN-based Soft Clustering & 0.148 & 0.339 & 0.160 & \textbf{0.579}\\
		\hline
	\end{tabular}
	\caption{Model performance on different projects (mean, median ± variance).}
	\label{tab:model-performance-SM}
\end{table}

\center{\textbf{ICP,  (lower is better)}}

\begin{table}[ht]
	\centering
	\begin{tabular}{lcccc}
		\hline
		& \multicolumn{4}{c}{\textbf{Projects}} \\
		\cline{2-5}
		\textbf{Model} & \textbf{JPetStore} & \textbf{DayTrader} & \textbf{AcmeA