In [19]:
import matplotlib.pyplot as plt
import pandas as pd
import dataclasses
import json
import os

@dataclasses.dataclass
class Config:
    """Identifies one experiment result file and loads it.

    The three fields are joined into the file name
    ``{graph_name}_{model}_{lr}.json``; the file is looked up under
    ``<data_dir>/<mode>/``.
    """

    # Graph/dataset name; first component of the result file name.
    graph_name: str
    # Model name; second component of the result file name.
    model: str
    # Learning rate kept as a string so it is embedded in the file name verbatim.
    lr: str

    def get_filename(self):
        """Return the result file name built from the three fields."""
        return f"{self.graph_name}_{self.model}_{self.lr}.json"

    def get_json(self, mode, data_dir="./json"):
        """Load and return the parsed JSON stored at ``<data_dir>/<mode>/<filename>``.

        Args:
            mode: Sub-directory of ``data_dir`` that holds the file.
            data_dir: Root directory of the result files.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        path = os.path.join(data_dir, mode, self.get_filename())
        # Read explicitly as UTF-8 so decoding does not depend on the
        # platform's locale default encoding.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def get_df(self, mode, data_dir="./json"):
        """Return the ``"results"`` entry of the JSON file as a DataFrame.

        Raises:
            KeyError: If the JSON document has no ``"results"`` key.
        """
        json_data = self.get_json(mode, data_dir)
        # Materialise the entries into a list before handing them to pandas.
        return pd.DataFrame(list(json_data["results"]))

    def get_pair(self, mode, data_dir="./json"):
        """Return the ``acc_epoch_time`` and ``eval_acc`` columns as a tuple."""
        df = self.get_df(mode, data_dir)
        return df['acc_epoch_time'], df['eval_acc']

    
def _epoch_limit(max_epoch):
    """Turn a max-epoch argument into a slice stop; ``None`` means no limit."""
    if max_epoch is None or max_epoch < 0:
        return None
    return max_epoch


def plot_data(mb_df, full_df, max_mb_epoch=-1, max_full_epoch=-1, plotname=None):
    """Plot ``eval_acc`` against ``acc_epoch_time`` for two runs on one figure.

    Args:
        mb_df: Data for the curve labelled "MB"; must provide the
            ``'acc_epoch_time'`` and ``'eval_acc'`` columns.
        full_df: Data for the curve labelled "Full"; same columns.
        max_mb_epoch: Maximum number of leading rows of ``mb_df`` to plot.
            A negative value (the default) or ``None`` plots every row.
        max_full_epoch: Same as ``max_mb_epoch`` but for ``full_df``.
        plotname: If truthy, the figure is also saved to ``./plot/<plotname>``
            (the ``plot`` directory is created when missing).
    """
    # The -1 default used to be applied directly as a slice stop ([:-1]),
    # which silently dropped the last point of each curve.
    mb_stop = _epoch_limit(max_mb_epoch)
    full_stop = _epoch_limit(max_full_epoch)

    # Create a plot
    plt.figure(figsize=(3, 2))

    plt.plot(mb_df['acc_epoch_time'][:mb_stop], mb_df['eval_acc'][:mb_stop], marker='o', markersize=3, linestyle='-', color='b', label="MB")
    plt.plot(full_df['acc_epoch_time'][:full_stop], full_df['eval_acc'][:full_stop], marker='x', linestyle='-', color='r', label="Full")

    # Add labels and title
    plt.xlabel('Training Time (s)')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. Training Time')

    # Display the plot
    plt.grid(True)
    plt.legend(loc="lower right")
    # Start the y-axis at 0 and leave some headroom above the curves, capped at 1.
    # NOTE(review): the cap assumes accuracy is a fraction in [0, 1] - confirm.
    _, ymax = plt.ylim()
    plt.ylim(0, min(1, ymax + 0.125))
    if plotname:
        os.makedirs("plot", exist_ok=True)
        plt.tight_layout()
        plt.savefig(f"./plot/{plotname}")
    plt.show()


In [1]:
# Graphs for which a minibatch-vs-full-graph comparison figure is produced.
graph_names = ["ogbn-arxiv", "reddit", "ogbn-products"]

for graph_name in graph_names:
    # NOTE(review): the minibatch run uses model "sage" while the full-graph
    # run uses "gat" - confirm this cross-model comparison is intended.
    mb_conf = Config(graph_name=graph_name, model="sage", lr="0.001")
    mb_df = mb_conf.get_df(mode="train_minibatch", data_dir="json")

    full_conf = Config(graph_name=graph_name, model="gat", lr="0.001")
    full_df = full_conf.get_df(mode="train_full", data_dir="json")

    # Plot at most the first 10 minibatch rows and the first 20 full-graph
    # rows, and save the figure as a PDF named after the graph.
    plot_data(
        mb_df,
        full_df,
        max_mb_epoch=10,
        max_full_epoch=20,
        plotname=f"{graph_name}_mb_vs_full_cpu.pdf",
    )

NameError: name 'Config' is not defined

In [2]:
# Loads the result frames for every graph without plotting them. This cell
# relies on `graph_names` being defined by an earlier cell; after the loop,
# `mb_df` and `full_df` hold the frames of the last graph in the list.
for graph_name in graph_names:
    mb_conf = Config(graph_name=graph_name, model="sage", lr="0.001")
    mb_df = mb_conf.get_df(mode="train_minibatch", data_dir="json")

    full_conf = Config(graph_name=graph_name, model="gat", lr="0.001")
    full_df = full_conf.get_df(mode="train_full", data_dir="json")

NameError: name 'Config' is not defined