In [None]:
# Mount Google Drive at /content/drive. The cells below read training logs from,
# and save plots to, folders under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Path configuration: one sub-folder per dataset under base_path
datasets = ['imdb', 'yelp', 'sst2']
base_path = '/content/drive/MyDrive/Colab Notebooks'

# Columns to compare across models
metrics = [
    'accuracy', 'precision', 'recall', 'auc', 'val_accuracy', 'val_precision', 'val_recall', 'val_auc'
]

# Folder the charts are saved into (created if it does not exist yet)
output_path = '/content/drive/MyDrive/Colab Notebooks/plots'
os.makedirs(output_path, exist_ok=True)

# final_results[dataset][model_name] -> Series of last-row values indexed by `metrics`
final_results = {dataset: {} for dataset in datasets}

# Read every training log and keep its last row as the model's final performance
for dataset in datasets:
    dataset_path = os.path.join(base_path, dataset)
    # os.listdir returns names in arbitrary order; sort so the model order
    # (and therefore the bar order in the charts) is the same on every run.
    model_files = sorted(f for f in os.listdir(dataset_path) if f.endswith('_training_log.csv'))

    for model_file in model_files:
        # The text before the first underscore is used as the model name.
        # NOTE(review): two files sharing that prefix would overwrite each other here.
        model_name = model_file.split('_')[0]
        model_path = os.path.join(dataset_path, model_file)

        # First CSV column becomes the index; column names are lower-cased so
        # lookups do not depend on the header's capitalisation.
        data = pd.read_csv(model_path, index_col=0)
        data.columns = [col.lower() for col in data.columns]

        # A log with a header but no rows has no "last row" to report.
        if len(data) == 0:
            print(f'Skipping empty training log: {model_path}')
            continue

        # reindex() returns NaN for any metric the log does not contain instead of
        # raising KeyError, so one incomplete log no longer aborts the whole cell.
        final_results[dataset][model_name] = data.iloc[-1].reindex(metrics)

# 1. Final-performance comparison: one bar chart per metric
for metric in metrics:
    plt.figure(figsize=(12, 8))

    for dataset in datasets:
        # One bar per model of this dataset, labelled "<dataset>_<model>",
        # whose height is the value stored for the current metric.
        bar_labels = []
        bar_heights = []
        for model, last_epoch in final_results[dataset].items():
            bar_labels.append(f"{dataset}_{model}")
            bar_heights.append(last_epoch[metric])

        # All bars of one dataset are drawn in a single call and share one legend entry
        plt.bar(bar_labels, bar_heights, label=dataset)

    # Chart decoration
    plt.xlabel('Model and Dataset')
    plt.ylabel(metric.capitalize())
    plt.title(f'Comparison of {metric.capitalize()} for Different Models on Each Dataset')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)

    # Write the chart to the output folder as JPEG, then close the figure
    plt.savefig(os.path.join(output_path, f'comparison_{metric}.jpeg'), format='jpeg')
    plt.close()

# 2. Training-curve comparison: one chart per metric, one line per (dataset, model)

# Parse every log once up front instead of re-reading each CSV for every metric.
training_logs = []  # (curve label, DataFrame) pairs, in plotting order
for dataset in datasets:
    dataset_path = os.path.join(base_path, dataset)
    # os.listdir returns names in arbitrary order; sort so legend order and
    # line colours are the same on every run.
    model_files = sorted(f for f in os.listdir(dataset_path) if f.endswith('_training_log.csv'))

    for model_file in model_files:
        model_name = model_file.split('_')[0]  # text before the first underscore
        model_path = os.path.join(dataset_path, model_file)

        # First CSV column becomes the index; column names are lower-cased so
        # lookups do not depend on the header's capitalisation.
        data = pd.read_csv(model_path, index_col=0)
        data.columns = [col.lower() for col in data.columns]
        training_logs.append((f"{dataset}_{model_name}", data))

for metric in metrics:
    # Only logs that actually contain this metric contribute a curve
    curves = [(label, log) for label, log in training_logs if metric in log.columns]
    if not curves:
        # Nothing to draw: skip rather than save a blank chart with an empty legend
        print(f'No training log has a "{metric}" column; skipping its curve chart.')
        continue

    plt.figure(figsize=(12, 8))
    for label, log in curves:
        # Plot the first 10 rows of the log against their index values
        plt.plot(log.index[:10], log[metric].iloc[:10], label=label)

    plt.title(f'{metric.capitalize()} over Epochs (First 10 Epochs)')
    plt.xlabel('Epoch')
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.grid(True)

    # Save the chart as a JPEG file and close the figure
    output_file = os.path.join(output_path, f'curve_comparison_{metric}.jpeg')
    plt.savefig(output_file, format='jpeg')
    plt.close()


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Path configuration: one sub-folder per dataset under base_path
datasets = ['imdb', 'yelp', 'sst2']
base_path = '/content/drive/MyDrive/Colab Notebooks'

# Columns to plot
metrics = ['loss', 'val_loss', 'accuracy', 'val_accuracy', 'auc']

# Folder the charts are saved into (created if it does not exist yet)
output_path = '/content/drive/MyDrive/Colab Notebooks/plots'
os.makedirs(output_path, exist_ok=True)

# For each dataset, load the transformer log and draw one chart per metric
for dataset in datasets:
    # The file name is the IMDB template with the dataset name substituted in
    model_file = 'transformer_imdb_training_log.csv'.replace('imdb', dataset)
    model_path = os.path.join(base_path, dataset, model_file)

    # Datasets without a transformer log are skipped
    if not os.path.exists(model_path):
        continue

    # First CSV column becomes the index; column names are lower-cased so
    # lookups do not depend on the header's capitalisation.
    data = pd.read_csv(model_path, index_col=0)
    data.columns = [col.lower() for col in data.columns]

    # Only the first 10 rows of the log are plotted
    first_epochs = data.iloc[:10]

    for metric in metrics:
        # Metrics missing from this log are skipped
        if metric not in data.columns:
            continue

        plt.figure(figsize=(10, 6))
        plt.plot(first_epochs.index, first_epochs[metric], label=f'{dataset}_{metric}', marker='o')

        # Chart decoration
        plt.xlabel('Epoch')
        plt.ylabel(metric.capitalize())
        plt.title(f'Transformer {metric.capitalize()} over Epochs (First 10 Epochs) - {dataset.upper()}')
        plt.legend()
        plt.grid(True)

        # Save the chart as JPEG, then close the figure to release its memory
        plt.savefig(
            os.path.join(output_path, f'transformer_{dataset}_{metric}_first10epochs.jpeg'),
            format='jpeg',
        )
        plt.close()


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Path configuration: one sub-folder per dataset under base_path
datasets = ['imdb', 'yelp', 'sst2']
base_path = '/content/drive/MyDrive/Colab Notebooks'

# Metrics to plot
metrics = ['loss', 'val_loss', 'accuracy', 'val_accuracy', 'auc']

# Folder the charts are saved into (created if it does not exist yet)
output_path = '/content/drive/MyDrive/Colab Notebooks/plots'
os.makedirs(output_path, exist_ok=True)

# Line colour used for each dataset
colors = {
    'imdb': 'blue',
    'yelp': 'green',
    'sst2': 'red'
}

# Load each dataset's transformer log once, instead of re-reading it for every metric
transformer_logs = {}
for dataset in datasets:
    dataset_path = os.path.join(base_path, dataset)
    model_file = f'transformer_{dataset}_training_log.csv'  # transformer log file name
    model_path = os.path.join(dataset_path, model_file)

    if not os.path.exists(model_path):
        # Report the gap instead of silently dropping the dataset from every chart
        print(f'Transformer log not found, skipping {dataset}: {model_path}')
        continue

    # First CSV column becomes the index; column names are lower-cased so
    # lookups do not depend on the header's capitalisation.
    data = pd.read_csv(model_path, index_col=0)
    data.columns = [col.lower() for col in data.columns]
    transformer_logs[dataset] = data

# One chart per metric, comparing the datasets against each other
for metric in metrics:
    # Only datasets whose log contains this metric contribute a curve
    curves = [(dataset, log) for dataset, log in transformer_logs.items() if metric in log.columns]
    if not curves:
        # Nothing to draw: skip rather than save a blank chart with an empty legend
        print(f'No transformer log has a "{metric}" column; skipping its comparison chart.')
        continue

    plt.figure(figsize=(10, 6))
    for dataset, log in curves:
        # Plot the first 10 rows of the log against their index values
        plt.plot(log.index[:10], log[metric].iloc[:10], label=f'{dataset.upper()} {metric}', color=colors[dataset], marker='o')

    # Chart decoration
    plt.title(f'Transformer {metric.capitalize()} over Epochs (First 10 Epochs) Across Datasets')
    plt.xlabel('Epoch')
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.grid(True)

    # Save the chart as a JPEG file
    output_file = os.path.join(output_path, f'transformer_{metric}_comparison_first10epochs.jpeg')
    plt.savefig(output_file, format='jpeg')
    plt.close()  # close the figure to release its memory
