In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install rdkit

In [3]:
import os
import sys
import torch
import time
import datetime
from tqdm import tqdm
sys.path.append("/content/drive/MyDrive/HeckLit")
from utils.rxn import *
from utils.molecule import *
from torch.utils.data import DataLoader
from models.ANN import *
import matplotlib.pyplot as plt

In [None]:
# Each seed drives one complete, independent run: it fixes the row shuffle of the
# three tables and (via torch.manual_seed) weight initialisation and batch order.
rs_list = [1, 2, 3, 4, 5]

# split of train & test set
n_base = 16  # intra sample number
n_sample = 1024  # total sample number
batch_size = 465
weight_decay = 1e-5

# One row per experiment:
# (report header, report parameter prefix, figure title, epochs, learning rate)
experiments = [
    ("Intra", "intra", "Intramolecular", 2500, 1e-4),
    ("Inter", "inter", "Intermolecular", 2500, 1e-4),
    ("BH", "BH", "Buchwald Hartwig", 2500, 1e-3),
    ("Suzuki", "Suzuki", "Suzuki", 2500, 1e-3),
]

# use gpu when available; every tensor/model below is moved with .to(device)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The spreadsheets do not depend on the seed: read them once, reshuffle per seed.
raw_Heck = pd.read_excel("/content/drive/MyDrive/HeckLit/data/Heck/Heck_fp.xlsx")
raw_BH = pd.read_excel("/content/drive/MyDrive/HeckLit/data/BH_HTE/BH_HTE_fp.xlsx")
raw_Suzuki = pd.read_excel("/content/drive/MyDrive/HeckLit/data/Suzuki_HTE/Suzuki_HTE_fp.xlsx")


def _shuffled(frame, seed):
    """Return a copy of `frame` with rows in seed-determined random order and a fresh 0..n-1 index."""
    return frame.sample(random_state=seed, frac=1).reset_index(drop=True)


def _heck_datasets(frame):
    """Split the Heck table into (intra, inter) lists of [drfp tensor, yield / 100].

    A reaction with exactly one reactant goes to the intra list, one with exactly
    two reactants to the inter list; any other reactant count is skipped.
    """
    intra, inter = list(), list()
    rxn_list = df_to_rxn_list(frame)
    for row in tqdm(range(frame.shape[0])):
        rxn = rxn_list[row]
        drfp = torch.tensor(read_drfp(frame.loc[row, "drfp"]), dtype=torch.float32)
        y = rxn.rxn_yield / 100
        n_reactants = len(rxn.reactants)
        if n_reactants == 2:
            inter.append([drfp, y])
        elif n_reactants == 1:
            intra.append([drfp, y])
    return intra, inter


def _fp_dataset(frame, yield_column):
    """Build a list of [drfp tensor, yield / 100] from a table with a "drfp" column."""
    samples = list()
    for row in tqdm(range(frame.shape[0])):
        drfp = torch.tensor(read_drfp(frame.loc[row, "drfp"]), dtype=torch.float32)
        samples.append([drfp, frame.loc[row, yield_column] / 100])
    return samples


def _evaluate(model, loader, device):
    """Score `model` on every sample of `loader`.

    Returns (r2, rmse, pred, true); pred/true are the concatenated per-batch arrays.
    """
    preds, trues = list(), list()
    with torch.no_grad():
        for x, y in loader:
            preds.append(model(x.to(device)).cpu().numpy())
            trues.append(torch.unsqueeze(y, dim=1).numpy())
    pred, true = np.concatenate(preds), np.concatenate(trues)
    return R2(pred, true), RMSE(pred, true), pred, true


def _train(report, header, model, optimizer, criterion, epochs, train_set, test_set, batch_size, device):
    """Train `model` for `epochs` epochs, scoring train and test set after each one.

    Returns (history, best): `history` maps "train_R2", "train_RMSE", "test_R2",
    "test_RMSE" to per-epoch lists; `best` is the snapshot of the epoch with the
    highest test R2 (metrics plus "test_pred" / "test_true").
    """
    # Optimisation keeps the original shuffled, drop_last batching.
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=True)
    # Metrics use dedicated loaders without shuffle/drop_last so that every epoch
    # is scored on the same, complete set of samples.
    train_eval_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)
    test_eval_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    history = {"train_R2": [], "train_RMSE": [], "test_R2": [], "test_RMSE": []}
    best = None

    report.write("\n%s Start training\n" % header)

    for epoch in tqdm(range(epochs)):
        # Training
        model.train()
        for x, y in train_loader:
            x = x.to(device)
            y = torch.unsqueeze(y, dim=1).to(device)
            loss = criterion(model(x).float(), y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluation mode: only matters if ANN contains dropout / batch-norm layers
        # (not visible from this notebook); otherwise this is a no-op.
        model.eval()
        train_r2, train_rmse, _, _ = _evaluate(model, train_eval_loader, device)
        test_r2, test_rmse, test_pred, test_true = _evaluate(model, test_eval_loader, device)

        history["train_R2"].append(train_r2)
        history["train_RMSE"].append(train_rmse)
        history["test_R2"].append(test_r2)
        history["test_RMSE"].append(test_rmse)

        if best is None or test_r2 > best["test_R2"]:
            best = {"train_R2": train_r2, "train_RMSE": train_rmse,
                    "test_R2": test_r2, "test_RMSE": test_rmse,
                    "test_pred": test_pred, "test_true": test_true}

    return history, best


def _write_performance(report, header, history, best):
    """Write mean +- std of the last 10 epochs and the best-epoch value of each metric."""
    last = {key: np.array(values[-10:]) for key, values in history.items()}
    report.write("\n")
    report.write("%s dataset Performance\n" % header)
    # Performance in train set
    report.write("R2 of train set is:%.3f+-%f\tbest:%f\n" % (
        last["train_R2"].mean(), last["train_R2"].std(), best["train_R2"]))
    report.write("RMSE of train set is: %.3f+-%f\tbest:%f\n" % (
        last["train_RMSE"].mean(), last["train_RMSE"].std(), best["train_RMSE"]))
    # Performance in test set
    report.write("R2 of test set is:%.3f+-%.3f\tbest:%f\n" % (
        last["test_R2"].mean(), last["test_R2"].std(), best["test_R2"]))
    report.write("RMSE of test set is: %.3f+-%f\tbest:%f\n" % (
        last["test_RMSE"].mean(), last["test_RMSE"].std(), best["test_RMSE"]))


def _plot_performance(title, epochs, history, best, n_sample, out_dir):
    """Save and show the RMSE curves plus the best-epoch predicted-vs-observed scatter."""
    fig, (ax_curve, ax_scatter) = plt.subplots(1, 2, dpi=120, figsize=(20, 7))

    # Training Fig
    steps = np.linspace(1, epochs, epochs)
    ax_curve.plot(steps, history["train_RMSE"], color=[236 / 255, 164 / 255, 124 / 255])
    ax_curve.plot(steps, history["test_RMSE"], color=[117 / 255, 157 / 255, 219 / 255])
    ax_curve.legend(["train set RMSE", "test set RMSE"], loc="upper left", prop={'size': 10})
    ax_curve.set_xlabel("Epoch", fontsize=10)
    ax_curve.set_ylabel("RMSE value", fontsize=10)
    ax_curve.set_title("The RMSE value during training", fontsize=13)

    # Test set performance (yields rescaled from the 0-1 training scale to percent)
    observed = np.array(best["test_true"]).flatten() * 100
    predicted = np.array(best["test_pred"]).flatten() * 100
    ax_scatter.scatter(predicted, observed, alpha=0.7, marker=".")
    ax_scatter.set_xlabel("Predicted Yield", fontsize=10)
    ax_scatter.set_ylabel("Observed Yield", fontsize=10)
    diagonal = np.linspace(0, 100, 100)
    ax_scatter.plot(diagonal, diagonal, linestyle="--", color="r")
    ax_scatter.set_title("Test set performance", fontsize=15)

    fig.suptitle("%s dataset addition(n_Sample=%d)" % (title, n_sample), fontsize=16)
    fig.tight_layout()
    fig.savefig("%s/%s dataset addition Performance Figure.png" % (out_dir, title))
    plt.show()
    # Release the figure: 4 large figures per seed would otherwise pile up in memory.
    plt.close(fig)


# {seed: {report header: (history, best)}} -- kept so later cells can inspect results.
results_by_seed = dict()

for rs in rs_list:
    # 1. import data
    random_state = rs
    # Seed torch too, so weight init and batch shuffling are repeatable per seed.
    torch.manual_seed(random_state)
    data_Heck = _shuffled(raw_Heck, random_state)
    data_BH = _shuffled(raw_BH, random_state)
    data_Suzuki = _shuffled(raw_Suzuki, random_state)

    # 2. build dataset
    intra_dataset, inter_dataset = _heck_datasets(data_Heck)
    BH_dataset = _fp_dataset(data_BH, "yield")
    Suzuki_dataset = _fp_dataset(data_Suzuki, "Product_Yield_PCT_Area_UV")

    # Test set: every intra sample not used by the intra training split.
    testset = intra_dataset[n_sample:]
    if not testset:
        raise ValueError(
            "need more than n_sample=%d intramolecular reactions to form a test set, got %d"
            % (n_sample, len(intra_dataset)))
    # Model input width; assumes every fingerprint has the same length.
    len_drfp = intra_dataset[0][0].shape[0]

    # Training sets: the pure intra split, or n_base intra samples topped up from
    # another dataset to n_sample in total.
    intra_trainset = intra_dataset[0: n_sample]
    inter_trainset = intra_dataset[: n_base] + inter_dataset[: n_sample - n_base]
    BH_trainset = intra_dataset[: n_base] + BH_dataset[: n_sample - n_base]
    Suzuki_trainset = intra_dataset[: n_base] + Suzuki_dataset[: n_sample - n_base]
    train_sets = {"Intra": intra_trainset, "Inter": inter_trainset,
                  "BH": BH_trainset, "Suzuki": Suzuki_trainset}

    # report
    dir_path = "/content/drive/MyDrive/HeckLit/exp/Heck_split/Heck_drfp_add_%s" % datetime.datetime.now()
    os.makedirs(dir_path, exist_ok=True)
    # Context manager: the report is flushed and closed even if training fails.
    with open("%s/Model_Training_Report.txt" % dir_path, mode="w") as f:
        f.write("random_state=%s\n" % random_state)
        f.write("n_base=%s\n" % n_base)
        f.write("n_sample=%s\n" % n_sample)
        f.write("batch_size=%s\n" % batch_size)

        # 3. build every model first (same construction order as before), logging
        # each experiment's own epochs / learning rate.
        runs = list()
        for header, prefix, title, epochs, lr in experiments:
            model = ANN(input_size=len_drfp).to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
            criterion = nn.MSELoss()

            # writedown params
            f.write("\n%s params:\n" % header)
            f.write("%s_t=%s\n" % (prefix, epochs))
            f.write("%s_lr=%s\n" % (prefix, lr))

            runs.append((header, title, epochs, model, optimizer, criterion))

        # 4. train, evaluate and plot each experiment in turn
        results = dict()
        for header, title, epochs, model, optimizer, criterion in runs:
            history, best = _train(f, header, model, optimizer, criterion, epochs,
                                   train_sets[header], testset, batch_size, device)
            _write_performance(f, header, history, best)
            _plot_performance(title, epochs, history, best, n_sample, dir_path)
            results[header] = (history, best)

    results_by_seed[random_state] = results