# Merge data

In [1]:
import pandas as pd
import numpy as np

def merge_csv_files(folder1, folder2, output_folder):
    """Add column 1 of each ``folder1`` CSV to the same-named ``folder2`` CSV.

    For every ``*.csv`` filename present in both folders, the column at
    position 1 of the ``folder1`` table is inserted into the ``folder2``
    table as a new column named ``y_pred`` at position 2. The result is
    written to ``output_folder`` under the same filename (no index column).

    Rows are matched by pandas index alignment, i.e. by row number for
    freshly read CSVs. When the two tables differ in length, rows of the
    ``folder2`` table without a partner get NaN and surplus ``folder1`` rows
    are dropped; a warning is printed in that case.

    Parameters
    ----------
    folder1 : str
        Directory whose CSVs provide the values for ``y_pred``.
    folder2 : str
        Directory whose CSVs provide the base tables.
    output_folder : str
        Destination directory; created if it does not exist.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        Propagated from ``DataFrame.insert`` when a ``folder2`` table
        already contains a ``y_pred`` column.
    """
    import os

    # Make the function usable on its own instead of relying on the caller
    # to have created the destination beforehand.
    os.makedirs(output_folder, exist_ok=True)

    def _csv_paths(folder):
        # Map bare filename -> full path for every CSV directly in `folder`.
        return {
            name: os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.endswith('.csv')
        }

    files1 = _csv_paths(folder1)
    files2 = _csv_paths(folder2)

    # Sorted so the processing order (and the printed log) is reproducible;
    # iterating the raw set intersection is not.
    for filename in sorted(files1.keys() & files2.keys()):
        df1 = pd.read_csv(files1[filename])
        df2 = pd.read_csv(files2[filename])

        # Both tables need at least two columns: column 1 is read from df1,
        # and position 2 must be a valid insertion point in df2.
        if df1.shape[1] < 2 or df2.shape[1] < 2:
            print(f"Skipping {filename} due to unexpected column structure.")
            continue

        if len(df1) != len(df2):
            print(
                f"Warning: {filename} has {len(df1)} rows in {folder1} but "
                f"{len(df2)} rows in {folder2}; unmatched rows get NaN in y_pred."
            )

        df2.insert(2, 'y_pred', df1.iloc[:, 1])

        output_file = os.path.join(output_folder, filename)
        df2.to_csv(output_file, index=False)
        print(f"Merged {filename} and saved to {output_file}")


if __name__ == "__main__":
    import os

    # Input and output locations, relative to the working directory.
    folder1 = './cal_data'
    folder2 = './pred_data'
    output_folder = './merged_data'

    os.makedirs(output_folder, exist_ok=True)

    # The two input folders are handed over in swapped order: pred_data is
    # the function's first folder (its column 1 becomes `y_pred`), cal_data
    # is the second (its tables receive the new column).
    merge_csv_files(folder1=folder2, folder2=folder1, output_folder=output_folder)

Merged 573_0.001.csv and saved to ./merged_data\573_0.001.csv
Merged 673_0.001.csv and saved to ./merged_data\673_0.001.csv
Merged 623_0.010.csv and saved to ./merged_data\623_0.010.csv
Merged 673_0.010.csv and saved to ./merged_data\673_0.010.csv
Merged 673_1.000.csv and saved to ./merged_data\673_1.000.csv
Merged 623_0.001.csv and saved to ./merged_data\623_0.001.csv
Merged 573_1.000.csv and saved to ./merged_data\573_1.000.csv
Merged 523_0.100.csv and saved to ./merged_data\523_0.100.csv
Merged 523_0.010.csv and saved to ./merged_data\523_0.010.csv
Merged 673_0.100.csv and saved to ./merged_data\673_0.100.csv
Merged 623_1.000.csv and saved to ./merged_data\623_1.000.csv
Merged 573_0.010.csv and saved to ./merged_data\573_0.010.csv
Merged 523_0.001.csv and saved to ./merged_data\523_0.001.csv
Merged 573_0.100.csv and saved to ./merged_data\573_0.100.csv
Merged 523_1.000.csv and saved to ./merged_data\523_1.000.csv
Merged 623_0.100.csv and saved to ./merged_data\623_0.100.csv


## Curve figure

In [2]:
import os
import re
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
from scipy.interpolate import interp1d
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages


# --- Input / output locations (relative to the working directory) ---
data_pred_dir = "./merged_data"
data_org_dir = "./alloy_dataset/AQ80"
out_dir = "./fig_out"
os.makedirs(out_dir, exist_ok=True)
pdf_path = os.path.join(out_dir, "group_plots.pdf")

# Data files are named "<digits>_<digits-and-dots>.csv"; load_csv_group turns
# the two captured groups into an int temperature and a float strain rate.
pattern = re.compile(r"^(\d+)_([0-9.]+)\.csv$", re.IGNORECASE)

# --- Global matplotlib appearance ---
mpl.rcParams.update({
    # Text
    "font.family": "Arial",
    "font.size": 12,
    "axes.labelsize": 12,
    # Frame
    "axes.linewidth": 1.2,
    "legend.frameon": False,
    # Ticks: drawn inward, mirrored on the top/right edges by default
    "xtick.direction": "in",
    "ytick.direction": "in",
    "xtick.top": True,
    "ytick.right": True,
    "xtick.major.width": 1.0,
    "ytick.major.width": 1.0,
})

# Curve/bar palette; the plotting functions cycle through it by index.
custom_colors = ['#C93735', '#F8AB61', '#78A9CD', '#333A8C', "#C92CA2"]

# np.array() already copies the list, so no explicit slice copy is needed.
colors = np.array(custom_colors)

def load_csv_group(data_dir, needed_cols, name_pattern=None):
    """Read the ``<T>_<sr>.csv`` files of a directory into a dict keyed by ``(T, sr)``.

    Every ``*.csv`` directly inside ``data_dir`` whose filename matches the
    naming pattern is loaded, reduced to ``needed_cols``, stripped of rows
    containing NaN and re-indexed from 0. Files that cannot be read, whose
    name does not yield valid numbers, that lack a required column, or that
    end up empty are skipped with a printed message.

    Parameters
    ----------
    data_dir : str
        Directory to scan (not recursive).
    needed_cols : iterable of str
        Columns every file must contain; only these are kept, in this order.
    name_pattern : compiled regex, optional
        Pattern with two groups (temperature, strain rate) matched against
        the bare filename. Defaults to the module-level ``pattern``.

    Returns
    -------
    data : dict[(int, float), pandas.DataFrame]
    temps : list[int]
        Sorted distinct temperatures that were loaded.
    srs : list[float]
        Sorted distinct strain rates that were loaded.
    """
    if name_pattern is None:
        name_pattern = pattern  # module-level default defined with the config

    # A concrete list, so tuples/sets work for column selection too
    # (df[("a", "b")] would be treated as a single column key).
    cols = list(needed_cols)

    data = {}
    temps_set, srs_set = set(), set()

    # Sorted for a reproducible processing order (glob order is arbitrary).
    for path in sorted(glob.glob(os.path.join(data_dir, "*.csv"))):
        name = os.path.basename(path)
        m = name_pattern.match(name)
        if not m:
            continue
        T_str, sr_str = m.groups()
        try:
            T, sr = int(T_str), float(sr_str)
        except ValueError:
            # The character class [0-9.]+ also accepts strings such as
            # "1.0.0" that float() rejects; skip instead of aborting the scan.
            print(f"[Skip] cannot parse temperature/strain rate from name: {name}")
            continue

        try:
            df = pd.read_csv(path)
        except Exception as e:
            print(f"[Skip] read error: {name} -> {e}")
            continue

        if not set(cols).issubset(df.columns):
            print(f"[Skip] missing columns: {name} requires columns {needed_cols}")
            continue

        df = df[cols].dropna().reset_index(drop=True)
        if df.empty:
            print(f"[Skip] empty data: {name}")
            continue

        if (T, sr) in data:
            # e.g. "523_0.1.csv" and "523_0.100.csv" map to the same key.
            print(f"[Warn] duplicate condition {(T, sr)}: {name} replaces an earlier file")
        data[(T, sr)] = df
        temps_set.add(T)
        srs_set.add(sr)
    return data, sorted(temps_set), sorted(srs_set)

# ======= read data =======
# Both directories are parsed into {(T, sr): DataFrame} plus the sorted lists
# of temperatures and strain rates that were found.
data_pred, temps1, srs1 = load_csv_group(
    data_pred_dir,
    ["y_true", "y_pred", "y_cal"],
)
data_org, temps2, srs2 = load_csv_group(
    data_org_dir,
    ["strain", "stress"],
)

# Keep only the conditions that occur in both datasets.
temps = sorted(set(temps1).intersection(temps2))
srs = sorted(set(srs1).intersection(srs2))

# Stop here if either directory produced no usable table.
if not (data_pred and data_org):
    raise RuntimeError("No valid CSV files parsed, please check paths and column names!")

print(f"âœ… Parsed {len(data_pred)} groups of prediction data, {len(data_org)} groups of experimental data")
print("Temperature list:", temps)
print("Strain rate list:", srs)



âœ… Parsed 16 groups of prediction data, 16 groups of experimental data
Temperature list: [523, 573, 623, 673]
Strain rate list: [0.001, 0.01, 0.1, 1.0]


In [5]:
# Optional colour overrides keyed by (temperature, strain_rate). Left empty,
# so the plotting functions fall back to the shared palette.
custom_color_map = dict()

# Column of the merged tables drawn as scatter points by plot_by_strainrate;
# set to "y_pred" to show that series instead.
plot_y = "y_cal"

def plot_by_strainrate(data_pred, data_org, temps, srs, out_dir, pdf=None, custom_color_map=None):
    """Draw one stress-strain figure per strain rate, one colour per temperature.

    For each strain rate in ``srs`` a figure is created; for each temperature
    in ``temps`` whose ``(T, sr)`` key exists in both ``data_pred`` and
    ``data_org``, the ``y_true`` column is drawn as a solid line and the
    column named by the module-level ``plot_y`` as scatter points. Both use
    x positions spaced evenly between 0.05 and 0.8 rather than values read
    from the data. ``data_org`` is only consulted for key presence.

    Each figure is saved to ``out_dir`` as ``strainrate_<sr>.png`` and, when
    ``pdf`` is given, appended to it via ``pdf.savefig``. Curve labels are
    attached to the artists, but no legend is drawn here.

    Parameters
    ----------
    data_pred, data_org : dict keyed by ``(T, sr)``
    temps, srs : sequences of temperatures / strain rates to plot
    out_dir : str, directory for the PNG files
    pdf : object with a ``savefig(fig)`` method, optional
    custom_color_map : dict keyed by ``(T, sr)``, optional colour overrides;
        other curves take ``colors[i % len(colors)]`` from the module palette.
    """
    for sr in srs:
        fig, ax = plt.subplots(figsize=(3.5, 2.8))

        for idx, T in enumerate(temps):
            condition = (T, sr)
            # Only plot conditions available in both datasets.
            if not (condition in data_pred and condition in data_org):
                continue
            frame = data_pred[condition]

            has_override = bool(custom_color_map) and condition in custom_color_map
            curve_color = custom_color_map[condition] if has_override else colors[idx % len(colors)]

            # Shared x positions for the line and the points.
            x_vals = np.linspace(0.05, 0.8, len(frame))

            ax.plot(
                x_vals, frame["y_true"],
                color=curve_color, linewidth=1.8, linestyle="-",
                label=f"T={T}K exp",
            )
            ax.scatter(
                x_vals, frame[plot_y],
                color=curve_color, marker="o", s=20,
                label=f"T={T}K pred",
            )

        # Axis labels and ranges (upper y limit stays automatic).
        ax.set_xlabel("True strain")
        ax.set_ylabel("True stress (MPa)")
        ax.set_xlim(0.0, 0.85)
        ax.set_ylim(0, None)
        ax.grid(True, linestyle="--", linewidth=0.6, alpha=0.6)

        # Ticks on the bottom/left edges only, at most five intervals per axis.
        ax.tick_params(axis='x', which='both', bottom=True, top=False)
        ax.tick_params(axis='y', which='both', left=True, right=False)
        ax.xaxis.set_major_locator(plt.MaxNLocator(5))
        ax.yaxis.set_major_locator(plt.MaxNLocator(5))

        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)

        png_path = os.path.join(out_dir, f"strainrate_{sr}.png")
        fig.savefig(png_path, dpi=300, bbox_inches="tight")
        if pdf is not None:
            pdf.savefig(fig)
        plt.close(fig)

# Collect all strain-rate figures in one multi-page PDF; the per-figure PNG
# files are written by plot_by_strainrate itself.
with PdfPages(pdf_path) as pdf:
    plot_by_strainrate(
        data_pred,
        data_org,
        temps,
        srs,
        out_dir,
        pdf=pdf,
        custom_color_map=custom_color_map,
    )

print(f"âœ… Figures have been saved: {pdf_path}")

âœ… Figures have been saved: ./fig_out\group_plots.pdf


## MSE bar figure


In [4]:
def plot_rmse_by_strainrate_bar(data_pred, temps, srs, out_dir, pdf=None, custom_color_map=None):
    """Draw one paired bar chart of mean squared error per strain rate.

    Despite "rmse" in the function and file names, the plotted quantity is
    the MSE: ``mean_squared_error`` is called with its default arguments and
    no square root is taken. For every strain rate in ``srs`` and every
    temperature in ``temps`` whose ``(T, sr)`` key is in ``data_pred``, two
    values are computed against ``y_true``: one for ``y_pred`` (hatched bar)
    and one for ``y_cal`` (filled bar).

    Each figure is saved to ``out_dir`` as ``rmse_bar_strainrate_<sr>.png``
    and, when ``pdf`` is given, appended via ``pdf.savefig``. All computed
    values are also written to ``mse_results.csv`` in ``out_dir``. Strain
    rates with no matching data produce no figure.

    Parameters
    ----------
    data_pred : dict keyed by ``(T, sr)`` with DataFrames containing the
        columns ``y_true``, ``y_pred`` and ``y_cal``
    temps, srs : sequences of temperatures / strain rates
    out_dir : str, output directory (created if missing)
    pdf : object with a ``savefig(fig)`` method, optional
    custom_color_map : dict keyed by ``(T, sr)``, optional colour overrides;
        other bars take ``colors[i % len(colors)]`` from the module palette.

    Returns
    -------
    None
    """
    os.makedirs(out_dir, exist_ok=True)
    bar_w = 0.35  # width of one bar; each temperature gets two adjacent bars

    all_mse_records = []

    for sr in srs:
        temp_list, mse_pred_list, mse_cal_list = [], [], []

        for T in temps:
            key = (T, sr)
            if key not in data_pred:
                continue
            df = data_pred[key]

            mse_pred = mean_squared_error(df["y_true"], df["y_pred"])
            mse_cal = mean_squared_error(df["y_true"], df["y_cal"])

            temp_list.append(T)
            mse_pred_list.append(mse_pred)
            mse_cal_list.append(mse_cal)

            all_mse_records.append({
                "Temperature (K)": T,
                "Strain rate (s^-1)": sr,
                "MSE_pred": mse_pred,
                "MSE_cal": mse_cal
            })

        if not temp_list:
            continue

        x = np.arange(len(temp_list))
        fig, ax = plt.subplots(figsize=(3.5, 2.8))

        for i, T in enumerate(temp_list):
            key = (T, sr)

            if custom_color_map and key in custom_color_map:
                c = custom_color_map[key]
            else:
                c = colors[i % len(colors)]

            # Only the first pair carries legend labels, so a legend (if one
            # is ever added) lists each series once.
            ax.bar(x[i] - bar_w/2, mse_pred_list[i], width=bar_w,
                color="white", edgecolor=c, hatch="//",
                linewidth=1.2, label="MSE (y_pred vs y_true)" if i == 0 else "")

            ax.bar(x[i] + bar_w/2, mse_cal_list[i], width=bar_w,
                color=c, edgecolor=c,
                linewidth=1.2, alpha=0.9, label="MSE (y_cal vs y_true)" if i == 0 else "")

        # Fixed tick positions with one temperature label per bar pair. The
        # x axis must keep this fixed locator: installing MaxNLocator
        # afterwards would move the ticks while the label list stays as is,
        # putting temperatures under the wrong bars.
        ax.set_xticks(x, [str(T) for T in temp_list], rotation=0)
        ax.set_xlabel("Temperature (K)")
        # A squared error of a stress in MPa has the unit MPa^2.
        ax.set_ylabel("MSE (MPa$^2$)")
        ax.grid(True, axis="y", linestyle="--", linewidth=0.6, alpha=0.6)
        ax.yaxis.set_major_locator(plt.MaxNLocator(5))
        ax.tick_params(axis='x', which='both', bottom=True, top=False)
        ax.tick_params(axis='y', which='both', left=True, right=False)

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        out_path = os.path.join(out_dir, f"rmse_bar_strainrate_{sr}.png")
        fig.savefig(out_path, dpi=300, bbox_inches="tight")
        if pdf is not None:
            pdf.savefig(fig)
        plt.close(fig)

        print(f"âœ… Figures have been saved: {out_path}")

    if all_mse_records:
        df_mse = pd.DataFrame(all_mse_records)
        csv_path = os.path.join(out_dir, "mse_results.csv")
        df_mse.to_csv(csv_path, index=False)
        print(f"ðŸ“„ MSE data has been saved to: {csv_path}")

# The bar charts get their own PDF. Opening pdf_path again here would replace
# the multi-page file that the strain-rate curve cell writes to that same
# path whenever the notebook is run top to bottom.
mse_pdf_path = os.path.join(out_dir, "mse_bar_plots.pdf")
with PdfPages(mse_pdf_path) as pdf:
    plot_rmse_by_strainrate_bar(data_pred, temps, srs, out_dir, pdf, custom_color_map)


âœ… Figures have been saved: ./fig_out\rmse_bar_strainrate_0.001.png
âœ… Figures have been saved: ./fig_out\rmse_bar_strainrate_0.01.png
âœ… Figures have been saved: ./fig_out\rmse_bar_strainrate_0.1.png
âœ… Figures have been saved: ./fig_out\rmse_bar_strainrate_1.0.png
ðŸ“„ MSE data has been saved to: ./fig_out\mse_results.csv
