### This code preprocesses all of the CSV files under the `../Data_Output` path and generates a processed CSV file with a binary label (success/fail) for training (56 × 9 = 504 samples).

In [15]:
from sklearn.cluster import DBSCAN
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import re
import concurrent.futures
from tqdm import tqdm  # 可选进度条

In [6]:
def find_skyrmion_center_by_clustering(df,
                                       mz_col=None,
                                       x_col="% X", y_col="Y",
                                       threshold=0.5,
                                       eps=5.0, min_samples=5):
    """
    Estimate the skyrmion centre from the largest DBSCAN cluster of
    high-mZ points.

    Rows whose ``mz_col`` value is strictly greater than ``threshold`` are
    clustered on their (x, y) coordinates with DBSCAN. The centre is the
    unweighted mean position of the most populated cluster; points labelled
    as noise (-1) are ignored. Because it works on point coordinates only,
    it does not require a regular grid.

    Parameters:
        df: DataFrame holding the coordinate columns and the mZ column.
        mz_col: name of the mZ column. When None, the first column whose
            name contains both 'mZ' and '@' is used.
        x_col, y_col: names of the coordinate columns.
        threshold: rows are kept only when mZ > threshold.
        eps, min_samples: forwarded unchanged to DBSCAN.

    Returns:
        (x_center, y_center, mz_avg) for the dominant cluster, or
        (nan, nan, nan) when no row exceeds the threshold or every selected
        row is classified as noise.

    Raises:
        ValueError: if ``mz_col`` is None and no matching column exists.
    """
    not_found = (np.nan, np.nan, np.nan)

    # Auto-detect the mZ column when the caller did not name one.
    if mz_col is None:
        matching_columns = [name for name in df.columns
                            if 'mZ' in name and '@' in name]
        if len(matching_columns) == 0:
            raise ValueError("未找到 mZ 列")
        mz_col = matching_columns[0]

    # Keep only the points above the threshold.
    above = df.loc[df[mz_col] > threshold]
    if above.empty:
        return not_found

    # Cluster the surviving points on their spatial coordinates.
    model = DBSCAN(eps=eps, min_samples=min_samples)
    model.fit(above[[x_col, y_col]].to_numpy())
    labels = pd.Series(model.labels_, index=above.index)

    # Discard noise (-1); bail out when nothing but noise is left.
    clustered = labels[labels != -1]
    if clustered.empty:
        return not_found

    # The most populated cluster is taken to be the skyrmion.
    dominant = clustered.value_counts().idxmax()
    core = above.loc[labels == dominant]

    # Plain (unweighted) averages over the dominant cluster.
    return core[x_col].mean(), core[y_col].mean(), core[mz_col].mean()

In [19]:

# ====== Parameters of this preprocessing run ======
# Centre-detection settings forwarded to find_skyrmion_center_by_clustering.
MZ_THRESHOLD = 0.3
DBSCAN_EPS = 8.0
DBSCAN_MIN_SAMPLES = 5
# A run is labelled "success" when the peak-to-peak excursion of the centre's
# y coordinate over all time frames stays strictly below this value.
Y_AMPLITUDE_LIMIT = 20
# Column layout read by the loop below: the first 3 columns are skipped, then
# the columns repeat in groups of 5 = (mX, mY, mZ, alpha, beta), one group per
# parameter combination.
FIRST_GROUP_COL = 3
COLS_PER_GROUP = 5

# Input files. sorted() makes the row order of the output reproducible;
# glob on its own returns matches in arbitrary order.
csv_files = sorted(glob.glob("../Data_Output/Val/bloch_STT_*_sweep*.csv"))
if not csv_files:
    print("No input files matched ../Data_Output/Val/bloch_STT_*_sweep*.csv")

# jx is encoded in the file name in scientific notation (<digits>e<digits>).
jx_pattern = re.compile(r'bloch_STT_(\d+e\d+)_sweep\d+\.csv')

# One record per (file, parameter combination).
all_results = []

for file_path in csv_files:
    match = jx_pattern.search(file_path)
    if not match:
        print(f"文件名格式不匹配: {file_path}")
        continue

    jx = float(match.group(1))
    df = pd.read_csv(file_path, skiprows=8)
    print(f"Processing file: {file_path} with jx = {jx}")

    # Split into time frames once per file: the split does not depend on the
    # column group, so repeating it inside the inner loop is wasted work.
    # groupby yields the frames in ascending order of t.
    frames = [df_frame for _, df_frame in df.groupby("t")]
    if not frames:
        # Without any frame there is neither a trajectory nor an alpha/beta row.
        print(f"Skipping file without any time frame: {file_path}")
        continue

    num_cols = df.shape[1]
    if (num_cols - FIRST_GROUP_COL) % COLS_PER_GROUP != 0:
        print(f"Warning: {file_path} has {num_cols} columns; "
              f"trailing incomplete column group is ignored")

    # Stop early enough that col + 4 is always a valid column index.
    last_start = num_cols - (COLS_PER_GROUP - 1)
    for col in range(FIRST_GROUP_COL, last_start, COLS_PER_GROUP):
        mZ = df.columns[col + 2]
        alpha = df.columns[col + 3]
        beta = df.columns[col + 4]

        # alpha and beta are read from the first data row of their columns.
        alpha_value = df[alpha].values[0]
        beta_value = df[beta].values[0]

        # y coordinate of the detected centre in every time frame.
        y_values = np.array([
            find_skyrmion_center_by_clustering(
                df_frame,
                mz_col=mZ,
                threshold=MZ_THRESHOLD,
                eps=DBSCAN_EPS,
                min_samples=DBSCAN_MIN_SAMPLES,
            )[1]
            for df_frame in frames
        ])

        if np.isnan(y_values).any():
            # At least one frame had no detectable cluster. The amplitude
            # would be NaN and compare False against the limit, so the run is
            # labelled "fail"; this branch only makes that outcome explicit.
            result = "fail"
        else:
            y_amp = y_values.max() - y_values.min()
            result = "success" if y_amp < Y_AMPLITUDE_LIMIT else "fail"

        all_results.append({
            "jx": jx,
            "alpha": alpha_value,
            "beta": beta_value,
            "result": result
        })


# Naming the columns keeps the CSV header even when no record was produced.
results_df = pd.DataFrame(all_results, columns=["jx", "alpha", "beta", "result"])
output_path = "../Data_Preprocessing/Preprocessed_Dataset/test_dataset2.csv"
results_df.to_csv(output_path, index=False)
print(f"Train dataset saved to {output_path}")

Processing file: ../Data_Output/Val\bloch_STT_7e11_sweep4.csv with jx = 700000000000.0
Train dataset saved to ../Data_Preprocessing/Preprocessed_Dataset/test_dataset2.csv
