In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix

# 1. Read the raw dataset from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='SData.csv')  # adjust path as needed

# 2. Convert any complex-like strings to real floats
def to_real(x):
    """Convert a single value to a real-valued float.

    A plain ``float(x)`` conversion is tried first. If that fails, ``x`` is
    parsed as a complex number and only its real part is returned (the
    imaginary part is discarded).

    Parameters
    ----------
    x : object
        Value to convert, e.g. a number or a string such as ``"1.5"`` or
        ``"(1.5+0j)"``.

    Returns
    -------
    float
        ``float(x)``, or ``complex(x).real`` when ``x`` cannot be converted
        to float directly.

    Raises
    ------
    ValueError
        If ``x`` can be interpreted neither as a float nor as a complex
        number.
    """
    # Only genuine conversion failures are handled. A bare ``except`` would
    # also swallow KeyboardInterrupt/SystemExit, making a long element-wise
    # conversion impossible to interrupt.
    conversion_errors = (TypeError, ValueError, OverflowError)
    try:
        return float(x)
    except conversion_errors:
        pass
    try:
        return complex(x).real
    except conversion_errors as err:
        raise ValueError(f"Cannot convert {x} to float") from err

# Apply the conversion to every cell. `DataFrame.applymap` is deprecated since
# pandas 2.1 in favour of `DataFrame.map`, so use `map` when this pandas has it.
# The check is done on the class so a column that happens to be named "map"
# cannot be mistaken for the method on older pandas versions.
df = (df.map if hasattr(pd.DataFrame, "map") else df.applymap)(to_real)

# 3. Separate features and labels
y = df.iloc[:, 0].to_numpy()       # first column is label
X = df.iloc[:, 1:].to_numpy()      # remaining columns are features

# 4. Split into training and validation sets (80/20, stratified on the label
#    so both splits keep the class proportions; seeded for reproducibility)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

In [None]:
# 5. Define DMS-PSO-SVM (as before). Note: the default DMS-PSO parameters defined here are only a reference; they are not tuned for your task and should be adapted to it.
def dms_pso_svm(X, y,
                n_particles=30,
                n_swarms=3,
                n_iter=50,
                regroup_period=10,
                C_bounds=(1e-3, 1e3),
                gamma_bounds=(1e-4, 1e1),
                inertia_weight=0.729,
                c1=1.49445,
                c2=1.49445,
                cv_folds=5,
                random_state=42):
    """Tune an RBF-kernel SVC with a dynamic multi-swarm PSO (DMS-PSO).

    Each particle is a point ``(log10(C), log10(gamma))``. Its fitness is
    ``1 - mean ROC AUC`` over a stratified, shuffled ``cv_folds``-fold
    cross-validation on ``(X, y)``, so lower is better. Particles are split
    into ``n_swarms`` sub-swarms that each follow their own local best; every
    ``regroup_period`` iterations the particles are reshuffled into new
    sub-swarms.

    Parameters
    ----------
    X, y : array-like
        Training features and labels passed to scikit-learn. The ``roc_auc``
        scoring used here expects a binary target.
    n_particles : int
        Total number of particles (>= 1).
    n_swarms : int
        Number of sub-swarms (``1 <= n_swarms <= n_particles``).
    n_iter : int
        Number of PSO iterations after the initial evaluation.
    regroup_period : int
        Regroup the sub-swarms every this many iterations (>= 1).
    C_bounds, gamma_bounds : sequence of two floats
        Inclusive ``(low, high)`` search range, with ``0 < low <= high``; the
        search is carried out in log10 space.
    inertia_weight, c1, c2 : float
        Inertia weight and the cognitive / social acceleration coefficients
        of the velocity update.
    cv_folds : int
        Number of cross-validation folds.
    random_state : int
        Seed for the particle RNG, the CV shuffling and the final SVC.

    Returns
    -------
    dict
        ``{'C': best C, 'gamma': best gamma, 'model': SVC}`` where the model
        is fitted on all of ``(X, y)`` with ``probability=True``.

    Raises
    ------
    ValueError
        If the swarm configuration or the search bounds are invalid.
    """
    # Fail fast with a clear message instead of an obscure argmin-of-empty,
    # ZeroDivisionError or NaN-from-log10 failure deep inside the search.
    if n_particles < 1:
        raise ValueError(f"n_particles must be >= 1, got {n_particles}")
    if not 1 <= n_swarms <= n_particles:
        raise ValueError(
            f"n_swarms must be between 1 and n_particles ({n_particles}), got {n_swarms}")
    if regroup_period < 1:
        raise ValueError(f"regroup_period must be >= 1, got {regroup_period}")
    for bounds_name, bounds in (("C_bounds", C_bounds), ("gamma_bounds", gamma_bounds)):
        if not 0 < bounds[0] <= bounds[1]:
            raise ValueError(
                f"{bounds_name} must satisfy 0 < low <= high, got {tuple(bounds)}")

    rng = np.random.RandomState(random_state)
    logC_min, logC_max = np.log10(C_bounds[0]), np.log10(C_bounds[1])
    logG_min, logG_max = np.log10(gamma_bounds[0]), np.log10(gamma_bounds[1])
    dims = 2

    # Particles start uniformly in the log-space box with small velocities.
    positions = rng.uniform([logC_min, logG_min], [logC_max, logG_max], (n_particles, dims))
    velocities = 0.1 * (rng.rand(n_particles, dims) - 0.5)

    # Random initial assignment of particles to sub-swarms.
    indices = np.arange(n_particles)
    rng.shuffle(indices)
    subswarm_idxs = np.array_split(indices, n_swarms)

    pbest_pos = positions.copy()
    pbest_fit = np.full(n_particles, np.inf)
    lbest_pos = np.zeros((n_swarms, dims))
    lbest_fit = np.full(n_swarms, np.inf)
    gbest_pos = None
    gbest_fit = np.inf
    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=random_state)

    def fitness(log_params):
        """Return 1 - mean cross-validated ROC AUC for one particle."""
        C_val = 10**log_params[0]
        gamma_val = 10**log_params[1]
        # No probability=True here: the 'roc_auc' scorer ranks samples with
        # decision_function, so Platt scaling would only add an internal
        # cross-validation to every fit without changing the score.
        clf = SVC(C=C_val, kernel='rbf', gamma=gamma_val)
        scores = cross_val_score(clf, X, y, cv=cv, scoring='roc_auc', n_jobs=-1)
        return 1 - scores.mean()

    def refresh_local_bests(groups):
        """Set each sub-swarm's local best to its members' best personal best."""
        for s, idxs in enumerate(groups):
            best_idx = idxs[np.argmin(pbest_fit[idxs])]
            lbest_fit[s] = pbest_fit[best_idx]
            lbest_pos[s] = pbest_pos[best_idx]

    # Initial evaluation: the starting position is each particle's personal best.
    for i in range(n_particles):
        fit = fitness(positions[i])
        pbest_fit[i] = fit
        if fit < gbest_fit:
            gbest_fit, gbest_pos = fit, positions[i].copy()

    refresh_local_bests(subswarm_idxs)

    for t in range(1, n_iter+1):
        for s, idxs in enumerate(subswarm_idxs):
            for i in idxs:
                r1, r2 = rng.rand(dims), rng.rand(dims)
                # Velocity update uses the sub-swarm (local) best, not the global one.
                velocities[i] = (inertia_weight * velocities[i]
                                 + c1 * r1 * (pbest_pos[i] - positions[i])
                                 + c2 * r2 * (lbest_pos[s] - positions[i]))
                positions[i] += velocities[i]
                # Keep the particle inside the search box.
                positions[i, 0] = np.clip(positions[i, 0], logC_min, logC_max)
                positions[i, 1] = np.clip(positions[i, 1], logG_min, logG_max)
                fit = fitness(positions[i])
                if fit < pbest_fit[i]:
                    pbest_fit[i], pbest_pos[i] = fit, positions[i].copy()
                if fit < lbest_fit[s]:
                    lbest_fit[s], lbest_pos[s] = fit, positions[i].copy()
                if fit < gbest_fit:
                    gbest_fit, gbest_pos = fit, positions[i].copy()
        if t % regroup_period == 0:
            rng.shuffle(indices)
            subswarm_idxs = np.array_split(indices, n_swarms)
            # The old local bests belong to the previous grouping; rebuild
            # them from the new members so no sub-swarm keeps chasing a best
            # found by particles that are no longer part of it.
            refresh_local_bests(subswarm_idxs)

    best_C = 10**gbest_pos[0]
    best_gamma = 10**gbest_pos[1]
    # The returned model keeps probability=True so callers can use predict_proba.
    final_svm = SVC(C=best_C, kernel='rbf', gamma=best_gamma,
                    probability=True, random_state=random_state)
    final_svm.fit(X, y)
    return {'C': best_C, 'gamma': best_gamma, 'model': final_svm}

# 6. Run optimization and train final model
result = dms_pso_svm(X_train, y_train, random_state=42)
print(f"Optimized C: {result['C']:.5f}, gamma: {result['gamma']:.5f}")

# 7. Evaluate on the validation set
svm_model = result['model']

# predict_proba columns follow svm_model.classes_, so column 1 is the
# probability of the second (larger) class label, treated as "positive" here.
negative_label, positive_label = svm_model.classes_
y_prob = svm_model.predict_proba(X_val)[:, 1]

# Decision threshold on the positive-class probability.
# NOTE(review): 0.47 is taken over unchanged; its derivation is not shown in
# this notebook - confirm it was not tuned on the validation split.
DECISION_THRESHOLD = 0.47
# Map the thresholded probabilities back to the model's own class labels
# rather than hard-coding 0/1, so the metrics stay correct for any binary
# label encoding (e.g. -1/1).
y_pred = np.where(y_prob >= DECISION_THRESHOLD, positive_label, negative_label)

auc = roc_auc_score(y_val, y_prob)
acc = accuracy_score(y_val, y_pred)
# Fix the label order so the matrix is always 2x2 in (negative, positive) order.
tn, fp, fn, tp = confusion_matrix(y_val, y_pred, labels=svm_model.classes_).ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print(f"Validation AUC: {auc:.3f}")
print(f"Validation Accuracy: {acc:.3f}")
print(f"Validation Sensitivity: {sensitivity:.3f}")
print(f"Validation Specificity: {specificity:.3f}")