In [49]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.optimize import minimize

class S3VM:
    def __init__(self, C=1.0, max_iter=10):
        self.C = C  # 惩罚系数
        self.max_iter = max_iter  # 最大迭代次数
        self.model = SVC(kernel='rbf', C=self.C)  # 使用线性核的SVM

    def fit(self, X_labeled, y_labeled, X_unlabeled):
        # 初始化：将无标签数据的标签设置为伪标签
        y_unlabeled = np.random.choice(np.unique(y_labeled), len(X_unlabeled))  # 随机初始化伪标签

        for iteration in range(self.max_iter):
            # 使用有标签和伪标签数据进行训练
            X_combined = np.vstack((X_labeled, X_unlabeled))
            y_combined = np.hstack((y_labeled, y_unlabeled))

            # 训练SVM
            self.model.fit(X_combined, y_combined)

            # 重新预测无标签数据的伪标签
            y_unlabeled_new = self.model.predict(X_unlabeled)

            # 检查伪标签是否收敛
            if np.array_equal(y_unlabeled, y_unlabeled_new):
                print(f"伪标签在第 {iteration+1} 轮迭代中收敛")
                break

            # 更新伪标签
            y_unlabeled = y_unlabeled_new

    def predict(self, X):
        return self.model.predict(X)

# 数据生成
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=300, noise=0.2, random_state=42)

# 将部分数据作为无标签数据
X_labeled, X_unlabeled, y_labeled, _ = train_test_split(X, y, test_size=0.5,random_state=42)




In [50]:
# 训练半监督SVM
s3vm = S3VM(C=1.0, max_iter=10)
s3vm.fit(X_labeled, y_labeled, X_unlabeled)



伪标签在第 4 轮迭代中收敛


In [51]:
# 在测试集上评估模型
X_test, _, y_test, _ = train_test_split(X, y, test_size=0.2)
y_pred = s3vm.predict(X_test)
print(f"准确率: {accuracy_score(y_test, y_pred):.4f}")

准确率: 0.9292
