In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.metrics import classification_report

# 1. Load data
df = pd.read_csv('creditcard.csv')  # downloaded from Kaggle

# Drop rows with NaN values
df.dropna(inplace=True)

# 2. Preprocess: separate features and labels
X = df.drop('Class', axis=1).values
y = df['Class'].values

# Split the data into initial training and test sets
X_train_initial, X_test, y_train_initial, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Use only a few labeled examples from the initial training set, keep rest unlabeled
X_train, X_unlabeled, y_train, y_unlabeled = train_test_split(X_train_initial, y_train_initial, test_size=0.95, stratify=y_train_initial, random_state=42)
y_unlabeled[:] = -1  # mark all unlabeled as -1

# Combine for semi-supervised fitting
X_combined = np.vstack([X_train, X_unlabeled])
y_combined = np.concatenate([y_train, y_unlabeled])

# 3. Initialize self-training with RandomForest base estimator
base_clf = RandomForestClassifier(n_estimators=100, random_state=42)
self_clf = SelfTrainingClassifier(estimator=base_clf, threshold=0.9, max_iter=10)

# 4. Train
self_clf.fit(X_combined, y_combined)

# 5. Evaluate on held-out labeled test set
y_pred = self_clf.predict(X_test)

print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

         0.0     0.9960    1.0000    0.9980      1985
         1.0     0.0000    0.0000    0.0000         8

    accuracy                         0.9960      1993
   macro avg     0.4980    0.5000    0.4990      1993
weighted avg     0.9920    0.9960    0.9940      1993



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
