In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from pyod.models.pca import PCA

In [None]:
# Identifier columns: carried through to the output file, never fed to the model.
ID_COLUMNS = ["id", "player_id"]

# Load the raw feature table, then split it into identifiers and model inputs.
features = pd.read_csv("test.csv")
ids = features[ID_COLUMNS]
X = features.drop(columns=ID_COLUMNS)

In [None]:
# Preprocessing: fill missing values with the per-column median, then
# standardize every column before it reaches the detector.
imputer = SimpleImputer(strategy="median")
scaler = StandardScaler()
preprocess = make_pipeline(imputer, scaler)

# Fit the preprocessing steps on X and transform it in a single pass.
X_proc = preprocess.fit_transform(X)

In [None]:
# Assumed share of anomalous rows; passed to the detector and echoed in the
# summary printed after the predictions are saved.
contamination = 0.05

# PCA-based outlier detector (pyod).
model = PCA(
    contamination=contamination,
    # Fractional value: presumably selects components by explained-variance
    # ratio, as in scikit-learn's PCA -- confirm against the pyod docs.
    n_components=0.99,
    weighted=True,
)
model.fit(X_proc)

In [None]:
# Outlier scores the fitted detector assigned to the rows it was trained on.
scores = model.decision_scores_

# A row is flagged (1) only when its score is strictly above the fitted
# threshold; everything else is 0.
above_threshold = scores > model.threshold_
preds = above_threshold.astype(int)

# Re-attach the identifiers to the scores and flags. Column order in the file:
# id, player_id, score, predicted_anomaly.
out = ids[["id", "player_id"]].assign(score=scores, predicted_anomaly=preds)
out.to_csv("predictions_unlabeled.csv", index=False)

# Short run summary for the notebook reader.
print("Saved predictions_unlabeled.csv with scores and anomaly flags.")
print(f"Contamination used: {contamination:.3f}")
print(f"Predicted positives: {preds.sum()} of {len(preds)}")