In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN, KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score



In [None]:
# Load the raw transactions, then keep only complete rows: the OneClassSVM fit
# later in this notebook rejects NaN input ("Input X contains NaN"), and a row
# without a `Class` label cannot be compared against predictions anyway.
credits_raw = pd.read_csv('creditcard.csv')
credits = credits_raw.dropna().reset_index(drop=True)

# Make the clean-up visible instead of silently shrinking the data.
n_dropped = len(credits_raw) - len(credits)
if n_dropped:
    print(f"Dropped {n_dropped} incomplete row(s) out of {len(credits_raw)}.")

In [None]:
# Build the feature matrix from the original columns only.
# - `Class` is dropped strictly, so a missing label column fails loudly.
# - The flag columns this notebook writes back into `credits` are dropped
#   leniently, so re-running the cell never feeds earlier predictions back in
#   as features (on the first run they simply do not exist yet).
features = (
    credits
    .drop(columns=['Class'])
    .drop(columns=['anomaly_iforest', 'anomaly_ocsvm'], errors='ignore')
)

# Standardise every feature column before fitting the detector.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# contamination is the share of rows the forest should flag as outliers
# (presumably chosen to approximate the fraud rate - confirm).
model = IsolationForest(n_estimators=100, contamination=0.0017, random_state=42)
preds = model.fit_predict(X_scaled)

# fit_predict returns -1 for outliers and 1 for inliers; store 1 = flagged.
credits['anomaly_iforest'] = (preds == -1).astype(int)

In [None]:
# OneClassSVM raises "Input X contains NaN" on missing values, and
# StandardScaler leaves NaNs in place when transforming. Because the scaler
# ignores NaNs while fitting, every scaled column is centred on 0, so filling
# a missing entry with 0.0 is equivalent to column-mean imputation.
X_svm = np.where(np.isnan(X_scaled), 0.0, X_scaled)

# nu bounds the fraction of training rows treated as outliers; it mirrors the
# contamination value used for the Isolation Forest.
svm_model = OneClassSVM(kernel='rbf', nu=0.0017, gamma='scale')
svm_preds = svm_model.fit_predict(X_svm)

# fit_predict returns -1 for outliers and 1 for inliers; store 1 = flagged.
credits['anomaly_ocsvm'] = (svm_preds == -1).astype(int)

ValueError: Input X contains NaN.
OneClassSVM does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:

# Ground truth shared by both evaluations, plus the display names for the
# two label values (0 -> "Normal", 1 -> "Fraud").
y_true = credits['Class']
target_names = ["Normal", "Fraud"]


def _emit(title, payload):
    """Print a titled result: the title, a single space, then the payload."""
    print(title, payload)


# --- Isolation Forest flags vs. ground truth ---
y_pred_iforest = credits['anomaly_iforest']

cm_iforest = confusion_matrix(y_true, y_pred_iforest)
_emit("Confusion Matrix:\n", cm_iforest)

report_iforest = classification_report(
    y_true,
    y_pred_iforest,
    target_names=target_names,
)
_emit("Classification Report:\n", report_iforest)

# --- One-Class SVM flags vs. ground truth ---
y_pred_ocsvm = credits['anomaly_ocsvm']

cm_ocsvm = confusion_matrix(y_true, y_pred_ocsvm)
_emit("Confusion Matrix:\n", cm_ocsvm)

report_ocsvm = classification_report(
    y_true,
    y_pred_ocsvm,
    target_names=target_names,
)
_emit("Classification Report:\n", report_ocsvm)



In [None]:
# NOTE(review): `df`, `Card_Data` and `Bef_Scaling` are not defined in any cell
# visible above this one - confirm they exist before this cell runs.
Card_WOC = df.drop('Class', axis=1)
Class_Card = df['Class']

# Share of rows with a positive label (assumes `Class` is 0/1), reused as the
# expected outlier fraction for both detectors.
contamination = Class_Card.sum() / len(Class_Card)

# Both detectors are fitted and scored on the same feature selection.
X_detect = Card_Data[Bef_Scaling]


def _outliers_to_flags(raw_predictions):
    """Map estimator output (-1 = outlier, 1 = inlier) to 1 = flagged, 0 = not flagged."""
    return [1 if value == -1 else 0 for value in raw_predictions]


# --- Isolation Forest ---
iso_forest = IsolationForest(contamination=contamination, random_state=42)
iso_forest.fit(X_detect)
Class_pred_iso = _outliers_to_flags(iso_forest.predict(X_detect))

print("=== Isolation Forest Performance ===")
print(classification_report(Class_Card, Class_pred_iso))
print("Confusion Matrix:\n", confusion_matrix(Class_Card, Class_pred_iso))

# --- One-Class SVM ---
oc_svm = OneClassSVM(nu=contamination, kernel='rbf', gamma='scale')
oc_svm.fit(X_detect)
Class_pred_svm = _outliers_to_flags(oc_svm.predict(X_detect))

print("=== One-Class SVM Performance ===")
print(classification_report(Class_Card, Class_pred_svm))
# The label series in this cell is `Class_Card`; the bare name `Class` used
# here previously is never assigned and raised a NameError.
print("Confusion Matrix:\n", confusion_matrix(Class_Card, Class_pred_svm))

# --- Side-by-side summary of the positive-class scores ---
precision_iso = precision_score(Class_Card, Class_pred_iso)
recall_iso = recall_score(Class_Card, Class_pred_iso)
f1_iso = f1_score(Class_Card, Class_pred_iso)

precision_svm = precision_score(Class_Card, Class_pred_svm)
recall_svm = recall_score(Class_Card, Class_pred_svm)
f1_svm = f1_score(Class_Card, Class_pred_svm)

print(f"Isolation Forest - Precision: {precision_iso:.4f}, Recall: {recall_iso:.4f}, F1-Score: {f1_iso:.4f}")
print(f"One-Class SVM    - Precision: {precision_svm:.4f}, Recall: {recall_svm:.4f}, F1-Score: {f1_svm:.4f}")



In [None]:
# NOTE(review): `Card_Data` and `Bef_Scaling` are not defined in any cell
# visible above this one - confirm they exist before this cell runs.
X_cluster = Card_Data[Bef_Scaling]

# Candidate DBSCAN settings. DBSCAN takes one scalar per parameter, so the
# grid is swept one (eps, min_samples) pair at a time; passing the lists
# themselves fails parameter validation.
eps = [0.5, 0.6, 0.7]
min_samples = [10, 15, 20]

dbscan = None
dbscan_labels = None
noise_points = None
for eps_value in eps:
    for min_samples_value in min_samples:
        candidate = DBSCAN(eps=eps_value, min_samples=min_samples_value)
        candidate_labels = candidate.fit_predict(X_cluster)
        candidate_noise = int(np.sum(candidate_labels == -1))
        print(
            f"eps={eps_value}, min_samples={min_samples_value}: "
            f"{candidate_noise} noise points"
        )
        # Keep the run that leaves the fewest points unassigned: K-Means below
        # assigns every point to a cluster, so this is the closest like-for-like
        # comparison. The strict `<` keeps the earliest setting on ties.
        if noise_points is None or candidate_noise < noise_points:
            dbscan = candidate
            dbscan_labels = candidate_labels
            noise_points = candidate_noise

print(f"Number of noise points: {noise_points}")

# Project to two principal components purely for plotting.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_cluster)

# Give K-Means as many clusters as DBSCAN found (label -1 is noise, not a
# cluster). DBSCAN may mark every point as noise, which would give 0 and make
# KMeans raise, so fall back to a single cluster in that case.
k = len(set(dbscan_labels)) - (1 if -1 in dbscan_labels else 0)
k = max(k, 1)
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans_labels = kmeans.fit_predict(X_cluster)

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = [
    ("DBSCAN Clusters", dbscan_labels),
    ("K-Means Clusters", kmeans_labels),
]
for ax, (title, labels) in zip(axes, panels):
    ax.set_title(title)
    ax.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='Paired', s=20)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')

fig.tight_layout()
plt.show()

