In [2]:
# ==========================================
# EXPERIMENT stage 2 – Test models without data augmentation
# ==========================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from collections import Counter

# MODELS
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

In [3]:
# Read the transactions table; the path is relative to the working directory
df = pd.read_csv("creditcard_dataset.csv")

# Count the rows per 'Class' label in the untouched dataset and report them
original_class_counts = Counter(df["Class"])
print("Rozkład klas oryginalny:", original_class_counts)

Rozkład klas oryginalny: Counter({0: 284315, 1: 492})


In [4]:
# Separate the target from the predictors:
#   y – the 'Class' column (labels)
#   X – every remaining column of df (features)
y = df["Class"]
X = df.drop(columns="Class")

In [5]:
# Hold out 20% of the rows for testing. The split is stratified on y and
# seeded (random_state=42) so it is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the per-class row counts of the training part
train_class_counts = Counter(y_train)
print("Rozkład klas w zbiorze treningowym:", train_class_counts)

Rozkład klas w zbiorze treningowym: Counter({0: 227451, 1: 394})


In [6]:
# Models definition
# Four baseline classifiers keyed by display name; every one is seeded with
# random_state=42. The training cell iterates this dict in insertion order.
# NOTE(review): only Logistic Regression and Linear SVC use
# class_weight='balanced'; Random Forest and XGBoost are left unweighted —
# confirm this asymmetry is intended for the no-augmentation baseline.
models = {
    # n_jobs=-1 builds the trees on all available cores; with a fixed
    # random_state this only shortens fitting, it does not change the forest.
    "Random Forest": RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=5000, class_weight='balanced', random_state=42),
    "Linear SVC": LinearSVC(class_weight='balanced', max_iter=2000, random_state=42)
}

In [None]:
# Training and model validation on the original training split
# (no SMOTE or any other augmentation is applied in this notebook).
# `results` maps model name -> text classification report; the saving cell
# writes these reports to disk.
results = {}

for name, model in models.items():
    print(f"\nModel: {name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"=== Results for model {name} ===")
    # labels=[0, 1] fixes the row/column order of the matrix and the mapping
    # 0 -> "Correct", 1 -> "Frauds" instead of relying on which labels happen
    # to occur in y_test / y_pred.
    print(confusion_matrix(y_test, y_pred, labels=[0, 1]))
    report = classification_report(
        y_test,
        y_pred,
        labels=[0, 1],
        target_names=["Correct", "Frauds"],
    )
    print(report)

    results[name] = report


Model: Random Forest


In [None]:
# Write every model's report into one UTF-8 text file (overwritten on each run).
# Each entry is: a "=== <model name> ===" header, the report text, and a
# 60-character "=" rule that separates it from the next entry.
divider = "\n" + "=" * 60 + "\n"

with open("results_no_augmentation.txt", "w", encoding="utf-8") as out_file:
    out_file.writelines(
        f"\n=== {model_name} ===\n{report_text}{divider}"
        for model_name, report_text in results.items()
    )

print("\n Experiment WITHOUT data augmentation finished - see results_no_augmentation.txt")