In [25]:
# загружаем библиотеки
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# убираем предупреждения
import warnings
warnings.filterwarnings('ignore')

# Загрузка данных
df = pd.read_csv('creditcard.csv')

# Просмотр данных
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [8]:
# Размерность данных
print(df.shape)

(284807, 31)


In [5]:
# Проверка на наличие пропусков
print("Проверка на пропуски:")
print(df.isnull().sum())

Проверка на пропуски:
Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64


In [6]:
# Проверка на наличие дубликатов
print(df.duplicated().sum())

1081


In [9]:
# Удаление дубликатов
df = df.drop_duplicates()
# Повторная проверка на наличие дубликатов
print(df.duplicated().sum())

0


In [12]:
corr_matrix = df.corr()
corr_with_target = corr_matrix['Class'].sort_values(ascending=False)
corr_with_target

Class     1.000000
V11       0.149067
V4        0.129326
V2        0.084624
V19       0.033631
V8        0.033068
V21       0.026357
V27       0.021892
V20       0.021486
V28       0.009682
Amount    0.005777
V22       0.004887
V26       0.004265
V25       0.003202
V15      -0.003300
V13      -0.003897
V23      -0.006333
V24      -0.007210
Time     -0.012359
V6       -0.043915
V5       -0.087812
V9       -0.094021
V1       -0.094486
V18      -0.105340
V7       -0.172347
V3       -0.182322
V16      -0.187186
V10      -0.206971
V12      -0.250711
V14      -0.293375
V17      -0.313498
Name: Class, dtype: float64

In [16]:
X = df[['V11','V4','V2','V19','V8','V21','V27','V20','V10','V12','V14',]]
y = df['Class']

In [17]:
# Разделение данных на тренировочную и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

In [22]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)

In [26]:
svc = SVC(random_state=42)
svc.fit(X_train_scaled, y_train)
svc_pred = svc.predict(X_test_scaled)

In [27]:
rf_accuracy = accuracy_score(y_test, rf_pred)
dt_accuracy = accuracy_score(y_test, dt_pred)
svc_accuracy = accuracy_score(y_test, svc_pred)

In [28]:
print(f"Random Forest Accuracy: {rf_accuracy}")
print(f"Decision Tree Accuracy: {dt_accuracy}")
print(f"SVC Accuracy: {svc_accuracy}\n")

Random Forest Accuracy: 0.9994537059880872
Decision Tree Accuracy: 0.9991188806259472
SVC Accuracy: 0.9994184612131252



In [29]:
отчет_случайного_леса = classification_report(y_test, rf_pred)
отчет_дерева_решений = classification_report(y_test, dt_pred)
отчет_svc = classification_report(y_test, svc_pred)

In [30]:

# Печать отчета по классификации с использованием Случайного леса
print(f"Отчет по классификации Случайного леса:\n{отчет_случайного_леса}")

# Печать отчета по классификации с использованием Дерева решений
print(f"Отчет по классификации Дерева решений:\n{отчет_дерева_решений}")

# Печать отчета по классификации с использованием SVC (Метод опорных векторов)
print(f"Отчет по классификации SVC:\n{отчет_svc}")


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56656
           1       0.95      0.69      0.80        90

    accuracy                           1.00     56746
   macro avg       0.98      0.84      0.90     56746
weighted avg       1.00      1.00      1.00     56746

Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56656
           1       0.76      0.64      0.70        90

    accuracy                           1.00     56746
   macro avg       0.88      0.82      0.85     56746
weighted avg       1.00      1.00      1.00     56746

SVC Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56656
           1       0.98      0.64      0.78        90

    accuracy                           1.00     56746
   macro avg       0.99    