<a href="https://colab.research.google.com/github/MaxiPerrone/fraud-detection-ml/blob/main/4_Fraud_Detection_SVM_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV

import pandas as pd
import kagglehub
import os

In [None]:
# Download the Kaggle dataset (kagglehub returns the local directory that
# holds the extracted files) and build the path to the CSV inside it.
KAGGLE_DATASET = "dhanushnarayananr/credit-card-fraud"
CSV_NAME = "card_transdata.csv"

dataset_path = kagglehub.dataset_download(KAGGLE_DATASET)
csv_file = os.path.join(dataset_path, CSV_NAME)

Downloading from https://www.kaggle.com/api/v1/datasets/download/dhanushnarayananr/credit-card-fraud?dataset_version_number=1...


100%|██████████| 28.9M/28.9M [00:00<00:00, 95.9MB/s]

Extracting files...





In [None]:
# Read the full CSV, then keep a reproducible random subset of 1000 rows
# (fixed seed) so the SVM fit below stays quick.
df = (
    pd.read_csv(csv_file)
    .sample(n=1000, random_state=42)
)

# Preview the first rows of the sampled frame.
df.head()

Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
987231,0.929509,1.296477,0.36111,0.0,0.0,0.0,1.0,0.0
79954,0.611179,0.208295,3.118884,0.0,0.0,0.0,1.0,0.0
567130,3.956062,0.529194,1.579942,1.0,0.0,0.0,0.0,0.0
500891,21.798902,0.019399,11.416909,1.0,0.0,0.0,0.0,0.0
55399,3.310635,1.707802,2.028915,1.0,0.0,0.0,0.0,0.0


In [None]:
# Target: the 'fraud' column, copied so later edits to y cannot touch df.
y = df['fraud'].copy()

# Features: every remaining column of the sampled frame.
X = df.drop(columns=['fraud'])

In [None]:
# Standardize every feature column (zero mean, unit variance) before the
# linear SVM sees it. The fitted scaler is reused later on new transactions.
# NOTE(review): the scaler is fitted on all rows, including those that end up
# in CalibratedClassifierCV's held-out calibration folds; wrap scaler + SVC in
# a Pipeline if that leakage matters.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# CalibratedClassifierCV calibrates on the estimator's decision_function when
# it has one, so SVC(probability=True) would only add an extra internal
# cross-validated Platt scaling whose result is never used. It is left off,
# and random_state goes with it because SVC only uses it for that
# probability path.
svm = SVC(kernel='linear')
calibrated_svm = CalibratedClassifierCV(svm)
calibrated_svm.fit(X_scaled, y)

In [None]:
def _read_binary_flag(prompt):
    """Prompt for a 0/1 flag and return it as an int.

    Args:
        prompt: Text shown to the user by input().

    Returns:
        0 or 1.

    Raises:
        ValueError: If the reply is not an integer, or is an integer other
            than 0 or 1.
    """
    value = int(input(prompt))
    # The prompts ask for 0 or 1; reject anything else here instead of
    # silently feeding an out-of-range flag to the model.
    if value not in (0, 1):
        raise ValueError(f"Expected 0 or 1, got {value}")
    return value


# Continuous features of the transaction to score, read as floats.
distance_from_home = float(input("Enter Distance From Home: "))
distance_from_last_transaction = float(input("Enter Distance From Last Transaction: "))
ratio_to_median_purchase_price = float(input("Enter Ratio to Median Purchase Price: "))

# Binary features, validated to be exactly 0 or 1.
repeat_retailer = _read_binary_flag("Enter Repeat Retailer (0 or 1): ")
used_chip = _read_binary_flag("Enter Used Chip (0 or 1): ")
used_pin_number = _read_binary_flag("Enter Used Pin Number (0 or 1): ")
online_order = _read_binary_flag("Enter Online Order (0 or 1): ")

Enter Distance From Home: 1
Enter Distance From Last Transaction: 1
Enter Ratio to Median Purchase Price: 1
Enter Repeat Retailer (0 or 1): 1
Enter Used Chip (0 or 1): 0
Enter Used Pin Number (0 or 1): 0
Enter Online Order (0 or 1): 0


In [None]:
# Collect the entered values as one record. The key order is the column order
# of the resulting frame, listed in the same order as the feature columns
# shown in the df.head() preview.
transaction_record = {
    'distance_from_home': distance_from_home,
    'distance_from_last_transaction': distance_from_last_transaction,
    'ratio_to_median_purchase_price': ratio_to_median_purchase_price,
    'repeat_retailer': repeat_retailer,
    'used_chip': used_chip,
    'used_pin_number': used_pin_number,
    'online_order': online_order,
}

# One-row DataFrame holding the transaction to score.
new_transaction_features = pd.DataFrame([transaction_record])

In [None]:
# Apply the scaling learned on the training data to the new row.
scaled_transaction = scaler.transform(new_transaction_features)

# Predicted class label for the single transaction (array of length 1).
prediction = calibrated_svm.predict(scaled_transaction)

# Row 0 is the only transaction; column 1 of predict_proba is the second
# class, which is fraud == 1 given the 0/1 target used for training.
probability_of_fraud = calibrated_svm.predict_proba(scaled_transaction)[0, 1]

In [None]:
# Turn the predicted class into a readable label.
if prediction[0] == 1:
    label = 'Fraud'
else:
    label = 'Legitime'

# Report the decision and the calibrated fraud probability as a percentage.
print('Prediction for new Transaction:', label)
print(f'Probability of fraud: {probability_of_fraud * 100:.2f}%')

Prediction for new Transaction: Legitime
Probability of fraud: 0.37%
