In [None]:
# data = {
#   "Transaction ID": [
#     "TXN-00000000", "TXN-00000001", "TXN-00000002", "TXN-00000003",
#     "TXN-00000004", "TXN-00000005", "TXN-00000006", "TXN-00000007",
#     "TXN-00000008", "TXN-00000009", "TXN-00000010", "TXN-00000011",
#     "TXN-00000012", "TXN-00000013", "TXN-00000014", "TXN-00000015",
#     "TXN-00000016", "TXN-00000017", "TXN-00000018", "TXN-00000019"
#   ],
#   "Timestamp": [
#     "2023-11-19 23:53:27", "2023-11-19 01:35:13", "2023-11-19 03:22:34",
#     "2023-11-19 11:36:50", "2023-11-19 22:07:07", "2023-11-19 06:50:44",
#     "2023-11-19 07:34:52", "2023-11-19 20:56:59", "2023-11-19 04:58:12",
#     "2023-11-19 13:59:00", "2023-11-19 13:35:42", "2023-11-19 13:05:26",
#     "placeholder", "placeholder", "placeholder", "placeholder",
#     "placeholder", "placeholder", "placeholder", "placeholder"
#   ],
#   "Amount": [
#     114.540834, -237.503744, -67.769839, 83.321293, 64.157803,
#     190.504518, -87.061611, 120.096091, -158.466882, -85.630645,
#     97.706548, -112.436867, 0, 0, 0, 0, 0, 0, 0, 0
#   ],
#   "Currency": [
#     "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD",
#     "USD", "USD", "placeholder", "placeholder", "placeholder", "placeholder",
#     "placeholder", "placeholder", "placeholder", "placeholder"
#   ],
#   "Card Type": [
#     "Mastercard", "Visa", "Visa", "Visa", "Visa", "Visa", "Visa", "Visa",
#     "Mastercard", "Mastercard", "Visa", "Visa", "placeholder", "placeholder",
#     "placeholder", "placeholder", "placeholder", "placeholder", "placeholder",
#     "placeholder"
#   ],
#   "Billing Country": [
#     "CA", "GB", "GB", "GB", "GB", "US", "GB", "CA", "GB", "GB", "CA", "CA",
#     "placeholder", "placeholder", "placeholder", "placeholder",
#     "placeholder", "placeholder", "placeholder", "placeholder"
#   ],
#  "Transaction Country": [
#   "US", "CA", "US", "US", "CA", "US", "GB", "CA", "US", "GB",
#   "CA", "CA", "placeholder", "placeholder", "placeholder", "placeholder",
#   "placeholder", "placeholder", "placeholder", "placeholder"
# ]
# }

In [8]:
import pandas as pd
import numpy as np

# Build a reproducible synthetic transactions dataset.
# The legacy global seed fixes every np.random draw below; the draw order
# (amount, location, time_of_day, transaction_type, fraudulent) must stay the
# same for the generated values to stay the same.
np.random.seed(42)

# Number of samples
n_samples = 1000

# Share of rows labelled fraudulent. The label is drawn independently of the
# feature columns, so it is pure noise with respect to them.
fraud_rate = 0.2

# Features: transaction_amt, location, time_of_day, transaction_type
# Target:   fraudulent (1 = fraud, 0 = legitimate)
data = {
    'transaction_amt': np.random.normal(loc=100, scale=50, size=n_samples),
    'location': np.random.choice(['A', 'B', 'C'], size=n_samples),
    'time_of_day': np.random.choice(['morning', 'afternoon', 'evening'], size=n_samples),
    'transaction_type': np.random.choice(['online', 'in-store'], size=n_samples),
    # P(legitimate) = 1 - fraud_rate, P(fraudulent) = fraud_rate (20% fraudulent)
    'fraudulent': np.random.choice([0, 1], size=n_samples, p=[1 - fraud_rate, fraud_rate]),
}

# Create DataFrame
df = pd.DataFrame(data)

# Print first few rows of the dataset
df.head()

Unnamed: 0,transaction_amt,location,time_of_day,transaction_type,fraudulent
0,124.835708,C,morning,in-store,0
1,93.086785,A,afternoon,online,1
2,132.384427,C,afternoon,online,0
3,176.151493,A,afternoon,in-store,0
4,88.292331,C,morning,online,0


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Column roles: one numeric feature plus three categorical ones; the target
# is the binary `fraudulent` flag.
categorical_cols = ['location', 'time_of_day', 'transaction_type']
feature_cols = ['transaction_amt'] + categorical_cols

y = df['fraudulent']

# One-hot encode the categorical columns; drop_first removes one level per
# column so the remaining indicators are not redundant with each other.
X = pd.get_dummies(df[feature_cols], columns=categorical_cols, drop_first=True)

# Hold out 20% of the rows for testing (fixed random_state for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


Logistic regression

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Baseline classifier: logistic regression with scikit-learn's default settings.
logreg_model = LogisticRegression()

# Fit the model on the standardized training features.
logreg_model.fit(X_train_scaled, y_train)

# Predict hard class labels for the held-out test split.
y_pred_logreg = logreg_model.predict(X_test_scaled)

# Evaluate the model.
# zero_division=0: if a class is never predicted, its precision is 0/0. This
# setting reports that score as 0.0 (the same number the default prints)
# without raising an UndefinedMetricWarning for every averaging mode.
# NOTE(review): a row of zeros for class 1 means the model never predicts
# fraud -- read the confusion matrix before trusting the accuracy figure.
print("Logistic Regression Model:")
print(classification_report(y_test, y_pred_logreg, zero_division=0))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_logreg))


Logistic Regression Model:
              precision    recall  f1-score   support

           0       0.82      1.00      0.90       164
           1       0.00      0.00      0.00        36

    accuracy                           0.82       200
   macro avg       0.41      0.50      0.45       200
weighted avg       0.67      0.82      0.74       200

Confusion Matrix:
[[164   0]
 [ 36   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Naive Bayes

In [12]:
from sklearn.naive_bayes import GaussianNB

# Second baseline: Gaussian Naive Bayes with default settings.
nb_model = GaussianNB()

# Fit the model on the standardized training features.
nb_model.fit(X_train_scaled, y_train)

# Predict hard class labels for the held-out test split.
y_pred_nb = nb_model.predict(X_test_scaled)

# Evaluate the model.
# zero_division=0: if a class is never predicted, its precision is 0/0. This
# setting reports that score as 0.0 (the same number the default prints)
# without raising an UndefinedMetricWarning for every averaging mode.
# NOTE(review): a row of zeros for class 1 means the model never predicts
# fraud -- read the confusion matrix before trusting the accuracy figure.
print("\nGaussian Naive Bayes Model:")
print(classification_report(y_test, y_pred_nb, zero_division=0))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_nb))


Gaussian Naive Bayes Model:
              precision    recall  f1-score   support

           0       0.82      1.00      0.90       164
           1       0.00      0.00      0.00        36

    accuracy                           0.82       200
   macro avg       0.41      0.50      0.45       200
weighted avg       0.67      0.82      0.74       200

Confusion Matrix:
[[164   0]
 [ 36   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
