In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import joblib

In [13]:
# Read the transaction table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='creditcard.csv')

In [14]:
# Tukey-fence filter on 'Amount': keep only the rows whose amount lies within
# 1.5 * IQR below the first quartile and above the third (bounds inclusive).
Q1, Q3 = (data['Amount'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
data = data[data['Amount'].between(lower_bound, upper_bound)]

In [15]:
# Drop every row that still contains a missing value. The result is rebound
# rather than applied with inplace=True: `data` was produced by boolean
# filtering, and mutating such a frame in place can raise
# SettingWithCopyWarning on pandas versions without copy-on-write.
data = data.dropna()

In [16]:
# Cutoff applied to the absolute correlation values computed below.
correlation_threshold = 0.1
# Absolute value of each column's correlation with the 'Class' column,
# taken from the full pairwise correlation matrix of `data`.
correlation_with_class = abs(data.corr().loc[:, 'Class'])

In [17]:
# Labels whose absolute correlation with 'Class' is strictly below the cutoff.
low_correlation_features = correlation_with_class.loc[
    correlation_with_class.lt(correlation_threshold)
].index
# Remove those columns; all remaining columns are kept as they are.
data = data.drop(labels=low_correlation_features, axis=1)

In [22]:
# Hold out 20% of the rows for evaluation, with a fixed seed so the split is
# repeatable. The split is stratified on y so that the train and test
# partitions keep the same class proportions; with imbalanced labels an
# unstratified split can leave the held-out set with a distorted (or empty)
# minority class.
# NOTE(review): X and y are defined in cells not visible here; this assumes y
# holds the discrete class labels that the classifier is later fit on — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Write each partition to CSV (feature columns followed by the target) so the
# exact split can be reloaded later without re-running the split.
train_data = pd.concat([X_train, y_train], axis=1)
train_data.to_csv('train_credit_card_transactions.csv', index=False)

test_data = pd.concat([X_test, y_test], axis=1)
test_data.to_csv('test_credit_card_transactions.csv', index=False)

In [23]:
# Fit the standardizer on the training features only, then apply the fitted
# transformation to those same training features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [24]:
# Fit a logistic-regression classifier (random_state fixed at 42) on the
# standardized training features; fit() returns the estimator itself.
model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)
# Bare expression so the cell still displays the fitted estimator.
model

LogisticRegression(random_state=42)

In [25]:
# Persist the fitted scaler alongside the model: the classifier was trained on
# standardized features, so anyone loading the model needs the same fitted
# StandardScaler to transform raw features before calling predict.
joblib.dump(scaler, 'fraud_detection_scaler.pkl')
# Serialize the trained classifier. Kept as the last expression so the cell
# still displays the model file name returned by joblib.dump.
joblib.dump(model, 'fraud_detection_model.pkl')

['fraud_detection_model.pkl']