In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('data/creditcard.csv')

# Check for missing values
print(df.isnull().sum())

# Separate features and target
X = df.drop('Class', axis=1)
y = df['Class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64


In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix

# Initialize the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')

# Train the model
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Save the model
import joblib
joblib.dump(rf_model, 'models/model.pkl')


Accuracy: 0.9995084442259752
ROC AUC: 0.8724226008636431
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.96      0.74      0.84        98

    accuracy                           1.00     56962
   macro avg       0.98      0.87      0.92     56962
weighted avg       1.00      1.00      1.00     56962

[[56861     3]
 [   25    73]]


['models/model.pkl']

In [5]:
import numpy as np
import joblib

# Load the trained model
model = joblib.load('models/model.pkl')

# Define some sample inputs (as numpy arrays)
# Example of a legitimate transaction (expected output: 0)
sample_input_legit = np.array([[0, -1.359807134, -0.072781173, 2.536346737, 1.378155224, -0.33832077, 
                                0.462387778, 0.239598554, 0.098697901, 0.36378697, 0.090794172, 
                                -0.551599533, -0.617800856, -0.991389847, -0.311169354, 1.468176972, 
                                -0.470400525, 0.207971242, 0.02579058, 0.40399296, 0.251412099, 
                                -0.018306778, 0.277837576, -0.11047391, 0.066928075, 0.128539358, 
                                -0.189114844, 0.133558377, 2.24503362, 149.62]])

# Example of a fraudulent transaction (expected output: 1)
sample_input_fraud = np.array([[406,-2.3122265423263,1.95199201064158,-1.60985073229769,3.9979055875468,
                                -0.522187864667764,-1.42654531920595,-2.53738730624579,1.39165724829804,
                                -2.77008927719433,-2.77227214465915,3.20203320709635,-2.89990738849473,
                                -0.595221881324605,-4.28925378244217,0.389724120274487,-1.14074717980657,
                                -2.83005567450437,-0.0168224681808257,0.416955705037907,0.126910559061474,
                                0.517232370861764,-0.0350493686052974,-0.465211076182388,0.320198198514526,
                                0.0445191674731724,0.177839798284401,0.261145002567677,-0.143275874698919,0]])

# Make predictions using the model
prediction_legit = model.predict(sample_input_legit)
prediction_fraud = model.predict(sample_input_fraud)

# Display the results
print(f"Sample Legitimate Transaction Input: {sample_input_legit.flatten().tolist()}")
print(f"Model Output: {prediction_legit[0]}\n")

print(f"Sample Fraudulent Transaction Input: {sample_input_fraud.flatten().tolist()}")
print(f"Model Output: {prediction_fraud[0]}")


Sample Legitimate Transaction Input: [0.0, -1.359807134, -0.072781173, 2.536346737, 1.378155224, -0.33832077, 0.462387778, 0.239598554, 0.098697901, 0.36378697, 0.090794172, -0.551599533, -0.617800856, -0.991389847, -0.311169354, 1.468176972, -0.470400525, 0.207971242, 0.02579058, 0.40399296, 0.251412099, -0.018306778, 0.277837576, -0.11047391, 0.066928075, 0.128539358, -0.189114844, 0.133558377, 2.24503362, 149.62]
Model Output: 0 (Expected: 0)

Sample Fraudulent Transaction Input: [406.0, -2.3122265423263, 1.95199201064158, -1.60985073229769, 3.9979055875468, -0.522187864667764, -1.42654531920595, -2.53738730624579, 1.39165724829804, -2.77008927719433, -2.77227214465915, 3.20203320709635, -2.89990738849473, -0.595221881324605, -4.28925378244217, 0.389724120274487, -1.14074717980657, -2.83005567450437, -0.0168224681808257, 0.416955705037907, 0.126910559061474, 0.517232370861764, -0.0350493686052974, -0.465211076182388, 0.320198198514526, 0.0445191674731724, 0.177839798284401, 0.261145