In [None]:
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import set_random_seed
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Step 1: Load and preprocess data
# Read the transactions CSV from the working directory into `df`.
csv_path = 'creditcard.csv'
df = pd.read_csv(csv_path)
# Leave the frame as the cell's final expression so the notebook renders it.
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0.0
2,1,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0.0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103083,68487,-1.934422,1.203459,0.033067,-0.735334,-0.522726,-0.326238,-0.722219,1.277395,-0.888523,...,0.147071,0.059291,0.188213,-0.224906,-0.156644,0.931925,-0.740243,-0.253004,3.76,0.0
103084,68488,-1.285444,1.986559,0.916337,2.743234,-0.714003,0.037372,-0.363364,0.955635,-1.638590,...,-0.036838,-0.509744,0.128240,0.273890,-0.394648,-0.086939,-0.339747,-0.025241,12.92,0.0
103085,68488,-0.000129,-3.131204,0.506807,0.150330,-2.509942,0.052333,-0.485208,0.138229,0.120389,...,0.443911,-0.198443,-0.538984,0.538052,0.006565,-0.337456,-0.089457,0.127887,673.56,0.0
103086,68488,1.146771,-0.158963,0.503270,0.611880,-0.348530,0.310625,-0.372723,0.275797,0.466993,...,-0.250705,-0.625876,0.062359,-0.310699,0.236061,0.216203,-0.011947,-0.001154,12.95,0.0


In [None]:
# Drop the 'Time' column as it may not be relevant for fraud detection.
# Rebind `df` rather than mutating it in place, and tolerate the column
# already being absent, so that re-running this cell does not raise KeyError.
df = df.drop(columns='Time', errors='ignore')

In [None]:
# Check for missing values
# Count NaN cells per column, then add the per-column counts into one total.
nulls_per_column = df.isnull().sum()
total_missing = nulls_per_column.sum()
print("Missing values:", total_missing)

Missing values: 6


In [None]:
# Drop rows with missing values
# Remove, in place, every row that contains at least one NaN cell.
df.dropna(axis=0, how='any', inplace=True)


In [None]:
# Step 2: Split data into features (X) and target (y)
target_column = "Class"
# Features: every column except the label. Target: the label column itself.
X = df.drop(columns=[target_column])
y = df[target_column]

In [None]:
# Step 3: Split data into training and testing sets
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is repeatable.
# The positive class is a tiny fraction of the rows, so stratify on y to keep
# its proportion the same in the training and test splits instead of leaving
# the number of positive test rows to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Step 4: Feature scaling
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Step 5: Train a machine learning model (Random Forest Classifier)
# 100 trees with a fixed random_state, fitted on the scaled training split.
rf_settings = {"n_estimators": 100, "random_state": 42}
model_rf = RandomForestClassifier(**rf_settings)
model_rf.fit(X_train_scaled, y_train)


In [None]:
# Step 6: Evaluate model performance
# Predict on the scaled test features, then report overall accuracy followed
# by the per-class precision / recall / F1 table.
y_pred_rf = model_rf.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
report_rf = classification_report(y_test, y_pred_rf)
print("Random Forest Classifier Accuracy:", accuracy_rf)
# Heading and report are separated by a newline, exactly as two prints would be.
print("Classification Report:", report_rf, sep="\n")


Random Forest Classifier Accuracy: 0.9993694829760403
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     20565
         1.0       0.98      0.77      0.86        53

    accuracy                           1.00     20618
   macro avg       0.99      0.89      0.93     20618
weighted avg       1.00      1.00      1.00     20618



In [None]:
# Step 7: Train a neural network model (using Keras)
# Seed the random number generators before any layer is created so that weight
# initialisation (and the batch shuffling in the following fit cell) is
# repeatable when the notebook is restarted and run top to bottom.
set_random_seed(42)

# Fully connected binary classifier: two ReLU hidden layers (128 and 64 units)
# followed by a single sigmoid output unit.
model_nn = Sequential()
# Input width is the number of columns in the scaled training matrix.
model_nn.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model_nn.add(Dense(64, activation='relu'))
model_nn.add(Dense(1, activation='sigmoid'))


In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy metric.
compile_options = dict(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_nn.compile(**compile_options)

# Train on the scaled training split for 10 epochs in mini-batches of 32 with
# per-epoch progress output. The fit call stays the cell's final expression,
# so the notebook displays the object it returns.
fit_options = dict(epochs=10, batch_size=32, verbose=1)
model_nn.fit(X_train_scaled, y_train, **fit_options)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x78927ee9d330>

In [None]:
# Step 8: Evaluate neural network model performance
# evaluate() returns the loss followed by the compiled metric (accuracy).
# Bind the loss to a real name rather than `_`, which IPython uses for the
# previous cell output.
loss_nn, accuracy_nn = model_nn.evaluate(X_test_scaled, y_test)
print("Neural Network Accuracy:", accuracy_nn)

# Accuracy alone says little when one class dominates, so also report
# per-class precision / recall / F1, mirroring the random forest evaluation.
# The sigmoid output is thresholded at 0.5 and cast to the label dtype so the
# report uses the same class labels as y_test.
y_pred_nn = (model_nn.predict(X_test_scaled) > 0.5).astype(y_test.dtype).ravel()
print("Neural Network Classification Report:")
print(classification_report(y_test, y_pred_nn))


Neural Network Accuracy: 0.9991269707679749


In [None]:
# Confusion matrix
# Built from the random forest predictions (y_pred_rf), not the neural network.
# As computed here, rows are true classes and columns are predicted classes.
cm_rf = confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix:")
print(cm_rf)

Confusion Matrix:
[[20564     1]
 [   12    41]]
