In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

In [2]:
# Load the dataset
df = pd.read_csv('creditcard.csv')

In [3]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
# Separate features and target
X = df.drop('Class', axis=1)
y = df['Class']

In [5]:
# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.168375e-15,3.416908e-16,-1.379537e-15,2.074095e-15,9.604066e-16,1.487313e-15,-5.556467e-16,1.213481e-16,-2.406331e-15,...,1.654067e-16,-3.568593e-16,2.578648e-16,4.473266e-15,5.340915e-16,1.683437e-15,-3.660091e-16,-1.22739e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [7]:
# Handle imbalance using SMOTE
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_scaled, y)

In [8]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [9]:
from sklearn.ensemble import RandomForestClassifier

# Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [12]:
from sklearn.metrics import classification_report, confusion_matrix

# Make predictions
y_pred = model.predict(X_test)

print("Classification Report:\n", classification_report(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726



In [14]:
print(df.iloc[100])


Time      68.000000
V1         1.156939
V2         0.037215
V3         0.556799
V4         0.519507
V5        -0.479754
V6        -0.352714
V7        -0.222487
V8         0.158242
V9         0.011252
V10        0.105584
V11        1.612099
V12        0.354493
V13       -1.434536
V14        0.796995
V15        0.745106
V16        0.222868
V17       -0.229199
V18       -0.364809
V19       -0.254105
V20       -0.221852
V21       -0.182662
V22       -0.612268
V23        0.197305
V24        0.174883
V25        0.032497
V26        0.099480
V27       -0.026816
V28        0.004199
Amount     2.690000
Class      0.000000
Name: 100, dtype: float64


In [20]:
import numpy as np

def test_transaction():
    print("\nEnter the transaction details (30 values):")
    input_values = []
    for i in range(30):
        val = float(input(f"Value for feature V{i} (or Time/Amount): "))
        input_values.append(val)

    input_scaled = scaler.transform([input_values])
    prediction = model.predict(input_scaled)

    if prediction[0] == 1:
        print("⚠️ Fraudulent Transaction Detected!")
    else:
        print("✅ Legitimate Transaction.")

# Run testing interface
test_transaction()



Enter the transaction details (30 values):
Value for feature V0 (or Time/Amount): 0
Value for feature V1 (or Time/Amount): 1.156939
Value for feature V2 (or Time/Amount): 0.037215
Value for feature V3 (or Time/Amount): 0.556799
Value for feature V4 (or Time/Amount): 0.519507
Value for feature V5 (or Time/Amount): -0.479754
Value for feature V6 (or Time/Amount): -0.352714
Value for feature V7 (or Time/Amount): -0.222487
Value for feature V8 (or Time/Amount): 0.158242
Value for feature V9 (or Time/Amount): 0.011252
Value for feature V10 (or Time/Amount): 0.105584
Value for feature V11 (or Time/Amount): 1.612099
Value for feature V12 (or Time/Amount): 0.354493
Value for feature V13 (or Time/Amount): -1.434536
Value for feature V14 (or Time/Amount): 0.796995
Value for feature V15 (or Time/Amount): 0.1745106
Value for feature V16 (or Time/Amount): 0.222868
Value for feature V17 (or Time/Amount): -0.229199
Value for feature V18 (or Time/Amount): -0.364809
Value for feature V19 (or Time/Amou



In [16]:
sample_row = X.iloc[100]
sample_scaled = scaler.transform([sample_row])
pred = model.predict(sample_scaled)
print("Fraud" if pred[0] == 1 else "Legit")


Legit


