In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [5]:
df = pd.read_csv("fraud_dataset.csv", na_values=["null", "NULL", "NaN", "nan"])
df.head()


Unnamed: 0,TransactionID,Amount,Location,Device,IsFraud
0,1,200,New York,Mobile,0
1,2,500,London,Desktop,1
2,3,120,Delhi,Mobile,0
3,4,2500,Sydney,Tablet,1
4,5,75,Tokyo,Mobile,0


In [6]:
print("Missing values:\n", df.isnull().sum())
df = df.fillna(0)
print("Dataset shape:", df.shape)


Missing values:
 TransactionID    0
Amount           0
Location         0
Device           0
IsFraud          0
dtype: int64
Dataset shape: (10, 5)


In [7]:
print(df.describe())

if "Class" in df.columns:
    sns.countplot(x="Class", data=df)
    plt.title("Fraud vs Non-Fraud")
    plt.show()


       TransactionID       Amount    IsFraud
count       10.00000    10.000000  10.000000
mean         5.50000   846.500000   0.500000
std          3.02765   891.272654   0.527046
min          1.00000    75.000000   0.000000
25%          3.25000   230.000000   0.000000
50%          5.50000   475.000000   0.500000
75%          7.75000  1275.000000   1.000000
max         10.00000  2500.000000   1.000000


In [None]:
X = df.drop("IsFraud", axis=1)   # Features (input data)
y = df["IsFraud"]                # Target (fraud or not)


print("Feature shape:", X.shape)
print("Target shape:", y.shape)


KeyError: "['Class'] not found in axis"

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])


In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


In [None]:
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Not Fraud", "Fraud"], yticklabels=["Not Fraud", "Fraud"])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()


In [None]:
import joblib
joblib.dump(model, "fraud_detection_model.pkl")
print("✅ Model saved as fraud_detection_model.pkl")
