<a href="https://colab.research.google.com/github/Sara102006/CODSOFT-ML/blob/main/Codsoftml2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Credit card fraud detection using different models
import numpy as np
import pandas as pd
import gc

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

from imblearn.over_sampling import SMOTE

# Load the transactions file and keep a reproducible 30% sample of the rows.
df = pd.read_csv("fraudTest.csv").sample(frac=0.3, random_state=42)  # remove or change frac if needed
print("Dataset Shape:", df.shape)

# Name of the binary label column; everything else is treated as a feature.
target_col = "is_fraud"
print("\nClass Distribution:")
print(df[target_col].value_counts())

# Only drop the columns that actually exist, so a CSV lacking one of them
# does not raise a KeyError.
# NOTE(review): if the CSV carries a saved index column (e.g. "Unnamed: 0"),
# it is kept as a feature here -- confirm that is intended.
drop_cols = [c for c in ("trans_num", "street") if c in df.columns]
df.drop(columns=drop_cols, inplace=True)

# Downcast 64-bit numeric columns to 32-bit to reduce memory.
# The float pass runs before the int pass, and the matching columns are
# looked up again at the start of each pass.
for wide, narrow in (("float64", "float32"), ("int64", "int32")):
    for col in df.select_dtypes(include=[wide]).columns:
        df[col] = df[col].astype(narrow)

# The label is cast to int8 regardless of what the passes above did to it.
df[target_col] = df[target_col].astype("int8")

# Separate the label vector from the feature matrix.
y = df[target_col]
X = df.drop(columns=[target_col])

# Integer-encode every object-dtype feature column.
#
# Each column gets its OWN fitted LabelEncoder, stored in `label_encoders`
# under the column name. Refitting a single shared encoder would overwrite its
# classes_ on every iteration, leaving no way to inverse_transform (or
# consistently re-encode new data for) any column but the last.
#
# NOTE(review): the encoders are fit on the full dataset, before the
# train/test split -- confirm that is acceptable for this analysis.
label_encoders = {}
le = LabelEncoder()  # stays bound even when there are no object columns
for col in X.select_dtypes(include="object").columns:
    # `le` is rebound each iteration so that, as before, it ends up holding
    # the encoder of the last processed column.
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

print("\nFeature Shape After Encoding:", X.shape)

# The raw frame is no longer needed once X and y have been extracted.
del df
gc.collect()

# Split FIRST, then scale.
#
# Fitting StandardScaler on the full dataset lets the mean/variance of the
# held-out rows influence the transformation applied to the training rows
# (data leakage) and makes the test metrics optimistic. The scaler is therefore
# fit on the training fold only and merely applied to the test fold.
# The split itself (same random_state, same stratification on y) selects the
# same rows as splitting the scaled array would.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# The unsplit feature frame is no longer needed.
del X
gc.collect()

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # learn mean/std from training rows only
X_test = scaler.transform(X_test)        # reuse the training statistics

# Oversample the minority class on the TRAINING fold only; the test fold keeps
# its original class distribution so the evaluation stays realistic.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("\nAfter SMOTE:")
print(pd.Series(y_train_res).value_counts())

# Only the resampled training data is used from here on.
del X_train, y_train
gc.collect()

# MODELS
# Three baseline classifiers. Each is trained on the SMOTE-balanced training
# set and evaluated on the untouched test set.
models = {
    # n_jobs is intentionally not passed: for LogisticRegression it only
    # parallelises one-vs-rest fits over classes, so it does nothing for this
    # binary target.
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        n_jobs=-1
    )
}

for name, model in models.items():
    model.fit(X_train_res, y_train_res)

    # Hard 0/1 labels feed the confusion matrix and classification report.
    preds = model.predict(X_test)

    # ROC-AUC is a ranking metric and needs continuous scores. Passing hard
    # labels collapses the curve to a single threshold and understates the
    # AUC. Column 1 of predict_proba corresponds to model.classes_[1], i.e.
    # the positive (fraud) label when the classes are 0 and 1.
    fraud_scores = model.predict_proba(X_test)[:, 1]

    print("\n==============================")
    print(name)
    print("==============================")
    print(confusion_matrix(y_test, preds))
    print(classification_report(y_test, preds))
    print("ROC-AUC Score:", roc_auc_score(y_test, fraud_scores))