In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import pandas as pd
# --- Data loading ---
data = pd.read_csv("D:/Project 1 Credit Card Fraud Detection/Data/credit_card.csv")

# "Class" is the prediction target; every other column is treated as a feature.
X = data.drop(columns="Class")
y = data["Class"]

# Hold out 30% of the rows for evaluation, stratifying on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training partition
# only and then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Oversample the minority class with SMOTE; only the training data is resampled.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# Build the Random Forest and fit it on the resampled training set.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train_resampled,
    y_train_resampled,
)

# Score the scaled test partition: positive-class probabilities (column 1)
# feed the AUC, hard labels feed the classification report.
y_pred_proba = rf.predict_proba(X_test_scaled)[:, 1]
y_pred = rf.predict(X_test_scaled)

# Report per-class metrics followed by the AUC-ROC.
print("Classification Report:")
print(classification_report(y_test, y_pred))
print(f"AUC-ROC: {roc_auc_score(y_test, y_pred_proba):.4f}")


In [7]:
import pandas as pd

# Path to the credit-card transactions CSV
file_path = 'D:/Project 1 Credit Card Fraud Detection/Data/credit_card.csv'  # Replace with the path to your CSV file

# Read the CSV file
df = pd.read_csv(file_path)

# Check that the label column exists. Column lookup is case-sensitive, so the
# error message names the column exactly as it is checked ('Class').
if 'Class' not in df.columns:
    print("Error: 'Class' column not found in the dataset.")
else:
    # Keep only the rows whose 'Class' value is 1 (fraud)
    fraud_rows = df[df['Class'] == 1]

    # Nothing to show or save when no row is labelled 1
    if fraud_rows.empty:
        print("No rows with class 1 (fraud) found.")
    else:
        # Report how many fraud rows were found and print them
        print(f"Found {len(fraud_rows)} rows with class 1 (fraud):")
        print(fraud_rows)

        # Save the fraud rows to 'fraud_rows.csv' (relative path), omitting
        # the DataFrame index from the file
        fraud_rows.to_csv('fraud_rows.csv', index=False)
        print("Fraud rows have been saved to 'fraud_rows.csv'.")


Found 492 rows with class 1 (fraud):
            Time        V1        V2        V3        V4        V5        V6  \
541        406.0 -2.312227  1.951992 -1.609851  3.997906 -0.522188 -1.426545   
623        472.0 -3.043541 -3.157307  1.088463  2.288644  1.359805 -1.064823   
4920      4462.0 -2.303350  1.759247 -0.359745  2.330243 -0.821628 -0.075788   
6108      6986.0 -4.397974  1.358367 -2.592844  2.679787 -1.128131 -1.706536   
6329      7519.0  1.234235  3.019740 -4.304597  4.732795  3.624201 -1.357746   
...          ...       ...       ...       ...       ...       ...       ...   
279863  169142.0 -1.927883  1.125653 -4.518331  1.749293 -1.566487 -2.010494   
280143  169347.0  1.378559  1.289381 -5.004247  1.411850  0.442581 -1.326536   
280149  169351.0 -0.676143  1.126366 -2.213700  0.468308 -1.120541 -0.003346   
281144  169966.0 -3.113832  0.585864 -5.399730  1.817092 -0.840618 -2.943548   
281674  170348.0  1.991976  0.158476 -2.583441  0.408670  1.151147 -0.096695   

  