<a href="https://colab.research.google.com/github/Bhavya-2004-svg/credit-card-fraud-detection/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Read the credit card transactions into a DataFrame.
# The archive is read directly: pandas' default compression='infer' picks up
# the .zip extension. Point dataset_path at your own copy if it lives elsewhere.
dataset_path = "/content/creditcard.csv.zip"
data = pd.read_csv(dataset_path)

# Quick sanity check: show the first five rows as plain text.
preview_rows = data.head()
print(preview_rows)

# The steps below need a 'Time' column and a per-card identifier 'CardID'.
# NOTE(review): the frame printed by head() shows 'Time' and anonymised 'V*'
# columns and no 'CardID' - confirm the real schema of your file.
has_card_id = 'CardID' in data.columns
if not has_card_id:
    # No card identifier available: fall back to a single dummy card (value 1)
    # so the groupby below still runs. Every transaction is then treated as
    # belonging to the same card.
    data['CardID'] = 1
    print("Created 'CardID' column with dummy values.")

# Order transactions chronologically within each card.
sort_keys = ['CardID', 'Time']
data = data.sort_values(by=sort_keys)

# Gap between consecutive transactions of the same card. The first transaction
# of a card has no predecessor (diff gives NaN), which is replaced by 0.
per_card_gap = data.groupby('CardID')['Time'].diff()
data['TimeSinceLastTransaction'] = per_card_gap.fillna(0)

# Create a new label based on TimeSinceLastTransaction.
# A transaction is "suspicious" (1) when it follows the previous transaction of
# the same card by less than the threshold; otherwise it is 0.
threshold_time = 10  # Threshold in minutes (e.g., 10 minutes)

# 'TimeSinceLastTransaction' is a difference of raw 'Time' values. In the public
# creditcard.csv dataset 'Time' is measured in seconds, so the minute threshold
# must be converted before comparing (comparing against 10 directly would mean
# 10 seconds, not 10 minutes).
# NOTE(review): confirm the unit of 'Time' if you use a different file.
threshold_seconds = threshold_time * 60

# The first transaction of each card has no predecessor; its gap was filled
# with 0 when the feature was built, which would otherwise always fall below
# the threshold. Exclude those rows from the "suspicious" class explicitly.
is_first_for_card = data.groupby('CardID').cumcount() == 0

is_rapid_repeat = data['TimeSinceLastTransaction'] < threshold_seconds
data['NewLabel'] = (is_rapid_repeat & ~is_first_for_card).astype(int)

# The original label column (if any) is left untouched; NewLabel is added next to it.
print("Unique values in the new label:")
print(data['NewLabel'].value_counts())

# Select features and the new target (NewLabel)
# NOTE(review): NewLabel is computed directly from TimeSinceLastTransaction,
# which is also used as a feature here, so the classifier can recover the label
# from that single column. The scores below therefore do not measure real
# fraud-detection ability - reconsider the feature set or the target.
features = ['Amount', 'TimeSinceLastTransaction']
X = data[features]
y = data['NewLabel']

# Split BEFORE scaling so that the test rows never influence the scaler's
# mean/variance (fitting the scaler on the full data leaks test-set statistics).
# Same test_size/random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features: fit on the training split only, then apply the
# same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that still expects the fully scaled matrix; it now
# uses the training-set statistics as well.
X_scaled = scaler.transform(X)

# Fit a 100-tree random forest on the training split (fixed seed for
# reproducibility), then predict labels for the held-out test split.
forest_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_params).fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report each evaluation metric under its heading, in order:
# per-class precision/recall/F1 first, then the raw confusion matrix.
evaluation_steps = (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
)
for heading, metric_fn in evaluation_steps:
    print(heading)
    print(metric_fn(y_test, y_pred))


   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 