# Credit Card Fraud Detection
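This notebook builds a machine learning model to identify fraudulent credit card transactions: it loads and explores the dataset, normalizes the features, handles class imbalance with SMOTE, trains a Random Forest classifier, evaluates it with precision, recall, and F1-score, and saves the trained model to disk.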

In [5]:
!pip install imbalanced-learn
!pip install pandas numpy scikit-learn imbalanced-learn matplotlib seaborn



In [6]:
# Import Required Libraries
import pickle

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns

## Load and Explore the Dataset

In [7]:
# Load the dataset (read from creditcard.csv, resolved against the working directory)
data = pd.read_csv('creditcard.csv')
# Display basic information.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly -- wrapping it in print() would append a stray "None" line.
data.info()
# Check for class imbalance: row count per value of the 'Class' column
print(data['Class'].value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

## Visualize the Dataset

In [None]:
# Class balance: number of rows for each value of the 'Class' column
fig, ax = plt.subplots()
sns.countplot(x='Class', data=data, ax=ax)
ax.set_title('Class Distribution')
plt.show()

# Pairwise correlations between all columns, drawn as a heatmap
correlation_matrix = data.corr()
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, cmap='coolwarm', annot=False, ax=ax)
ax.set_title('Feature Correlation Heatmap')
plt.show()

# Histogram with a KDE overlay for each of a few selected features
features_to_plot = ['V1', 'V2', 'V3', 'V4']
for feature in features_to_plot:
    fig, ax = plt.subplots()
    sns.histplot(data[feature], kde=True, bins=30, ax=ax)
    ax.set_title(f'Distribution of {feature}')
    plt.show()

## Preprocess and Normalize the Data

In [8]:
# Separate features and target
X = data.drop('Class', axis=1)
y = data['Class']
# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

## Handle Class Imbalance

In [9]:
# Apply SMOTE for oversampling
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

## Split the Dataset into Training and Testing Sets

In [10]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

## Train a Classification Model

In [None]:
# Train a Random Forest Classifier
# random_state=42 pins the forest's randomness so reruns build the same trees.
# n_jobs=-1 builds the trees in parallel on all available cores; it only
# affects wall-clock time, not the fitted model.
rf_model = RandomForestClassifier(random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train)

## Evaluate the Model

In [None]:
# Predict labels for the held-out test features
y_pred = rf_model.predict(X_test)
# Per-class report first, then the three headline scores for the positive
# class (label 1, scikit-learn's default pos_label)
print(classification_report(y_test, y_pred))
precision, recall, f1 = (
    score_fn(y_test, y_pred)
    for score_fn in (precision_score, recall_score, f1_score)
)
print(f'Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

## Save the Model to a File

In [None]:
# Save the trained model using pickle
# NOTE(review): the model is fitted on StandardScaler-transformed features, so
# whatever loads this file must apply the same fitted `scaler` before calling
# predict -- consider persisting the scaler alongside the model.
# Only unpickle this file from a trusted location: pickle.load can execute
# arbitrary code.
with open('rf_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)