In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the CSV into `data` and print it under a heading.
# NOTE(review): the file name has a space before ".csv" — confirm it matches
# the file on disk.
data = pd.read_csv('creditcard .csv')
print("Customer Credit Card Details:", data, sep="\n")

In [None]:
# Data cleansing
# Count the null entries in each column and print the per-column totals.
missing_values = data.isna().sum(axis=0)
print("Missing Values:\n", missing_values)

In [None]:
# Exploratory Data Analysis (EDA)
# Keep the describe() table in `summary` and print it under a heading.
summary = data.describe()
print(f"\nSummary Statistics:\n {summary}")

In [None]:
# Number of rows for each distinct value of the 'Class' column.
class_counts = data.loc[:, 'Class'].value_counts()
print("\nClass Distribution:\n", class_counts)

In [None]:
# Data Visualization
# Bar chart with one bar per 'Class' value, showing the row count of each.
fig, ax = plt.subplots(figsize=(3, 3))
sns.countplot(data=data, x='Class', color="Teal", ax=ax)
ax.set_title('Class Distribution')
plt.show()

In [None]:
# Pie chart of the class distribution.
# Relies on `class_counts` from the class-distribution cell; label 0 is shown
# as "Non-Fraudulent" and label 1 as "Fraudulent".
plt.figure(figsize=(6, 6))
labels = ['Non-Fraudulent', 'Fraudulent']
sizes = [class_counts[class_label] for class_label in (0, 1)]
colors = ['lightblue', 'blue']
explode = (0, 0.1)  # offset only the second ("Fraudulent") wedge
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',
    startangle=140,
    shadow=True,
)
plt.title('Class Distribution')
plt.show()

In [None]:
# Distribution of the transaction 'Amount' for each class, overlaid on one
# set of axes.
#
# sns.distplot is deprecated and scheduled for removal from seaborn, and the
# notebook-wide warnings filter hides that notice. sns.histplot is its
# replacement: kde=True with stat='density' draws a normalised histogram plus
# a KDE curve, which is what distplot(kde=True) produced.
plt.figure(figsize=(6, 4))
sns.histplot(
    data.loc[data['Class'] == 0, 'Amount'],
    bins=50,
    kde=True,
    stat='density',
    color='yellow',
    label='Non-Fraudulent',
)
sns.histplot(
    data.loc[data['Class'] == 1, 'Amount'],
    bins=50,
    kde=True,
    stat='density',
    color='red',
    label='Fraudulent',
)
plt.title('Transaction Amount by Class')
plt.xlabel('Amount')
# The bars are normalised (stat='density'), so the axis is a density rather
# than a raw count.
plt.ylabel('Density')
plt.legend()
plt.show()

In [None]:
# Pair plot of selected features: V1 and V2, coloured by 'Class'.
key_features = ["V1","V2"]
sns.pairplot(data=data, vars=key_features, hue='Class')
plt.show()

In [None]:
# Pair plot of V3 and V4, coloured by 'Class'.
key_features = ["V3","V4"]
sns.pairplot(data=data, vars=key_features, hue='Class')
plt.show()

In [None]:
# Box plot of 'Amount' for each 'Class' value; outlier markers are hidden
# (showfliers=False).
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, x='Class', y='Amount', showfliers=False, ax=ax)
ax.set(title='Amount by Class', xlabel='Class', ylabel='Amount')
plt.show()

In [None]:
# Box plot of 'Time' for each 'Class' value; outlier markers are hidden
# (showfliers=False).
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, x='Class', y='Time', showfliers=False, ax=ax)
ax.set(title='Time by Class', xlabel='Class', ylabel='Time')
plt.show()

In [None]:
# Scatter plots for selected features.
# V1 against V2, with points coloured by 'Class'.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(data=data, x='V1', y='V2', hue='Class', ax=ax)
ax.set_title('Scatter Plot of V1 vs. V2')
plt.show()

In [None]:
# V3 against V4, with points coloured by 'Class'.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(data=data, x='V3', y='V4', hue='Class', ax=ax)
ax.set_title('Scatter Plot of V3 vs. V4')
plt.show()

In [None]:
# Pairwise correlation between the columns of `data`, drawn as an annotated
# heatmap. The figure is large (35x35) so that each annotated cell has room.
correlation_matrix = data.corr()
fig, ax = plt.subplots(figsize=(35, 35))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

In [None]:
# Display the describe() table as this cell's output (the same call whose
# result the summary-statistics cell prints as plain text).
data.describe()

In [None]:
# Separate predictors from the target: every column except 'Class' is a
# feature, and 'Class' is the label to predict.
x = data.drop(columns='Class')
y = data['Class']
# Hold out 20% of the rows for testing; random_state makes the shuffle
# reproducible. stratify=y keeps the proportion of each class the same in the
# training and test partitions.
# NOTE(review): this presumes the positive ("Fraudulent") class is a small
# minority, in which case an unstratified split can leave the test set with
# too few positives for stable precision/recall — confirm against the class
# counts printed earlier.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Feature scaling (optional, but can help logistic regression).
# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Build a LogisticRegression with default settings and fit it on the
# training split. fit() returns the estimator itself, so the trailing bare
# `model` shows the same object the original cell displayed.
model = LogisticRegression().fit(x_train, y_train)
model

In [None]:
# Predict a class label for every row of the held-out test features.
y_pred = model.predict(X=x_test)

In [None]:
# Evaluate the model: apply each metric to the true and predicted test
# labels, in the same order as the names on the left-hand side.
acc, pre, rec, f1, con = (
    metric(y_test, y_pred)
    for metric in (
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
        confusion_matrix,
    )
)

In [None]:
# Print the evaluation results: recall and F1 first, then the confusion
# matrix, then precision and accuracy.
print("Model Evaluation:")
print(f"Recall: {rec}", f"F1 Score: {f1}", sep="\n")

# Confusion matrix, printed under its own heading.
print("Confusion Matrix:", con, sep="\n")

# Precision and accuracy.
print(f"Precision: {pre}", f"Accuracy: {acc}", sep="\n")

In [None]:
# Build the text classification report for the test predictions and print it
# under a heading.
rep = classification_report(y_true=y_test, y_pred=y_pred)
print("\nClassification Report:", rep, sep="\n")