In [None]:
# Step 1: Load necessary libraries
import pandas as pd


In [None]:
# Step 2: Load the dataset
file_path = 'creditcard.csv'  # Update with your file path
df = pd.read_csv(file_path)

# Fail fast if the CSV is not the expected dataset: later cells index the
# 'Time', 'Amount' and 'Class' columns by name, and a missing column would
# otherwise surface as a KeyError far away from the actual cause.
missing_columns = {'Time', 'Amount', 'Class'} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{file_path} is missing expected column(s): {sorted(missing_columns)}"
    )

# Preview the first few rows. A bare last expression is rendered by the
# notebook as a rich table instead of the truncated plain text print() gives.
df.head()


In [None]:
# Step 3: Load additional libraries for visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart with one bar per value of the 'Class' target column.
# countplot returns the Axes it drew on, so title and axis labels are
# applied to that Axes in a single call.
sns.countplot(data=df, x='Class').set(
    title='Distribution of Fraudulent Transactions',
    xlabel='Class (0: Genuine, 1: Fraud)',
    ylabel='Count',
)
plt.show()

# Summary statistics (count, mean, std, quartiles, ...) of the dataset
print(df.describe())


In [None]:
# Step 4a: Check for missing values (prints the null count of every column)
print(df.isnull().sum())

# Step 4b: Load library for scaling
from sklearn.preprocessing import StandardScaler

# Build the modelling frame as a NEW object instead of mutating `df` in place.
# With the previous in-place drop, running this cell a second time raised
# KeyError: 'Amount' (the column had already been removed), and any earlier
# cell that displayed `df` no longer matched the frame in memory.
df_model = df.drop(columns=['Time', 'Amount'])

# Standardise the 'Amount' feature and store it as 'scaled_amount'.
# The scaler expects a 2-D input, hence the reshape to a single column.
# NOTE(review): the scaler is fitted on every row, i.e. before any train/test
# split, so test-row statistics influence the scaling — consider fitting it on
# the training rows only.
scaler = StandardScaler()
df_model['scaled_amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))

# Separate features and target variable.
# Resulting feature columns and their order are the same as before:
# every original column except 'Time', 'Amount' and 'Class', then 'scaled_amount'.
X = df_model.drop(columns='Class')
y = df_model['Class']


In [None]:
# Step 5: Load library for handling imbalanced datasets
from imblearn.over_sampling import SMOTE

# Oversample with SMOTE; random_state is fixed so the result is repeatable.
# NOTE(review): every row of X/y is resampled here, before any train/test
# split. A split taken from X_resampled will therefore contain synthetic rows
# in its test portion — confirm that is intended.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Bar chart of the class counts after resampling
sns.countplot(x=y_resampled).set(
    title='Distribution of Fraudulent Transactions After SMOTE',
    xlabel='Class (0: Genuine, 1: Fraud)',
    ylabel='Count',
)
plt.show()


In [None]:
# Step 6: Load libraries for train-test splitting and training-set resampling
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Hold out the test set from the ORIGINAL data (X, y), not from the
# SMOTE-resampled data. Splitting X_resampled/y_resampled put synthetic rows
# in the test set, and those rows can be interpolated from rows that ended up
# in the training set. That leaks information and scores the model on an
# artificial 50/50 class balance.
# stratify=y keeps the (rare) positive class present in both partitions at
# the same proportion as in the full dataset.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Oversample the minority class in the training partition only; the test
# partition stays untouched so evaluation reflects real-world class balance.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train_raw, y_train_raw)


In [None]:
# Step 7: Load library for model training
from sklearn.linear_model import LogisticRegression

# Build the logistic-regression classifier (solver capped at 1000 iterations)
# and fit it on the training split in one expression; fit() returns the
# estimator itself, so `model` is the fitted classifier.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Echo the fitted estimator as the cell output, as a trailing fit() call does
model


In [None]:
# Step 8: Load libraries for evaluation
from sklearn.metrics import classification_report, confusion_matrix

# Predict labels for the test split and tabulate them against the true labels
y_pred = model.predict(X_test)
confusion_mat = confusion_matrix(y_test, y_pred)

# Text summary of the per-class metrics
print(classification_report(y_test, y_pred))

# Confusion matrix as an annotated heatmap; fmt='d' shows the cell counts as
# integers. heatmap returns the Axes it drew on, which is labelled in one call.
sns.heatmap(confusion_mat, annot=True, fmt='d', cmap='Blues').set(
    title='Confusion Matrix',
    xlabel='Predicted',
    ylabel='Actual',
)
plt.show()
