# **Credit Card Fraud Detection (Part 1)**

This notebook loads and explores the credit card transactions dataset (`creditcard.csv`), 
standardizes the `Time` and `Amount` features, and creates a stratified 80/20 train/test split for model training.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline


In [None]:
# Read the raw transactions CSV (expected in the notebook's working directory)
df = pd.read_csv(filepath_or_buffer="creditcard.csv")

# Preview the first five rows as the cell output
df.head(n=5)


In [None]:
# Structural overview: column names, dtypes and non-null counts
df.info()

# Number of missing entries in each column
print("\nMissing values:\n", df.isna().sum())

# Summary statistics of the numeric columns (displayed as the cell output)
df.describe()


In [None]:
# Bar chart of how many rows fall into each value of the 'Class' column
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=df, x='Class', ax=ax)
ax.set_title('Class Distribution (0 = Genuine, 1 = Fraud)')
plt.show()


In [None]:
# Split features and target
X = df.drop(columns='Class')
y = df['Class']

# Train-Test Split (stratified on y so both sets keep the same class ratio).
# The split is done BEFORE scaling: fitting the scaler on the full dataset
# would let test-set statistics (mean / std) leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Take explicit copies before assigning columns: the split outputs are derived
# from X, and writing into them directly can trigger pandas
# chained-assignment warnings.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale 'Time' and 'Amount'
# Fit on the training rows only, then apply the same transform to the test rows.
# NOTE: X itself is left unscaled; use X_train / X_test for modelling.
scale_cols = ['Time', 'Amount']
scaler = StandardScaler()
X_train[scale_cols] = scaler.fit_transform(X_train[scale_cols])
X_test[scale_cols] = scaler.transform(X_test[scale_cols])

print("Training shape:", X_train.shape)
print("Testing shape:", X_test.shape)
