# Credit Card Transaction Data Preprocessing

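This notebook loads the credit card transaction data from `Transaction_Data.xlsx`, cleans it (fills or drops missing values and converts column types), computes a per-user Z-score of the transaction amount for statistical profiling, and saves the cleaned dataset to `Cleaned_Transaction_Data.xlsx` for anomaly detection and Tableau import.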

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Locations of the data directory and of the input/output workbooks.
# DATA_DIR is created if it does not exist yet so the output can be written later.
DATA_DIR = Path('../data')
DATA_DIR.mkdir(exist_ok=True)
INPUT_FILE = DATA_DIR.joinpath('Transaction_Data.xlsx')
OUTPUT_FILE = DATA_DIR.joinpath('Cleaned_Transaction_Data.xlsx')

# Read the raw workbook. A missing input file is reported in plain words and
# then re-raised so the notebook stops here rather than failing further down.
try:
    df = pd.read_excel(INPUT_FILE)
except FileNotFoundError:
    print(f'Error: {INPUT_FILE} not found.')
    raise
else:
    # Quick sanity check of what was loaded: dimensions and the first rows.
    print('Data Shape:', df.shape)
    print(df.head())

In [None]:
# Report how many values are missing in each column before cleaning.
print('Missing Values:\n', df.isna().sum())

# Optional descriptive columns: replace a missing value with a default label.
fill_defaults = {
    'Is_Fraud': 'No',
    'Merchant': 'Unknown',
    'Location': 'Unknown',
}
for column_name, default_value in fill_defaults.items():
    df[column_name] = df[column_name].fillna(default_value)

# Required columns: a row missing any of these is removed entirely.
required_columns = ['Transaction_ID', 'User_ID', 'Transaction_Amount', 'Transaction_Date']
df = df.dropna(subset=required_columns)
print('Data Shape after cleaning:', df.shape)

In [None]:
# Convert data types. With errors='coerce', a value that cannot be parsed
# becomes NaT (dates) or NaN (amounts) instead of raising.
df['Transaction_Date'] = pd.to_datetime(df['Transaction_Date'], errors='coerce')
df['Transaction_Amount'] = pd.to_numeric(df['Transaction_Amount'], errors='coerce')

# Coercion can turn a previously non-null but unparseable value into a missing
# one, so rows without a usable date or amount are removed here; otherwise they
# would distort the per-user statistics and reach the output file.
rows_before = len(df)
df = df.dropna(subset=['Transaction_Date', 'Transaction_Amount'])
print('Rows dropped after type conversion:', rows_before - len(df))

# Compute Z-scores for each user's transactions:
#   z = (amount - user mean) / user standard deviation
user_amounts = df.groupby('User_ID')['Transaction_Amount']
user_mean = user_amounts.transform('mean')
# Sample standard deviation (ddof=1): NaN when a user has a single transaction.
user_std = user_amounts.transform('std')

# The Z-score is undefined when the standard deviation is 0 (all amounts equal)
# or NaN (only one transaction). In both cases the amount does not deviate from
# the user's own mean, so the score is set to 0 rather than left as NaN/inf.
has_spread = user_std.notna() & user_std.ne(0)
df['Z_Score_Amount'] = (
    (df['Transaction_Amount'] - user_mean).div(user_std).where(has_spread, 0.0)
)

# Save cleaned data (index=False keeps the row index out of the workbook).
df.to_excel(OUTPUT_FILE, index=False)
print(f'Cleaned data saved to {OUTPUT_FILE}')