In [None]:
# ============================================================================
# STEP 1: DATA LOADING & INITIAL EXPLORATION (robust loader)
# This cell looks for the dataset in common locations and reports which path is used.
# ============================================================================
from pathlib import Path
import pandas as pd

# Show where the notebook is executing so relative paths can be sanity-checked
print('Working dir:', Path.cwd())

# Locations probed for the CSV, in priority order:
# data/processed -> data/raw -> data/ itself (all relative to the working dir)
data_dir = Path('data')
dataset_name = 'Fraud_Analysis_Dataset.csv'
candidates = [
    data_dir / 'processed' / dataset_name,
    data_dir / 'raw' / dataset_name,
    data_dir / dataset_name,
]

# Pick the first candidate that exists on disk; None means nothing was found
dataset_path = next((path for path in candidates if path.exists()), None)

if dataset_path is None:
    # Fail loudly and list every location that was tried
    searched = ', '.join(map(str, candidates))
    raise FileNotFoundError(
        f'Fraud dataset not found. Searched: {searched}\n'
        "Make sure the notebook's working directory is the project root or update the path accordingly."
    )

print(f'✓ Loading dataset from: {dataset_path}')
df_raw = pd.read_csv(dataset_path)

# Summary banner: row/column counts followed by a preview of the first rows
banner = '=' * 70
n_rows, n_cols = df_raw.shape

print(banner)
print(' RAW DATA LOADED SUCCESSFULLY')
print(banner)
print(f'Dataset Shape: {n_rows:,} rows × {n_cols} columns\n')
print('First 5 rows:')
display(df_raw.head())

 RAW DATA LOADED SUCCESSFULLY
Dataset Shape: 11,142 rows × 10 columns

First 5 rows:


Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud
0,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1
1,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1
2,1,TRANSFER,2806.0,C1420196421,2806.0,0.0,C972765878,0.0,0.0,1
3,1,CASH_OUT,2806.0,C2101527076,2806.0,0.0,C1007251739,26202.0,0.0,1
4,1,TRANSFER,20128.0,C137533655,20128.0,0.0,C1848415041,0.0,0.0,1
