In [1]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
output_file = "preprocessed_creditcard.csv"

# Build the preprocessed dataset once and cache it on disk. Both branches leave
# `df` holding the preprocessed frame, so later cells also work on a fresh
# kernel when the cached file is already present.
if os.path.exists(output_file):
    print(f"File '{output_file}' already exists. Skipping creation.")
    # Reuse the cached result instead of leaving `df` undefined.
    df = pd.read_csv(output_file)
else:
    # Load raw data
    df = pd.read_csv("creditcard.csv")

    # Standardize Time and Amount into new columns. fit_transform refits the
    # scaler on every call, so after this block `scaler` holds the Amount fit.
    scaler = StandardScaler()
    df["scaled_Time"] = scaler.fit_transform(df["Time"].values.reshape(-1, 1))
    df["scaled_Amount"] = scaler.fit_transform(df["Amount"].values.reshape(-1, 1))

    # Drop the unscaled originals. The scaled columns were created under their
    # final names above, so no rename step is needed.
    df = df.drop(columns=["Time", "Amount"])

    # Save new file
    df.to_csv(output_file, index=False)

    print(f"Preprocessed file saved as '{output_file}'.")

Preprocessed file saved as 'preprocessed_creditcard.csv'.


In [3]:
# Sanity check on the standardized columns: after scaling, the mean should be
# close to 0 and the standard deviation close to 1.
print(
    df.loc[:, ["scaled_Time", "scaled_Amount"]].describe()
)

        scaled_Time  scaled_Amount
count  2.848070e+05   2.848070e+05
mean  -5.109395e-17  -3.672378e-17
std    1.000002e+00   1.000002e+00
min   -1.996583e+00  -3.532294e-01
25%   -8.552120e-01  -3.308401e-01
50%   -2.131453e-01  -2.652715e-01
75%    9.372174e-01  -4.471707e-02
max    1.642058e+00   1.023622e+02
