In [3]:
import pandas as pd

# 1. Load the Data
# Reads the whole training CSV from the notebook's working directory into
# `df`; the statistics below (and the next cell) only touch its 'amt' column.
print("Loading data... please wait.")
df = pd.read_csv('fraudTrain.csv')

# --- 2. DESCRIPTIVE STATISTICS ---
print("\n" + "="*40)
print("DESCRIPTIVE ANALYSIS")
print("="*40)

# Look the column up once so every statistic is computed on the same Series.
amt_col = df['amt']

# |skewness| above this is treated as "highly skewed" (common rule of thumb).
SKEW_THRESHOLD = 1

# Mean vs Median (Skewness Check): a mean well above the median is the first
# hint of a long right tail.
mean_val = amt_col.mean()
median_val = amt_col.median()
print(f"Mean Amount:   ${mean_val:.2f}")
print(f"Median Amount: ${median_val:.2f}")

# Standard Deviation (Outlier Check)
std_dev = amt_col.std()
print(f"Std Deviation: {std_dev:.2f}")

# Skewness (pandas returns NaN when there are too few values to estimate it)
skew = amt_col.skew()
print(f"Skewness:      {skew:.2f}")

# Always print a conclusion so the cell never ends silently, whatever the
# sign or size of the skewness.
if skew > SKEW_THRESHOLD:
    print("\n>> CONCLUSION: Data is HIGHLY SKEWED (Right Skewed).")
    # NOTE(review): the fraud attribution below is a hypothesis -- nothing in
    # this cell checks which rows are fraudulent. Confirm before relying on it.
    print(">> REASON: We have extreme outliers (Fraud transactions).")
    print(">> NEXT STEP: We must perform Log Transformation.")
elif skew < -SKEW_THRESHOLD:
    print("\n>> CONCLUSION: Data is HIGHLY SKEWED (Left Skewed).")
    print(">> NEXT STEP: Log Transformation will not help; inspect the low-end tail.")
else:
    # Also reached when skew is NaN, because both comparisons above are False.
    print("\n>> CONCLUSION: Skewness is within +/-1 (or undefined).")
    print(">> NEXT STEP: No transformation is required on skewness alone.")

Loading data... please wait.

DESCRIPTIVE ANALYSIS
Mean Amount:   $71.69
Median Amount: $47.74
Std Deviation: 161.63
Skewness:      29.53

>> CONCLUSION: Data is HIGHLY SKEWED (Right Skewed).
>> REASON: We have extreme outliers (Fraud transactions).
>> NEXT STEP: We must perform Log Transformation.


In [4]:
# --- 3. ADDITIONAL STATS (Mode & Quantiles) ---
# Extends the descriptive summary of the 'amt' column of `df` (loaded in an
# earlier cell) with mode, quartiles, range and kurtosis.
print("\n" + "="*40)
print("EXTENDED DESCRIPTIVE STATS")
print("="*40)

# Look the column up once so every statistic is computed on the same Series.
amt_col = df['amt']

# Mode (Most common transaction amount)
# Series.mode() returns every tied value in ascending order, so the first
# entry is the smallest of the most frequent amounts. It returns an empty
# Series when there are no non-null values; fall back to NaN in that case so
# the cell reports "$nan" like the other statistics instead of raising.
amt_modes = amt_col.mode()
mode_val = amt_modes.iloc[0] if not amt_modes.empty else float('nan')
print(f"Mode (Most Frequent): ${mode_val:.2f}")

# Quantiles: one call yields both quartiles, in the order requested.
q1, q3 = amt_col.quantile([0.25, 0.75])
print(f"25th Percentile (Q1): ${q1:.2f}")
print(f"75th Percentile (Q3): ${q3:.2f}")

# Measure of Dispersion (Range)
data_range = amt_col.max() - amt_col.min()
print(f"Range (Max - Min):   ${data_range:.2f}")

# Kurtosis (pandas reports excess kurtosis, i.e. a normal distribution is 0)
kurt = amt_col.kurt()
print(f"Kurtosis:            {kurt:.2f}")

print("\n" + "="*40)


EXTENDED DESCRIPTIVE STATS
Mode (Most Frequent): $3.52
25th Percentile (Q1): $9.64
75th Percentile (Q3): $83.71
Range (Max - Min):   $15033.18
Kurtosis:            1762.37

