# Comprehensive Exploratory Data Analysis (EDA)
## Fraud Detection Dataset Analysis

This notebook provides a complete exploratory data analysis of the transactions dataset, including:
- Data loading and basic statistics
- Missing values and duplicate analysis
- Target variable (fraud) distribution
- Numerical and categorical feature analysis
- Temporal patterns (hour of day and day of week)
- Correlation analysis
- Outlier inspection (IQR statistics and box plots)
- Key insights and recommendations

## 1. Import Required Libraries

In [None]:
# Standard library
import os
import warnings
from datetime import datetime
from pathlib import Path

# Data manipulation and analysis
import numpy as np
import pandas as pd

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Utilities
warnings.filterwarnings('ignore')

# Plot defaults shared by every figure in this notebook:
# seaborn's "whitegrid" style, a (14, 8) default figure size and a base font size of 10.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (14, 8),
    'font.size': 10,
})

print("All libraries imported successfully.")

## 2. Load Dataset

In [None]:
# Load the transactions dataset.
# The location can be overridden through the TRANSACTIONS_CSV environment variable so
# the notebook also runs on machines that do not share the original author's drive
# layout; when the variable is unset the original hard-coded path is used unchanged.
DEFAULT_DATA_PATH = r'f:\Projects\InfosysVirtualInternship-BFSI\transactions_clean.csv'
DATA_PATH = Path(os.environ.get('TRANSACTIONS_CSV', DEFAULT_DATA_PATH))

# Fail early with an actionable message instead of a bare pandas traceback.
if not DATA_PATH.is_file():
    raise FileNotFoundError(
        f"Transactions file not found: {DATA_PATH}. "
        "Set the TRANSACTIONS_CSV environment variable to the CSV location."
    )

df = pd.read_csv(DATA_PATH)

# Later cells read these columns without checking for them, so verify them once here.
REQUIRED_COLUMNS = ['is_fraud', 'transaction_amount', 'account_age_days', 'hour', 'weekday']
missing_required = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_required:
    raise KeyError(f"Dataset is missing required columns: {missing_required}")

# Display basic information
print(f"Dataset Shape: {df.shape[0]} rows × {df.shape[1]} columns")
print(f"Memory Usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
print("\nDataset loaded successfully.")

## 3. Initial Data Exploration

In [None]:
# Preview the leading records; the bare expression on the last line is what the
# notebook renders as the cell output.
print("First 5 rows of the dataset:")
preview = df.head(n=5)
preview

In [None]:
# Column overview: a numbered listing of column names with their dtypes,
# followed by how many columns are numeric and how many are object-typed.
print("Dataset Information:")
print("\nColumn Names and Data Types:")
position = 0
for col, dtype in df.dtypes.items():
    position += 1
    print(f"  {position:2d}. {col:30s} → {str(dtype)}")

numeric_frame = df.select_dtypes(include=[np.number])
object_frame = df.select_dtypes(include=['object'])

print(f"\nTotal Columns: {df.shape[1]}")
print(f"  - Numeric Columns: {numeric_frame.shape[1]}")
print(f"  - Object Columns: {object_frame.shape[1]}")

In [None]:
# Summary statistics as produced by DataFrame.describe(); the bare expression on the
# last line is what the notebook renders as the cell output.
print("Statistical Summary of Numerical Features:")
summary_stats = df.describe()
summary_stats

## 4. Data Quality Check

In [None]:
# Missing values analysis: per-column null counts and their share of all rows.
print("Missing Values Analysis:")
null_mask = df.isnull()
missing_values = null_mask.sum()
missing_percentage = (missing_values / len(df)) * 100
missing_df = pd.DataFrame({
    'Missing Count': missing_values,
    'Percentage': missing_percentage
})

# Only columns that actually contain nulls are worth showing.
columns_with_gaps = missing_df[missing_df['Missing Count'] > 0]
if columns_with_gaps.empty:
    print("\nNo missing values found in the dataset.")
else:
    print("\nColumns with Missing Values:")
    display(columns_with_gaps)

In [None]:
# Duplicate records check: count rows flagged by DataFrame.duplicated()
# and report their share of the dataset when any exist.
duplicates = df.duplicated().sum()
print(f"Duplicate Records: {duplicates}")
if duplicates == 0:
    print("No duplicate records found.")
else:
    duplicate_share = (duplicates / len(df)) * 100
    print(f"Percentage of Duplicates: {duplicate_share:.2f}%")

## 5. Target Variable Analysis (Fraud Detection)

In [None]:
# Fraud distribution
# Count each class explicitly so that both labels (0 and 1) are always present in the
# result, even when one class does not occur in the data. Later cells index
# fraud_counts[0] / fraud_counts[1] / fraud_percentage[1] and pair fraud_counts.values
# with two fixed bar labels, which would fail if a label were missing.
fraud_label = df['is_fraud']
fraud_counts = pd.Series({
    0: int((fraud_label == 0).sum()),
    1: int((fraud_label == 1).sum()),
})
# Share of each class among the labelled rows (NaN when there are no rows at all).
fraud_percentage = fraud_counts / fraud_counts.sum() * 100

print("Fraud Distribution:")
print(f"  Legitimate Transactions (0): {fraud_counts[0]:,} ({fraud_percentage[0]:.2f}%)")
print(f"  Fraudulent Transactions (1): {fraud_counts[1]:,} ({fraud_percentage[1]:.2f}%)")
print(f"\n  Overall Fraud Rate: {fraud_percentage[1]:.2f}%")
if fraud_counts[1] > 0:
    print(f"  Class Imbalance Ratio: 1:{fraud_counts[0]/fraud_counts[1]:.2f}")
else:
    # Avoid dividing by zero when the dataset contains no fraudulent rows.
    print("  Class Imbalance Ratio: N/A (no fraudulent transactions)")

In [None]:
# Visualize fraud distribution in three panels: linear counts, percentage share,
# and the same counts on a logarithmic y axis.
class_labels = ['Legitimate', 'Fraud']
class_colors = ['#2ecc71', '#e74c3c']
heading_style = dict(fontsize=14, fontweight='bold', pad=20)
bar_style = dict(color=class_colors, alpha=0.8, edgecolor='black', linewidth=2)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
count_ax, share_ax, log_ax = axes

# Panel 1: raw counts, each bar annotated with its value just above the bar top
count_ax.bar(class_labels, fraud_counts.values, **bar_style)
count_ax.set_title('Fraud Distribution - Count', **heading_style)
count_ax.set_ylabel('Number of Transactions', fontsize=12)
count_ax.set_xlabel('Transaction Type', fontsize=12)
for position, total in enumerate(fraud_counts.values):
    count_ax.text(position, total + 100, f'{total:,}', ha='center', fontweight='bold', fontsize=11)
count_ax.grid(axis='y', alpha=0.3)

# Panel 2: percentage share as a pie, with the fraud wedge pulled out
share_ax.pie(fraud_counts.values, labels=class_labels, autopct='%1.2f%%',
             colors=class_colors, startangle=90, explode=(0, 0.1), shadow=True,
             textprops={'fontsize': 11, 'fontweight': 'bold'})
share_ax.set_title('Fraud Distribution - Percentage', **heading_style)

# Panel 3: same bars on a log axis so the smaller class stays visible
log_ax.bar(class_labels, fraud_counts.values, **bar_style)
log_ax.set_yscale('log')
log_ax.set_title('Fraud Distribution - Log Scale', **heading_style)
log_ax.set_ylabel('Number of Transactions (Log Scale)', fontsize=12)
log_ax.set_xlabel('Transaction Type', fontsize=12)
log_ax.grid(axis='y', alpha=0.3, which='both')

plt.tight_layout()
plt.savefig('01_fraud_distribution.png', dpi=300, bbox_inches='tight')
plt.show()
print("\nVisualization saved as: 01_fraud_distribution.png")

## 6. Numerical Features Analysis

In [None]:
# Detailed statistics for numerical features: location, spread and shape measures
# for every listed column that exists in the dataset.
numerical_cols = ['account_age_days', 'transaction_amount', 'hour', 'weekday', 'month', 'transaction_amount_log']

print("Detailed Numerical Features Statistics:\n")
for col in numerical_cols:
    if col not in df.columns:
        continue  # listed feature is absent from this dataset; skip it quietly

    feature = df[col]
    lower_quartile = feature.quantile(0.25)
    upper_quartile = feature.quantile(0.75)

    print(f"--- {col.upper()} ---")
    print(f"  Count: {feature.count():,}")
    print(f"  Mean: {feature.mean():.2f}")
    print(f"  Median: {feature.median():.2f}")
    print(f"  Std Dev: {feature.std():.2f}")
    print(f"  Min: {feature.min():.2f}")
    print(f"  25th Percentile: {lower_quartile:.2f}")
    print(f"  75th Percentile: {upper_quartile:.2f}")
    print(f"  Max: {feature.max():.2f}")
    print(f"  IQR: {upper_quartile - lower_quartile:.2f}")
    print(f"  Skewness: {feature.skew():.2f}")
    print(f"  Kurtosis: {feature.kurtosis():.2f}")
    print()

In [None]:
# Visualize numerical features distributions (3x2 grid):
# amount histogram, amount histogram with log-scaled frequency, account age,
# transactions per hour, transactions per weekday, and amount box plot by fraud status.
fig, axes = plt.subplots(3, 2, figsize=(16, 14))

amount = df['transaction_amount']
amount_mean = amount.mean()
amount_median = amount.median()
account_age = df['account_age_days']
account_age_mean = account_age.mean()

# Transaction Amount, with mean and median reference lines
ax = axes[0, 0]
ax.hist(amount, bins=50, color='#3498db', edgecolor='black', alpha=0.7)
ax.set_title('Transaction Amount Distribution', fontsize=12, fontweight='bold')
ax.set_xlabel('Transaction Amount ($)')
ax.set_ylabel('Frequency')
ax.axvline(amount_mean, color='red', linestyle='--', linewidth=2,
           label=f'Mean: ${amount_mean:.0f}')
ax.axvline(amount_median, color='green', linestyle='--', linewidth=2,
           label=f'Median: ${amount_median:.0f}')
ax.legend()
ax.grid(alpha=0.3)

# Transaction Amount (Log Scale): same histogram, but the frequency axis is logarithmic
ax = axes[0, 1]
ax.hist(amount, bins=50, color='#9b59b6', edgecolor='black', alpha=0.7)
ax.set_title('Transaction Amount Distribution (Log Scale)', fontsize=12, fontweight='bold')
ax.set_xlabel('Transaction Amount ($)')
# Say which axis is log-scaled, matching the label used on the fraud-count log plot.
ax.set_ylabel('Frequency (Log Scale)')
ax.set_yscale('log')
ax.grid(alpha=0.3, which='both')

# Account Age, with a mean reference line
ax = axes[1, 0]
ax.hist(account_age, bins=50, color='#e74c3c', edgecolor='black', alpha=0.7)
ax.set_title('Account Age Distribution', fontsize=12, fontweight='bold')
ax.set_xlabel('Account Age (Days)')
ax.set_ylabel('Frequency')
ax.axvline(account_age_mean, color='blue', linestyle='--', linewidth=2,
           label=f'Mean: {account_age_mean:.0f} days')
ax.legend()
ax.grid(alpha=0.3)

# Hour Distribution
hour_counts = df['hour'].value_counts().sort_index()
ax = axes[1, 1]
ax.bar(hour_counts.index, hour_counts.values, color='#1abc9c', edgecolor='black', alpha=0.7)
ax.set_title('Transactions by Hour of Day', fontsize=12, fontweight='bold')
ax.set_xlabel('Hour (0-23)')
ax.set_ylabel('Transaction Count')
ax.grid(axis='y', alpha=0.3)

# Weekday Distribution
weekday_counts = df['weekday'].value_counts().sort_index()
weekday_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# Bars are drawn at positions 0..n-1 under seven fixed labels. If a weekday never occurs,
# the remaining bars would slide left and sit under the wrong day name. When the codes
# lie in 0-6, pad the absent days with zero so each bar stays under its own label.
# NOTE(review): the labels assume 0 = Monday ... 6 = Sunday — confirm against preprocessing.
if set(weekday_counts.index) <= set(range(7)):
    weekday_counts = weekday_counts.reindex(range(7), fill_value=0)
ax = axes[2, 0]
ax.bar(range(len(weekday_counts)), weekday_counts.values, color='#f39c12', edgecolor='black', alpha=0.7)
ax.set_title('Transactions by Day of Week', fontsize=12, fontweight='bold')
ax.set_xlabel('Day of Week')
ax.set_ylabel('Transaction Count')
ax.set_xticks(range(7))
ax.set_xticklabels(weekday_labels, rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)

# Box plot for Transaction Amount by Fraud Status
amount_by_class = [
    df.loc[df['is_fraud'] == 0, 'transaction_amount'],
    df.loc[df['is_fraud'] == 1, 'transaction_amount'],
]
ax = axes[2, 1]
ax.boxplot(amount_by_class, patch_artist=True,
           boxprops=dict(facecolor='lightblue', alpha=0.7),
           medianprops=dict(color='red', linewidth=2))
# boxplot's `labels` keyword was deprecated in matplotlib 3.9 in favour of `tick_labels`;
# labelling the default box positions (1, 2) afterwards works on both sides of that change.
ax.set_xticks([1, 2])
ax.set_xticklabels(['Legitimate', 'Fraud'])
ax.set_title('Transaction Amount by Fraud Status', fontsize=12, fontweight='bold')
ax.set_ylabel('Transaction Amount ($)')
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('02_numerical_features_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
print("\nVisualization saved as: 02_numerical_features_analysis.png")

## 7. Fraud Analysis by Numerical Features

In [None]:
# Compare fraud vs legitimate transactions across numerical features:
# mean, median and standard deviation per class, then the fraud-minus-legitimate gaps.
print("Fraud Analysis by Numerical Features:\n")

for col in ['transaction_amount', 'account_age_days', 'hour', 'weekday']:
    if col not in df.columns:
        continue  # feature not present in this dataset

    print(f"--- {col.upper()} BY FRAUD STATUS ---")

    legitimate = df.loc[df['is_fraud'] == 0, col]
    fraud = df.loc[df['is_fraud'] == 1, col]

    legit_mean, legit_median = legitimate.mean(), legitimate.median()
    fraud_mean, fraud_median = fraud.mean(), fraud.median()

    print("\n  Legitimate Transactions:")
    print(f"    Mean: {legit_mean:.2f}")
    print(f"    Median: {legit_median:.2f}")
    print(f"    Std Dev: {legitimate.std():.2f}")

    print("\n  Fraudulent Transactions:")
    print(f"    Mean: {fraud_mean:.2f}")
    print(f"    Median: {fraud_median:.2f}")
    print(f"    Std Dev: {fraud.std():.2f}")

    print("\n  Difference (Fraud - Legitimate):")
    print(f"    Mean Difference: {fraud_mean - legit_mean:.2f}")
    print(f"    Median Difference: {fraud_median - legit_median:.2f}")
    print()

In [None]:
# Visualize fraud patterns in numerical features (2x2 grid):
# amount and account-age histograms split by class, then fraud rate by hour and by weekday.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

is_legit = df['is_fraud'] == 0
is_fraudulent = df['is_fraud'] == 1
class_hist_style = dict(bins=30, label=['Legitimate', 'Fraud'], color=['green', 'red'],
                        alpha=0.6, edgecolor='black')

# Transaction Amount Comparison
ax = axes[0, 0]
ax.hist([df.loc[is_legit, 'transaction_amount'], df.loc[is_fraudulent, 'transaction_amount']],
        **class_hist_style)
ax.set_title('Transaction Amount Distribution by Fraud Status', fontsize=12, fontweight='bold')
ax.set_xlabel('Transaction Amount ($)')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid(alpha=0.3)

# Account Age Comparison
ax = axes[0, 1]
ax.hist([df.loc[is_legit, 'account_age_days'], df.loc[is_fraudulent, 'account_age_days']],
        **class_hist_style)
ax.set_title('Account Age Distribution by Fraud Status', fontsize=12, fontweight='bold')
ax.set_xlabel('Account Age (Days)')
ax.set_ylabel('Frequency')
ax.legend()
ax.grid(alpha=0.3)

# Hour Analysis - Fraud Rate (fraud rows / all rows per hour, in percent)
hour_fraud_rate = df.groupby('hour')['is_fraud'].agg(['sum', 'count'])
hour_fraud_rate['rate'] = (hour_fraud_rate['sum'] / hour_fraud_rate['count']) * 100
ax = axes[1, 0]
ax.bar(hour_fraud_rate.index, hour_fraud_rate['rate'], color='#e74c3c', edgecolor='black', alpha=0.7)
ax.set_title('Fraud Rate by Hour of Day', fontsize=12, fontweight='bold')
ax.set_xlabel('Hour (0-23)')
ax.set_ylabel('Fraud Rate (%)')
ax.axhline(fraud_percentage[1], color='blue', linestyle='--', linewidth=2,
           label=f'Overall Fraud Rate: {fraud_percentage[1]:.2f}%')
ax.legend()
ax.grid(axis='y', alpha=0.3)

# Weekday Analysis - Fraud Rate
# Defined here as well so this cell does not rely on an earlier plotting cell having run.
weekday_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_fraud_rate = df.groupby('weekday')['is_fraud'].agg(['sum', 'count'])
# Bars are placed positionally under seven fixed labels. When the weekday codes lie in
# 0-6, pad absent days so every bar stays under its own day name instead of sliding left.
# NOTE(review): the labels assume 0 = Monday ... 6 = Sunday — confirm against preprocessing.
if set(weekday_fraud_rate.index) <= set(range(7)):
    weekday_fraud_rate = weekday_fraud_rate.reindex(range(7), fill_value=0)
weekday_fraud_rate['rate'] = (weekday_fraud_rate['sum'] / weekday_fraud_rate['count']) * 100
ax = axes[1, 1]
# Days without any transactions have an undefined (NaN) rate; draw them as empty bars.
ax.bar(range(len(weekday_fraud_rate)), weekday_fraud_rate['rate'].fillna(0),
       color='#f39c12', edgecolor='black', alpha=0.7)
ax.set_title('Fraud Rate by Day of Week', fontsize=12, fontweight='bold')
ax.set_xlabel('Day of Week')
ax.set_ylabel('Fraud Rate (%)')
ax.set_xticks(range(7))
ax.set_xticklabels(weekday_labels, rotation=45, ha='right')
ax.axhline(fraud_percentage[1], color='blue', linestyle='--', linewidth=2,
           label=f'Overall Fraud Rate: {fraud_percentage[1]:.2f}%')
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('03_fraud_by_numerical_features.png', dpi=300, bbox_inches='tight')
plt.show()
print("\nVisualization saved as: 03_fraud_by_numerical_features.png")

## 8. Categorical Features Analysis

In [None]:
# Channel Analysis: for every column whose name starts with 'channel_', sum the column
# and express that sum as a percentage of all rows.
total_rows = len(df)

print("Transaction Channel Analysis:\n")
channel_cols = [name for name in df.columns if name.startswith('channel_')]
channel_data = {}

for col in channel_cols:
    label = col.replace('channel_', '').upper()
    n_flagged = df[col].sum()
    share = (n_flagged / total_rows) * 100
    channel_data[label] = dict(count=n_flagged, percentage=share)
    print(f"  {label}: {int(n_flagged):,} transactions ({share:.2f}%)")

# KYC Analysis: same computation for the 'kyc_verified_' columns
# (labels keep their original casing).
print("\nKYC Verification Analysis:\n")
kyc_cols = [name for name in df.columns if name.startswith('kyc_verified_')]
kyc_data = {}

for col in kyc_cols:
    label = col.replace('kyc_verified_', '')
    n_flagged = df[col].sum()
    share = (n_flagged / total_rows) * 100
    kyc_data[label] = dict(count=n_flagged, percentage=share)
    print(f"  {label}: {int(n_flagged):,} transactions ({share:.2f}%)")

In [None]:
# Calculate fraud rates by channel and KYC status
def _fraud_rate_pct(flag_col):
    """Return the fraud rate (%) for one indicator column.

    Sums `is_fraud` over the rows where `flag_col` equals 1 and divides by the
    sum of `flag_col`.
    """
    fraud_in_group = df.loc[df[flag_col] == 1, 'is_fraud'].sum()
    return (fraud_in_group / df[flag_col].sum()) * 100


channel_fraud_rates = {
    col.replace('channel_', '').upper(): _fraud_rate_pct(col) for col in channel_cols
}
kyc_fraud_rates = {
    col.replace('kyc_verified_', ''): _fraud_rate_pct(col) for col in kyc_cols
}

print("\nFraud Rate by Channel:")
for label, pct in channel_fraud_rates.items():
    print(f"  {label}: {pct:.2f}%")

print("\nFraud Rate by KYC Status:")
for label, pct in kyc_fraud_rates.items():
    print(f"  {label}: {pct:.2f}%")

In [None]:
# Visualize categorical features (2x2 grid):
# transaction counts per channel, KYC share pie, fraud rate per channel, fraud rate per KYC status.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Channel Distribution
channel_names = list(channel_data.keys())
channel_counts = [channel_data[ch]['count'] for ch in channel_names]
colors_channel = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']
ax = axes[0, 0]
ax.bar(channel_names, channel_counts, color=colors_channel, edgecolor='black', alpha=0.7, linewidth=2)
ax.set_title('Transaction Distribution by Channel', fontsize=12, fontweight='bold')
ax.set_xlabel('Channel')
ax.set_ylabel('Transaction Count')
for i, v in enumerate(channel_counts):
    ax.text(i, v + 50, f'{int(v):,}', ha='center', fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# KYC Distribution Pie Chart
kyc_names = list(kyc_data.keys())
kyc_counts = [kyc_data[kyc]['count'] for kyc in kyc_names]
# `explode` must have exactly one entry per wedge. Size it from the data so the chart
# also works when the dataset has a number of kyc_verified_* columns other than two.
kyc_explode = [0.05] * len(kyc_counts)
ax = axes[0, 1]
ax.pie(kyc_counts, labels=kyc_names, autopct='%1.1f%%', colors=['#e74c3c', '#2ecc71'],
       startangle=90, explode=kyc_explode, shadow=True,
       textprops={'fontsize': 11, 'fontweight': 'bold'})
ax.set_title('KYC Verification Status Distribution', fontsize=12, fontweight='bold')

# Channel vs Fraud
channel_fraud_names = list(channel_fraud_rates.keys())
channel_fraud_values = list(channel_fraud_rates.values())
ax = axes[1, 0]
ax.bar(channel_fraud_names, channel_fraud_values, color=colors_channel, edgecolor='black', alpha=0.7, linewidth=2)
ax.set_title('Fraud Rate by Channel', fontsize=12, fontweight='bold')
ax.set_xlabel('Channel')
ax.set_ylabel('Fraud Rate (%)')
ax.axhline(fraud_percentage[1], color='red', linestyle='--', linewidth=2,
           label=f'Overall: {fraud_percentage[1]:.2f}%')
for i, v in enumerate(channel_fraud_values):
    ax.text(i, v + 0.1, f'{v:.2f}%', ha='center', fontweight='bold')
ax.legend()
ax.grid(axis='y', alpha=0.3)

# KYC vs Fraud
kyc_fraud_names = list(kyc_fraud_rates.keys())
kyc_fraud_values = list(kyc_fraud_rates.values())
ax = axes[1, 1]
ax.bar(kyc_fraud_names, kyc_fraud_values, color=['#e74c3c', '#2ecc71'], edgecolor='black', alpha=0.7, linewidth=2)
ax.set_title('Fraud Rate by KYC Verification Status', fontsize=12, fontweight='bold')
ax.set_xlabel('KYC Status')
ax.set_ylabel('Fraud Rate (%)')
ax.axhline(fraud_percentage[1], color='blue', linestyle='--', linewidth=2,
           label=f'Overall: {fraud_percentage[1]:.2f}%')
for i, v in enumerate(kyc_fraud_values):
    ax.text(i, v + 0.2, f'{v:.2f}%', ha='center', fontweight='bold', fontsize=11)
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('04_categorical_features_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
print("\nVisualization saved as: 04_categorical_features_analysis.png")

## 9. Correlation Analysis

In [None]:
# Select numerical columns for correlation analysis, keeping only those that exist
# in the dataset, then list each feature's correlation with the fraud label.
candidate_cols = [
    'account_age_days', 'transaction_amount', 'hour', 'weekday', 'month',
    'is_high_value', 'transaction_amount_log', 'is_fraud',
]
corr_cols = [name for name in candidate_cols if name in df.columns]

correlation_matrix = df[corr_cols].corr()

print("Correlation with Fraud (is_fraud):\n")
fraud_correlations = correlation_matrix['is_fraud'].sort_values(ascending=False)
# The label's correlation with itself is left out of the listing.
for idx, value in fraud_correlations.items():
    if idx == 'is_fraud':
        continue
    print(f"  {idx:30s}: {value:7.4f}")

In [None]:
# Visualize correlation matrix: annotated heatmap on the left, and on the right a
# horizontal bar chart of each feature's correlation with the fraud label.
fig, axes = plt.subplots(1, 2, figsize=(18, 7))
heatmap_ax, bars_ax = axes
panel_title_style = dict(fontsize=14, fontweight='bold', pad=20)

# Full correlation heatmap
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.3f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={'label': 'Correlation Coefficient'},
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap - All Numerical Features', **panel_title_style)

# Fraud correlation bar plot: ascending order, negative values in red, the rest in green
fraud_corr_sorted = fraud_correlations.drop('is_fraud').sort_values()
colors_corr = ['red' if coefficient < 0 else 'green' for coefficient in fraud_corr_sorted.values]
bar_positions = range(len(fraud_corr_sorted))
bars_ax.barh(bar_positions, fraud_corr_sorted.values, color=colors_corr, edgecolor='black', alpha=0.7)
bars_ax.set_yticks(bar_positions)
bars_ax.set_yticklabels(fraud_corr_sorted.index)
bars_ax.set_xlabel('Correlation Coefficient')
bars_ax.set_title('Feature Correlation with Fraud', **panel_title_style)
bars_ax.axvline(0, color='black', linewidth=1)
bars_ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('05_correlation_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
print("\nVisualization saved as: 05_correlation_analysis.png")

## 10. Key Insights and Summary

In [None]:
# Final summary. Statements under "KEY RISK FACTORS" are derived from the values computed
# in this notebook rather than asserted as fixed text, so the summary cannot contradict
# the data it was run on.
def _highest_rate(rates):
    """Return (label, rate) for the largest non-NaN rate in `rates`, or None if there is none."""
    valid = {label: rate for label, rate in rates.items() if pd.notna(rate)}
    if not valid:
        return None
    top_label = max(valid, key=valid.get)
    return top_label, valid[top_label]


banner = "=" * 80
fraud_rows = df[df['is_fraud'] == 1]
legit_rows = df[df['is_fraud'] == 0]
fraud_amount_mean = fraud_rows['transaction_amount'].mean()
legit_amount_mean = legit_rows['transaction_amount'].mean()

print(banner)
print("COMPREHENSIVE SUMMARY & KEY INSIGHTS")
print(banner)

print("\nDATASET OVERVIEW:")
print(f"  Total Transactions: {len(df):,}")
print(f"  Total Features: {df.shape[1]}")

print("\nFRAUD DETECTION METRICS:")
print(f"  Fraudulent Transactions: {fraud_counts[1]:,} ({fraud_percentage[1]:.2f}%)")
print(f"  Legitimate Transactions: {fraud_counts[0]:,} ({fraud_percentage[0]:.2f}%)")
print(f"  Class Imbalance Ratio: 1:{fraud_counts[0]/fraud_counts[1]:.2f}")

print("\nTRANSACTION AMOUNT INSIGHTS:")
print(f"  Average Amount: ${df['transaction_amount'].mean():,.2f}")
print(f"  Median Amount: ${df['transaction_amount'].median():,.2f}")
print(f"  Total Volume: ${df['transaction_amount'].sum():,.2f}")
print(f"  Average Fraud Amount: ${fraud_amount_mean:,.2f}")
print(f"  Average Legitimate Amount: ${legit_amount_mean:,.2f}")

print("\nACCOUNT AGE INSIGHTS:")
print(f"  Average Account Age: {df['account_age_days'].mean():.0f} days")
print(f"  Median Account Age: {df['account_age_days'].median():.0f} days")
print(f"  Average Age (Fraud): {fraud_rows['account_age_days'].mean():.0f} days")
print(f"  Average Age (Legitimate): {legit_rows['account_age_days'].mean():.0f} days")

print("\nCHANNEL INSIGHTS:")
for channel, rate in sorted(channel_fraud_rates.items(), key=lambda x: x[1], reverse=True):
    count = channel_data[channel]['count']
    print(f"  {channel}: {int(count):,} transactions, {rate:.2f}% fraud rate")

print("\nKYC VERIFICATION INSIGHTS:")
for kyc, rate in sorted(kyc_fraud_rates.items(), key=lambda x: x[1], reverse=True):
    count = kyc_data[kyc]['count']
    print(f"  {kyc}: {int(count):,} transactions, {rate:.2f}% fraud rate")

# Highest-risk groups; None when no rate is available (no matching columns, or all NaN).
top_channel = _highest_rate(channel_fraud_rates)
top_kyc = _highest_rate(kyc_fraud_rates)
highest_risk_channel = top_channel[0] if top_channel is not None else None
highest_risk_kyc = top_kyc[0] if top_kyc is not None else None

# Hour with the highest fraud rate (share of fraud rows per hour, in percent).
hourly_fraud_rate = (df.groupby('hour')['is_fraud'].mean() * 100).dropna()
peak_hour = hourly_fraud_rate.idxmax() if not hourly_fraud_rate.empty else None

print("\nKEY RISK FACTORS:")
if top_kyc is not None:
    print(f"  1. KYC status '{highest_risk_kyc}' shows highest fraud rate ({top_kyc[1]:.2f}%)")
else:
    print("  1. No KYC fraud-rate breakdown available")
if top_channel is not None:
    print(f"  2. {highest_risk_channel} channel shows highest fraud rate ({top_channel[1]:.2f}%)")
else:
    print("  2. No channel fraud-rate breakdown available")
if pd.isna(fraud_amount_mean) or pd.isna(legit_amount_mean):
    print("  3. Average amounts cannot be compared (one class has no transactions)")
elif fraud_amount_mean > legit_amount_mean:
    print(f"  3. Fraudulent transactions have higher average amounts "
          f"(${fraud_amount_mean:,.2f} vs ${legit_amount_mean:,.2f})")
elif fraud_amount_mean < legit_amount_mean:
    print(f"  3. Fraudulent transactions have lower average amounts "
          f"(${fraud_amount_mean:,.2f} vs ${legit_amount_mean:,.2f})")
else:
    print(f"  3. Fraudulent and legitimate transactions have the same average amount "
          f"(${fraud_amount_mean:,.2f})")

print("\nRECOMMENDATIONS:")
if highest_risk_kyc is not None:
    print(f"  1. Implement enhanced verification for accounts with KYC status '{highest_risk_kyc}'")
else:
    print("  1. Implement enhanced verification for non-KYC verified accounts")
print("  2. Apply stricter monitoring for high-value transactions")
if highest_risk_channel is not None:
    print(f"  3. Focus fraud detection resources on {highest_risk_channel} channel")
else:
    print("  3. Focus fraud detection resources on the highest-risk channel once channel data is available")
if peak_hour is not None:
    print(f"  4. Increase surveillance during peak fraud hours "
          f"(highest rate at hour {peak_hour}: {hourly_fraud_rate[peak_hour]:.2f}%)")
else:
    print("  4. Increase surveillance during peak fraud hours")
print("  5. Consider implementing dynamic risk scoring based on multiple factors")
print("  6. Investigate accounts with lower age (potential correlation with fraud)")

print("\n" + banner)
print("EDA COMPLETE")
print(banner)