In [None]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv('/mnt/data/file-7CTqdrLXpf9Vkpca2EfnKj')

# Select numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns

# Data profiling summary: one row of descriptive statistics per numeric column.
# The rows are collected first and the frame is built in a single call, because
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0 (and,
# while it existed, copied the whole frame on every iteration).
profile_columns = ['Column', 'Count', 'Mean', 'Median', 'StdDev', 'Min', 'Max', 'MissingValues']
profile_rows = [
    {
        'Column': col,
        'Count': df[col].count(),  # non-null observations only
        'Mean': df[col].mean(),
        'Median': df[col].median(),
        'StdDev': df[col].std(),
        'Min': df[col].min(),
        'Max': df[col].max(),
        'MissingValues': df[col].isnull().sum(),
    }
    for col in numeric_cols
]
# Passing columns= keeps the header (and its order) even when the dataset has
# no numeric columns and profile_rows is empty.
profiling_results = pd.DataFrame(profile_rows, columns=profile_columns)

print("=== Data Profiling Summary ===")
print(profiling_results)

# Outlier detection using IQR method
def detect_outliers_iqr(data, column, factor=1.5):
    """Return the rows of *data* whose *column* value lies outside the IQR fences.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where Q1
    and Q3 are the 25th and 75th percentiles of ``data[column]`` and
    ``IQR = Q3 - Q1``. Values exactly on a fence are not reported.

    Args:
        data: DataFrame holding the observations.
        column: Label of the numeric column to examine.
        factor: Non-negative multiplier applied to the IQR when placing the
            fences. The default of 1.5 is the conventional Tukey rule; 3.0 is
            commonly used to flag only extreme outliers.

    Returns:
        A DataFrame with the same columns as *data*, containing only the
        outlying rows (original index preserved). Empty if none are found.

    Raises:
        ValueError: If *factor* is negative.
    """
    if factor < 0:
        raise ValueError(f"factor must be non-negative, got {factor!r}")

    values = data[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    spread = factor * (q3 - q1)
    lower = q1 - spread
    upper = q3 + spread

    # Explicit comparisons (rather than ~values.between(...)) so that rows with
    # a missing value are never flagged: any comparison against NaN is False.
    is_outlier = (values < lower) | (values > upper)
    return data[is_outlier]

# Walk every numeric column: print how many rows the IQR rule flags, then a
# short preview of the flagged values when there are any.
for col in numeric_cols:
    outliers = detect_outliers_iqr(df, col)
    print(f"\nOutliers in '{col}': {len(outliers)} found")
    if outliers.empty:
        # Nothing flagged for this column; the count line above is enough.
        continue
    # Restrict the preview to the inspected column and its first few rows.
    print(outliers[[col]].head())