# In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def summarize_data(df):
    """Collect a quick structural and statistical overview of ``df``.

    The returned dict has four entries, in this order:

    * ``'shape'`` -- the ``df.shape`` tuple.
    * ``'columns'`` -- the column labels as a plain list.
    * ``'missing_values'`` -- per-column count of null entries.
    * ``'summary'`` -- the result of ``df.describe()``.
    """
    overview = {}
    overview['shape'] = df.shape
    overview['columns'] = [*df.columns]
    # Summing the boolean null mask column-wise yields the null count
    # for each column.
    overview['missing_values'] = df.isna().sum()
    overview['summary'] = df.describe()
    return overview

def plot_correlation_heatmap(df):
    """Plot an annotated correlation heatmap for the numeric columns of ``df``.

    A new 10x6 figure is created, the pairwise correlation matrix is drawn
    with the "coolwarm" colour map and each cell is annotated with its value.

    Args:
        df: DataFrame to analyse. Columns that are neither numeric nor
            boolean are left out of the correlation matrix.

    Returns:
        The ``matplotlib.pyplot`` module (``plt``), so the caller can go on
        to call e.g. ``.show()`` or ``.savefig(...)`` on it.

    Raises:
        ValueError: If ``df`` contains no numeric or boolean columns.
    """
    # Restrict to numeric/bool columns explicitly: since pandas 2.0,
    # DataFrame.corr() no longer drops non-numeric columns by default and
    # raises on e.g. string columns instead.
    numeric = df.select_dtypes(include=["number", "bool"])
    if numeric.shape[1] == 0:
        # Fail before opening a figure so no empty figure is left behind.
        raise ValueError("DataFrame has no numeric columns to correlate")

    plt.figure(figsize=(10, 6))
    sns.heatmap(numeric.corr(), annot=True, cmap="coolwarm")
    plt.title("Correlation Heatmap")
    return plt

def detect_outliers(df, col, *, multiplier=1.5):
    """Return the rows of ``df`` whose ``col`` value is an IQR outlier.

    A value is an outlier when it lies strictly below
    ``Q1 - multiplier * IQR`` or strictly above ``Q3 + multiplier * IQR``,
    where ``Q1``/``Q3`` are the 25th/75th percentiles of ``df[col]`` and
    ``IQR = Q3 - Q1``. Values exactly on a fence are not outliers, and
    missing values never compare as outliers.

    Args:
        df: DataFrame to filter.
        col: Label of the column to examine.
        multiplier: Width of the fences in units of IQR. The default of
            1.5 reproduces the conventional rule; must not be negative.

    Returns:
        A DataFrame holding the outlier rows, with their original index
        labels and all columns of ``df``.

    Raises:
        ValueError: If ``multiplier`` is negative.
    """
    if multiplier < 0:
        raise ValueError("multiplier must be non-negative")

    # Look the column up once instead of re-indexing df for every use.
    values = df[col]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    spread = multiplier * (q3 - q1)

    lower_fence = q1 - spread
    upper_fence = q3 + spread
    return df[(values < lower_fence) | (values > upper_fence)]

def visualize_distribution(df, col, *, ax=None):
    """Plot a 30-bin histogram with a KDE curve for one column of ``df``.

    The plot is titled "Distribution of <col>", the x-axis is labelled with
    ``col`` and the y-axis with "Frequency".

    Args:
        df: DataFrame holding the data.
        col: Label of the column to plot.
        ax: Optional matplotlib Axes to draw on, e.g. one cell of a
            subplot grid. When omitted, seaborn draws on the current axes,
            as this function always did before the parameter existed.

    Returns:
        The ``matplotlib.pyplot`` module (``plt``), so the caller can go on
        to call e.g. ``.show()`` or ``.savefig(...)`` on it.
    """
    # histplot returns the Axes it drew on; labelling through that handle
    # keeps title and labels on the right subplot when ``ax`` is supplied.
    axes = sns.histplot(df[col], kde=True, bins=30, ax=ax)
    axes.set_title(f"Distribution of {col}")
    axes.set_xlabel(col)
    axes.set_ylabel("Frequency")
    return plt
