# In [1]:
# Question: Evaluating Data Distribution
# Description: Analyze the distribution of a numeric column using histograms and descriptive statistics.
import pandas as pd
import matplotlib.pyplot as plt
import statistics

def evaluate_distribution(df: pd.DataFrame, column_name: str) -> None:
    """Print descriptive statistics for a numeric column and plot its histogram.

    Missing values are dropped before any computation. The function reports
    problems by printing a message and returning early rather than raising.

    Args:
        df: DataFrame that holds the column to analyse.
        column_name: Label of the column to analyse.

    Returns:
        None. The statistics are printed to stdout and the histogram is shown
        with ``plt.show()``.
    """
    # Guard: the column must exist.
    if column_name not in df.columns:
        print("Error: Column not found.")
        return

    # Drop missing values so they do not distort the statistics.
    data = df[column_name].dropna()

    # Guard: only numeric dtypes can be summarised and binned.
    if not pd.api.types.is_numeric_dtype(data):
        print("Error: Column is not numeric.")
        return

    # The statistics module works on plain Python sequences.
    values = data.tolist()

    if not values:
        print("The column is empty after removing missing values.")
        return

    # These cannot raise StatisticsError for a non-empty list, so they are
    # computed outside the try block and are always bound when printed.
    mean_val = statistics.mean(values)
    median_val = statistics.median(values)
    # The sample standard deviation needs at least two points; report 0 for one.
    std_dev = statistics.stdev(values) if len(values) > 1 else 0
    min_val = min(values)
    max_val = max(values)

    # Only the mode can fail here: before Python 3.8, statistics.mode raised
    # StatisticsError for multimodal data. Keeping the try this narrow ensures
    # the fallback never leaves any other statistic undefined.
    try:
        mode_val = statistics.mode(values)
    except statistics.StatisticsError:
        mode_val = "No unique mode"

    print(f"Descriptive Statistics for '{column_name}':")
    print(f"Mean: {mean_val}")
    print(f"Median: {median_val}")
    print(f"Mode: {mode_val}")
    print(f"Standard Deviation: {std_dev}")
    print(f"Min: {min_val}")
    print(f"Max: {max_val}")

    # Plot histogram
    plt.figure(figsize=(8, 5))
    plt.hist(values, bins=10, edgecolor='black', color='skyblue')
    plt.title(f"Histogram of '{column_name}'")
    plt.xlabel(column_name)
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()


