1. Implementation of Descriptive Statistics


In [3]:
import pandas as pd
import numpy as np
from scipy import stats

# Sample observations: one list per variable, ten records each
data = dict(
    Age=[23, 25, 31, 35, 45, 55, 60, 65, 70, 72],
    Income=[20000, 25000, 30000, 35000, 35000, 45000, 55000, 58000, 60000, 72000],
)

# Wrap the raw lists in a DataFrame and show it
df = pd.DataFrame(data)
print(df)

# Render every float with two decimal places when pandas prints
pd.set_option('display.float_format', '{:.2f}'.format)

# Function to calculate descriptive statistics
def descriptive_statistics(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise every column of *df* with common descriptive statistics.

    For each column the following rows are produced, in this order:
    ``count``, ``mean``, ``median``, ``mode``, ``variance``, ``std_dev``,
    ``min``, ``max``, ``range``, ``25%``, ``50%(median)``, ``75%`` and
    ``skewness``.

    All statistics are computed with pandas reductions, so missing values
    (NaN) are ignored consistently. ``variance`` and ``std_dev`` use the
    pandas defaults (sample statistics, ``ddof=1``). When several values are
    tied for most frequent, ``mode`` is the smallest of them; when a column
    has no non-missing values, ``mode`` is NaN.

    Columns are expected to be numeric; non-numeric columns are not handled
    here.

    Args:
        df: Input data; every column is summarised.

    Returns:
        A DataFrame with one column per input column and one row per
        statistic.
    """
    stats_summary = {}

    for column in df.columns:
        series = df[column]

        # Series.mode() drops NaN (matching the other reductions below) and
        # returns the modes sorted ascending, so position 0 is the smallest
        # value among ties. An empty result means there was no data at all.
        modes = series.mode()
        mode_value = modes.iloc[0] if not modes.empty else np.nan

        # Computed once and reused for both the min/max rows and the range.
        minimum = series.min()
        maximum = series.max()

        # Store statistics for each column
        stats_summary[column] = {
            'count': series.count(),
            'mean': series.mean(),
            'median': series.median(),
            'mode': mode_value,
            'variance': series.var(),
            'std_dev': series.std(),
            'min': minimum,
            'max': maximum,
            'range': maximum - minimum,
            '25%': series.quantile(0.25),
            '50%(median)': series.quantile(0.50),
            '75%': series.quantile(0.75),
            'skewness': series.skew(),
        }

    # Return as a DataFrame
    return pd.DataFrame(stats_summary)

# Build the per-column summary table for the sample data
descriptive_stats = descriptive_statistics(df)

# Show the summary table
print(descriptive_stats)


   Age  Income
0   23   20000
1   25   25000
2   31   30000
3   35   35000
4   45   35000
5   55   45000
6   60   55000
7   65   58000
8   70   60000
9   72   72000
               Age       Income
count        10.00        10.00
mean         48.10     43500.00
median       50.00     40000.00
mode         23.00     35000.00
variance    351.43 294500000.00
std_dev      18.75     17161.00
min          23.00     20000.00
max          72.00     72000.00
range        49.00     52000.00
25%          32.00     31250.00
50%(median)  50.00     40000.00
75%          63.75     57250.00
skewness     -0.09         0.25
