# Lab 3 – Descriptive Statistical Analysis of the Chronic Kidney Disease (CKD) Dataset

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo

In [None]:
# Download dataset id 336 through the ucimlrepo client.
ckd = fetch_ucirepo(id=336)

# Pull the feature table and the target table out of the fetched payload.
X, y = ckd.data.features, ckd.data.targets

# Put features and targets side by side in one frame, then preview the
# first rows (the bare expression is what the notebook renders).
df = pd.concat([X, y], axis="columns")
df.head()

In [None]:
# Replace '?' with NaN and handle missing values
df.replace('?', np.nan, inplace=True)

# Columns parsed as numeric below; every other column except the 'class'
# target is imputed as categorical.
num_cols = ['age', 'bp', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wbcc', 'rbcc']
cat_cols = [col for col in df.columns if col not in num_cols + ['class']]

for col in num_cols:
    # Values that cannot be parsed as numbers become NaN and are then
    # imputed with the column median.
    numeric = pd.to_numeric(df[col], errors='coerce')
    # Assign the filled Series back instead of calling
    # df[col].fillna(..., inplace=True): that form mutates an intermediate
    # object, triggers a FutureWarning on pandas 2.x and leaves df
    # untouched under Copy-on-Write.
    df[col] = numeric.fillna(numeric.median())

for col in cat_cols:
    modes = df[col].mode()
    # mode() is empty when the whole column is NaN; there is nothing to
    # impute with in that case, so the column is left as it is.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])

print("Missing values after cleaning:")
print(df.isnull().sum())

## Mean, Median, Mode

In [None]:
# Central-tendency statistics for the numeric columns, printed as
# (header, values) pairs in a fixed order.
numeric_part = df[num_cols]
central_tendency = (
    ("=== MEAN ===", numeric_part.mean()),
    ("\n=== MEDIAN ===", numeric_part.median()),
    # mode() may return several rows when values tie; only the first row
    # is reported.
    ("\n=== MODE ===", numeric_part.mode().iloc[0]),
)
for header, values in central_tendency:
    print(header)
    print(values)

## Min, Max, Sum

In [None]:
# Per-column minimum, maximum and total of the numeric columns.
numeric_view = df[num_cols]
lowest = numeric_view.min()
highest = numeric_view.max()
total = numeric_view.sum()

# sep="\n" puts the header and the values on separate lines, exactly as
# two consecutive print calls would.
print("=== MIN ===", lowest, sep="\n")
print("\n=== MAX ===", highest, sep="\n")
print("\n=== SUM ===", total, sep="\n")

## Standard Deviation and Variance

In [None]:
# Dispersion of the numeric columns: standard deviation and variance
# (pandas defaults are used for both).
dispersion_input = df[num_cols]
std_by_column = dispersion_input.std()
var_by_column = dispersion_input.var()

print("=== STANDARD DEVIATION ===")
print(std_by_column)
print("\n=== VARIANCE ===")
print(var_by_column)

## Quartiles and Percentiles

In [None]:
# Quantile levels to report, named so each table's intent is explicit.
quartile_levels = [0.25, 0.50, 0.75]
percentile_levels = [0.10, 0.25, 0.50, 0.75, 0.90]

numeric_block = df[num_cols]

# One row per requested level, one column per numeric feature.
print("=== QUARTILES (Q1, Q2, Q3) ===", numeric_block.quantile(quartile_levels), sep="\n")

print(
    "\n=== PERCENTILES (10th, 25th, 50th, 75th, 90th) ===",
    numeric_block.quantile(percentile_levels),
    sep="\n",
)

## Correlation Matrix

In [None]:
# Pairwise correlation between the numeric columns (pandas default method).
corr = df[num_cols].corr()
print("=== CORRELATION MATRIX ===", corr, sep="\n")

# Draw the matrix as an annotated heatmap on an explicit Figure/Axes pair.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    square=True,
    ax=ax,
)
ax.set_title("Correlation Heatmap of Numeric Features")
fig.tight_layout()
plt.show()

## Covariance Matrix

In [None]:
# Pairwise covariance between the numeric columns.
cov = df[num_cols].cov()
print("=== COVARIANCE MATRIX ===", cov, sep="\n")

# Heatmap settings collected in one place; cells are annotated with one
# decimal place.
heatmap_options = {"annot": True, "fmt": ".1f", "cmap": "YlGnBu"}

plt.figure(figsize=(12, 8))
sns.heatmap(cov, **heatmap_options)
plt.title("Covariance Heatmap of Numeric Features")
plt.tight_layout()
plt.show()

## Complete Descriptive Summary

In [None]:
# Full describe() summary of the numeric columns.
summary_header = "=== COMPLETE DESCRIPTIVE SUMMARY (df.describe()) ==="
print(summary_header)
# Left as a bare trailing expression so the notebook renders the table.
df.loc[:, num_cols].describe()