# Descriptive Statistics — Practical Implementation

This notebook demonstrates key descriptive statistics concepts with real code examples. Adjust and explore with your own data for a hands-on learning experience!

## Imports & Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Example Dataset

In [2]:
# Toy dataset: exam marks for twelve students, held in a one-column DataFrame.
data = dict(Marks=[55, 63, 70, 65, 64, 68, 72, 66, 61, 59, 80, 90])
df = pd.DataFrame(data)
# Bare last expression so the notebook renders the frame as a rich table.
df

## Measures of Central Tendency

In [3]:
# Central tendency of the marks: mean, median and mode.
marks = df['Marks']
mean = marks.mean()
median = marks.median()

# Series.mode() returns EVERY value tied for the highest frequency, sorted
# ascending. Taking [0] alone therefore reports the smallest tied value as
# "the" mode, which is misleading when several (or all) values tie.
modes = marks.mode().tolist()
mode = modes[0] if modes else None  # first mode, kept under the original name

print(f"Mean: {mean}")
print(f"Median: {median}")
if len(modes) > 1 and len(modes) == marks.nunique():
    # Every distinct value occurs equally often, so no value stands out.
    print("Mode: none (every value occurs equally often)")
elif len(modes) > 1:
    print(f"Modes: {modes}")
else:
    print(f"Mode: {mode}")

## Measures of Dispersion

In [4]:
# Spread of the marks. var() and std() are called with pandas' defaults,
# which are the sample (ddof=1) estimators.
marks = df['Marks']
variance = marks.var()
std = marks.std()

# IQR = distance between the 75th and 25th percentiles.
q3, q1 = marks.quantile(0.75), marks.quantile(0.25)
iqr = q3 - q1

# Range = distance between the largest and smallest mark.
range_ = marks.max() - marks.min()

print(
    f"Variance: {variance}",
    f"Standard Deviation: {std}",
    f"Interquartile Range (IQR): {iqr}",
    f"Range: {range_}",
    sep="\n",
)

## Distribution Analysis

In [5]:
# Shape of the distribution: asymmetry (skewness) and tail weight (kurtosis).
# Note: pandas documents kurtosis() as Fisher's definition, i.e. excess
# kurtosis, for which a normal distribution scores 0.
marks = df['Marks']
skewness, kurtosis = marks.skew(), marks.kurtosis()
print(f"Skewness: {skewness}", f"Kurtosis: {kurtosis}", sep="\n")

## Percentiles and Quartiles

In [6]:
# Selected percentiles of the marks, expressed as fractions in [0, 1].
percentiles = [0.25, 0.5, 0.75, 0.9]
marks = df['Marks']
for p in percentiles:
    # round(), not int(): p * 100 is a float product, and int() truncates,
    # so e.g. 0.29 * 100 == 28.999999999999996 would be labelled "28th".
    print(f"{round(p * 100)}th Percentile: {marks.quantile(p)}")

## Data Visualization

In [7]:
# Side-by-side histogram (with KDE overlay) and boxplot of the marks.
# The explicit fig/axes interface is used so each plot is bound to a known
# Axes instead of relying on pyplot's implicit "current axes" state.
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(14, 6))

sns.histplot(data=df, x='Marks', bins=7, kde=True, ax=ax_hist)
ax_hist.set_title('Histogram of Marks')

# Pass the variable by keyword: a positional Series is interpreted as `x` by
# older seaborn releases (with a FutureWarning) and as `data` by newer ones,
# which changes the orientation of the box between versions.
sns.boxplot(data=df, x='Marks', ax=ax_box)
ax_box.set_title('Boxplot of Marks')

plt.show()

## Covariance and Correlation (With a Second Feature)

In [8]:
# Add a second feature (study hours, one value per student) and measure how
# it moves together with the marks.
hours_studied = [6, 7, 8, 7, 6, 8, 9, 8, 6, 5, 10, 12]
df['Hours_Studied'] = hours_studied

# Both .cov() and .corr() return a 2x2 matrix; select the off-diagonal
# Marks-vs-Hours_Studied entry by label rather than by position.
pair = df[['Marks', 'Hours_Studied']]
cov = pair.cov().loc['Marks', 'Hours_Studied']
corr = pair.corr().loc['Marks', 'Hours_Studied']

print(
    f"Covariance (Marks vs Hours Studied): {cov}",
    f"Correlation (Marks vs Hours Studied): {corr}",
    sep="\n",
)

## Practical Data Science Example: Outlier Detection (IQR Rule)

In [9]:
# Flag outliers with the IQR fence rule: any mark further than 1.5 * IQR
# below the first quartile or above the third quartile.
marks = df['Marks']
Q1 = marks.quantile(0.25)
Q3 = marks.quantile(0.75)
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# Boolean mask of rows falling outside the fences on either side.
outside_fences = (marks < lower_bound) | (marks > upper_bound)
outliers = df[outside_fences]

print("Outliers:", outliers, sep="\n")

### End of Notebook
You can expand and experiment with additional examples, datasets, and features for hands-on mastery!