# 1. Calculate Mean, Median, Mode

In [None]:
import statistics as stats

# Small sample in which 6 is the only repeated value.
data = [4, 5, 6, 6, 7, 8, 9]

# Evaluate the three measures of central tendency against the same sample.
mean, median, mode = (
    measure(data) for measure in (stats.mean, stats.median, stats.mode)
)

for label, value in (("Mean:", mean), ("Median:", median), ("Mode:", mode)):
    print(label, value)

# 2. Variance & Standard Deviation

In [None]:
import statistics as stats

# statistics.variance / statistics.stdev are the sample forms (n - 1 divisor).
data = [10, 12, 23, 23, 16, 23, 21, 16]
variance, std_dev = stats.variance(data), stats.stdev(data)

for label, value in (("Variance:", variance), ("Standard Deviation:", std_dev)):
    print(label, value)

# 3. Dataset for Nominal, Ordinal, Interval, Ratio

In [None]:
# One example column per level of measurement, inserted in the order
# Nominal -> Ordinal -> Interval -> Ratio.
dataset = {}
dataset["Nominal"] = ["Red", "Blue", "Green"]
dataset["Ordinal"] = ["Low", "Medium", "High"]
dataset["Interval"] = [10, 20, 30]  # No true zero (e.g., temperature in Celsius)
dataset["Ratio"] = [100, 200, 300]  # True zero (e.g., income, height)

# 4. Random & Stratified Sampling

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Random Sampling: 10 of the 100 rows, drawn without replacement.
data = pd.DataFrame({'value': range(100)})
random_sample = data.sample(n=10)

# Stratified Sampling: two strata of 50 rows each, 5 rows drawn from each.
data['category'] = ['A'] * 50 + ['B'] * 50
# GroupBy.sample draws inside every group and returns the selected rows with
# all columns and their original index. The earlier
# groupby(...).apply(lambda x: x.sample(5)) form also operated on the grouping
# column, which pandas deprecated in 2.2, so 'category' was not guaranteed to
# survive in the result.
stratified_sample = data.groupby('category').sample(n=5)

print("Random Sample:\n", random_sample)
print("\nStratified Sample:\n", stratified_sample)

# 5. Function to Calculate Range

In [None]:
def calculate_range(data):
    """Return the range (largest value minus smallest value) of ``data``.

    Args:
        data: Any iterable of mutually comparable values that support
            subtraction. One-shot iterables such as generators are accepted.

    Returns:
        The difference between the maximum and the minimum element.

    Raises:
        ValueError: If ``data`` contains no elements.
    """
    # Materialise once: calling max() and then min() directly on an iterator
    # would exhaust it on the first call and leave nothing for the second.
    values = list(data)
    if not values:
        raise ValueError("calculate_range() requires at least one data point")
    return max(values) - min(values)

# Demo input: the five multiples of 5 from 5 through 25.
data = list(range(5, 30, 5))
spread = calculate_range(data)
print("Range:", spread)

# 6. Plot Histogram to Visualize Skewness

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Exponential draws (scale 2, 1000 values) serve as the skewed data.
data = np.random.exponential(2, 1000)  # Skewed data

plt.hist(data, bins=30, edgecolor='black')
# The captions are independent of one another; axes are labelled before the title.
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Histogram to Visualize Skewness')
plt.show()

# 7. Skewness and Kurtosis

In [None]:
from scipy.stats import skew, kurtosis

# Both shape statistics are computed on whichever `data` array an earlier
# cell left in scope.
for label, statistic in (("Skewness:", skew), ("Kurtosis:", kurtosis)):
    print(label, statistic(data))

# 8. Positive & Negative Skewness

In [None]:
# Negating exponential draws mirrors the distribution, moving the long tail
# from the right-hand side to the left-hand side.
pos_skew = np.random.exponential(scale=2, size=1000)
neg_skew = -np.random.exponential(scale=2, size=1000)

# One figure, two side-by-side panels: positive skew left, negative skew right.
plt.figure(figsize=(12, 5))
panels = ((pos_skew, 'Positive Skew'), (neg_skew, 'Negative Skew'))
for position, (sample, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.hist(sample, bins=30)
    plt.title(heading)

plt.show()

# 9. Covariance Between Two Datasets

In [None]:
import numpy as np

# Two evenly spaced integer series; every y value is the matching x value minus one.
x = np.arange(2, 10, 2)  # 2, 4, 6, 8
y = np.arange(1, 9, 2)   # 1, 3, 5, 7

# Stack the series as rows so each row is treated as one variable.
cov_matrix = np.cov(np.vstack((x, y)))
print("Covariance Matrix:\n", cov_matrix)

# 10. Correlation Coefficient

In [None]:
# Correlation matrix of the x / y arrays defined earlier in the file; the
# series are stacked as rows so each row is treated as one variable.
correlation = np.corrcoef(np.vstack((x, y)))
print("Correlation Coefficient Matrix:\n", correlation)

# 11. Scatter Plot

In [None]:
# Point cloud of the x / y arrays defined earlier in the file.
plt.scatter(x, y)
# Grid and captions are independent decorations; their order does not matter.
plt.grid(True)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Scatter Plot')
plt.show()

# 12. Simple vs Systematic Sampling

In [None]:
import pandas as pd

# Population frame: the integers 0..99 in a single 'value' column.
data = pd.DataFrame({'value': range(100)})

# Simple Random Sampling: 10 rows picked by pandas' sampler.
random_sample = data.sample(10)

# Systematic Sampling: step through the frame by position, keeping row 0 and
# every kth row after it.
k = 10  # every kth item
systematic_sample = data.iloc[list(range(0, len(data), k))]

for heading, sample in (
    ("Simple Random Sample:\n", random_sample),
    ("Systematic Sample:\n", systematic_sample),
):
    print(heading, sample)

# 13. Grouped Data – Mean, Median, Mode

In [None]:
import pandas as pd
import numpy as np

grouped_data = {
    'Class Interval': ['0-10', '10-20', '20-30'],
    'Frequency': [5, 15, 10]
}
df = pd.DataFrame(grouped_data)

# Parse every "lower-upper" label once into numeric class boundaries.
bounds = df['Class Interval'].str.split('-', expand=True).astype(float)
lower = bounds[0].to_numpy()
upper = bounds[1].to_numpy()
width = upper - lower
freq = df['Frequency'].to_numpy()
total = freq.sum()

# Midpoints
df['Midpoint'] = (lower + upper) / 2
df['f*x'] = df['Frequency'] * df['Midpoint']

# Grouped mean: frequency-weighted average of the class midpoints.
mean = df['f*x'].sum() / total
print("Grouped Mean:", mean)

# Grouped median: interpolate inside the first class whose cumulative
# frequency reaches N/2 ->  L + ((N/2 - cf) / f) * h
cum_freq = freq.cumsum()
median_pos = int((cum_freq >= total / 2).argmax())
cf_before = cum_freq[median_pos - 1] if median_pos > 0 else 0
median = lower[median_pos] + (total / 2 - cf_before) / freq[median_pos] * width[median_pos]
print("Grouped Median:", median)

# Grouped mode: interpolate inside the class with the highest frequency
# ->  L + ((f1 - f0) / (2*f1 - f0 - f2)) * h, where a missing neighbour
# (modal class is first or last) counts as frequency 0.
modal_pos = int(freq.argmax())
f1 = freq[modal_pos]
f0 = freq[modal_pos - 1] if modal_pos > 0 else 0
f2 = freq[modal_pos + 1] if modal_pos + 1 < len(freq) else 0
denom = 2 * f1 - f0 - f2
if denom:
    mode = lower[modal_pos] + (f1 - f0) / denom * width[modal_pos]
else:
    # Both neighbours match the modal frequency: no direction to lean, so
    # fall back to the centre of the modal class.
    mode = lower[modal_pos] + width[modal_pos] / 2
print("Grouped Mode:", mode)

# 14. Simulate Data and Calculate Central Tendency & Dispersion

In [None]:
# 1000 draws from a normal distribution with loc 50 and scale 10.
data = np.random.normal(50, 10, 1000)

# np.std / np.var are called with their defaults (ddof=0, population form).
summary = (
    ("Mean:", np.mean),
    ("Median:", np.median),
    ("Standard Deviation:", np.std),
    ("Variance:", np.var),
)
for label, statistic in summary:
    print(label, statistic(data))

# 15. Descriptive Stats with NumPy / Pandas

In [None]:
import pandas as pd

# Wrap the `data` array from an earlier cell in a one-column frame so that
# pandas can produce its standard summary table.
df = pd.DataFrame(data, columns=['values'])
summary_table = df.describe()
print(summary_table)

# 16. Boxplot for Spread & Outliers

In [None]:
# Box-and-whisker view of the `data` array left in scope by an earlier cell.
# Title and plot target the same current axes, so their order is interchangeable.
plt.title('Boxplot')
plt.boxplot(data)
plt.show()

# 17. Calculate IQR

In [None]:
# Fetch the 25th and 75th percentiles in a single call; the distance between
# them is the interquartile range.
Q1, Q3 = np.percentile(data, [25, 75])
IQR = Q3 - Q1
print("Interquartile Range (IQR):", IQR)

# 18. Z-Score Normalization

In [None]:
from scipy.stats import zscore

# Standardise the `data` array from an earlier cell and show only the first
# five scores to keep the output short.
z_scores = zscore(data)
preview = z_scores[:5]
print("Z-Scores (first 5):", preview)

# 19. Compare Two Datasets' Standard Deviations

In [None]:
# Same centre (50), different spread: scale 5 versus scale 20.
data1 = np.random.normal(loc=50, scale=5, size=1000)
data2 = np.random.normal(loc=50, scale=20, size=1000)

for label, sample in (("Std Dev of data1:", data1), ("Std Dev of data2:", data2)):
    print(label, np.std(sample))

# 20. Covariance Heatmap

In [None]:
import seaborn as sns

# Pairwise covariance of the x / y arrays defined earlier in the file,
# rendered as an annotated heatmap.
df = pd.DataFrame({'x': x, 'y': y})
covariance = df.cov()
sns.heatmap(covariance, annot=True, cmap='coolwarm')
plt.title('Covariance Heatmap')
plt.show()

# 21. Correlation Matrix using Seaborn

In [None]:
# Correlation matrix of whichever `df` is currently in scope, drawn as an
# annotated heatmap.
correlations = df.corr()
sns.heatmap(correlations, annot=True, cmap='YlGnBu')
plt.title('Correlation Matrix')
plt.show()

# 22. Variance and Standard Deviation

In [None]:
# Variance and standard deviation of `data` using NumPy's defaults (ddof=0).
for label, statistic in (("Variance:", np.var), ("Standard Deviation:", np.std)):
    print(label, statistic(data))

# 23. Visualize Skewness and Kurtosis

In [None]:
import seaborn as sns
# Histogram of `data` with a kernel-density curve overlaid (kde=True), so
# asymmetry and tail weight are visible in one picture.
sns.histplot(data=data, kde=True)
plt.title("Skewness & Kurtosis Visualization")
plt.show()

# 24. Pearson and Spearman Correlation

In [None]:
from scipy.stats import pearsonr, spearmanr

# Run both correlation tests on the x / y arrays defined earlier in the file
# and print each result object as returned by SciPy.
for label, correlation_test in (
    ("Pearson Correlation:", pearsonr),
    ("Spearman Correlation:", spearmanr),
):
    print(label, correlation_test(x, y))