### Calculate mean, median, mode

In [None]:

import numpy as np
from scipy import stats

# Small sample reused by the later cells in this notebook.
data = [10, 15, 20, 20, 25, 30]

# Arithmetic mean and middle value of the sample.
mean, median = np.mean(data), np.median(data)

# keepdims=True keeps the reduced axis, so .mode is a length-1 array;
# index 0 pulls out the most frequent value itself.
mode_result = stats.mode(data, keepdims=True)
mode = mode_result.mode[0]

for label, value in (("Mean:", mean), ("Median:", median), ("Mode:", mode)):
    print(label, value)


### Compute variance and standard deviation

In [None]:

# np.var / np.std use ddof=0 by default, i.e. the population
# (divide-by-N) formulas rather than the sample (N-1) ones.
variance, std_dev = np.var(data), np.std(data)

for label, value in (("Variance:", variance), ("Standard Deviation:", std_dev)):
    print(label, value)


### Create dataset and classify levels

In [None]:

# One example column per level of measurement; the key names the level
# that the values underneath are meant to illustrate.
dataset = dict(
    nominal=['red', 'blue', 'green'],
    ordinal=['low', 'medium', 'high'],
    interval=[20, 30, 40],
    ratio=[5, 10, 15],
)

print(dataset)


### Implement random and stratified sampling

In [None]:

import pandas as pd

# Toy population: ids 1..100, the first 50 in stratum 'A' and the
# remaining 50 in stratum 'B'.
df = pd.DataFrame({
    'id': range(1, 101),
    'group': ['A']*50 + ['B']*50
})

# Simple random sample: 10 rows drawn without replacement from the whole
# frame, ignoring the strata.
random_sample = df.sample(n=10)

# Stratified sample: 5 rows from every group. GroupBy.sample keeps all
# columns (including 'group') and the original row index, and avoids
# groupby(...).apply, whose treatment of the grouping column is deprecated
# in recent pandas releases.
stratified_sample = df.groupby('group').sample(n=5)

print("Random Sample:\n", random_sample)
print("Stratified Sample:\n", stratified_sample)


### Calculate range of dataset

In [None]:

def calculate_range(data):
    """Return the range (largest value minus smallest value) of ``data``.

    Args:
        data: A non-empty iterable of mutually comparable values that
            support subtraction. One-shot iterators such as generators
            are accepted.

    Returns:
        The difference between the maximum and the minimum element.

    Raises:
        ValueError: If ``data`` contains no elements.
    """
    # Materialise once: max() and min() each need a full pass, and a
    # single-pass iterator would already be exhausted for the second one.
    values = list(data)
    return max(values) - min(values)

# Spread of the sample: largest value minus smallest value.
data_range = calculate_range(data)
print("Range:", data_range)


### Plot histogram to visualize skewness

In [None]:

import matplotlib.pyplot as plt

# Draw on an explicit Axes rather than through pyplot's implicit
# "current axes"; the sample is split into 5 bins.
fig, ax = plt.subplots()
ax.hist(data, bins=5)
ax.set_title('Histogram')
plt.show()


### Calculate skewness and kurtosis

In [None]:

# SciPy defaults: both estimators are the biased (population) versions,
# and kurtosis is Fisher's "excess" kurtosis (0 for a normal distribution).
skewness, kurtosis = stats.skew(data), stats.kurtosis(data)

for label, value in (("Skewness:", skewness), ("Kurtosis:", kurtosis)):
    print(label, value)


### Generate positive and negative skewness

In [None]:

import seaborn as sns

# Exponential draws pile up near zero with a long right tail, which gives
# positive skew.
pos_skew = np.random.exponential(scale=2, size=1000)

# Beta(a, b) has its long tail on the left (negative skew) only when a > b;
# with a < b, e.g. Beta(2, 5), the distribution is right-skewed instead.
neg_skew = np.random.beta(a=5, b=2, size=1000)

# One figure per sample: histogram with a KDE overlay, titled and shown
# before the next one is drawn.
for sample, heading in ((pos_skew, 'Positive Skew'), (neg_skew, 'Negative Skew')):
    sns.histplot(sample, kde=True)
    plt.title(heading)
    plt.show()


### Calculate covariance between two datasets

In [None]:

# Two independent samples of 100 uniform draws from [0, 1); x is drawn
# first, then y.
x, y = (np.random.rand(100) for _ in range(2))

# np.cov(x, y) is the 2x2 covariance matrix of the pair; the off-diagonal
# entry is cov(x, y). Its default normalisation is N-1 (sample covariance).
cov_matrix = np.cov(x, y)
covariance = cov_matrix[0][1]

print("Covariance:", covariance)


### Calculate correlation coefficient

In [None]:

# np.corrcoef returns the 2x2 correlation matrix of x and y; the
# off-diagonal entry is the Pearson correlation between them.
pairwise_corr = np.corrcoef(x, y)
correlation = pairwise_corr[0, 1]

print("Correlation Coefficient:", correlation)


### Scatter plot between two variables

In [None]:

# Scatter of y against x on an explicit Axes; labels and title are set
# in a single call.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set(xlabel='X', ylabel='Y', title='Scatter Plot')
plt.show()


### Compare simple random and systematic sampling

In [None]:

# Simple random sampling: 10 rows drawn at random from the frame.
simple_random = df.sample(n=10)

# Systematic sampling: take every `step`-th row, beginning with the first.
# NOTE(review): the start row is fixed at 0; textbook systematic sampling
# usually picks a random start within the first interval — confirm whether
# that matters for this comparison.
step = 10
systematic = df.iloc[::step]

print("Simple Random:\n", simple_random)
print("Systematic:\n", systematic)


### Calculate mean of grouped data

In [None]:

# Frequency table: each class is a "lower-upper" interval together with
# the number of observations that fall inside it.
grouped_data = {
    'class': ['0-10', '10-20', '20-30'],
    'frequency': [5, 8, 7]
}

# Represent every class by its midpoint, derived from the interval labels
# so the table above stays the single source of truth.
midpoints = [
    (float(lower) + float(upper)) / 2
    for lower, upper in (label.split('-') for label in grouped_data['class'])
]

# Grouped mean = sum(midpoint * frequency) / sum(frequency).
mean_grouped = np.average(midpoints, weights=grouped_data['frequency'])
print("Mean of Grouped Data:", mean_grouped)


### Simulate data and calculate central tendency + dispersion

In [None]:

# 100 draws from a normal distribution with mean 50 and standard
# deviation 10.
sim_data = np.random.normal(loc=50, scale=10, size=100)

# Central tendency first, then dispersion (np.std / np.var use ddof=0).
for label, statistic in (
    ("Mean:", np.mean),
    ("Median:", np.median),
    ("Std Dev:", np.std),
    ("Variance:", np.var),
):
    print(label, statistic(sim_data))


### Summarize descriptive statistics

In [None]:

# Wrap the simulated sample in a one-column frame so pandas can summarise
# it. NOTE: this rebinds `df`, which the sampling cells earlier in the
# notebook use for the id/group frame.
df = pd.DataFrame({'values': sim_data})

# describe() reports count, mean, std, min, quartiles and max; its std is
# the sample standard deviation (ddof=1), unlike np.std's default.
summary = df.describe()
print(summary)


### Plot boxplot to identify outliers

In [None]:

# Horizontal box plot of the 'values' column on an explicit Axes; points
# drawn beyond the whiskers are the candidate outliers.
fig, ax = plt.subplots()
sns.boxplot(x=df['values'], ax=ax)
plt.show()


### Calculate interquartile range (IQR)

In [None]:

# First and third quartiles in one call; the IQR is the width of the
# middle 50% of the sample.
q1, q3 = np.percentile(sim_data, [25, 75])
iqr = q3 - q1

print("IQR:", iqr)


### Implement Z-score normalization

In [None]:

# Standardise: subtract the mean, then divide by the (population, ddof=0)
# standard deviation, so the result has mean 0 and unit spread.
center = np.mean(sim_data)
spread = np.std(sim_data)
z_scores = (sim_data - center) / spread
print("First 5 Z-scores:", z_scores[:5])


### Compare two datasets using standard deviations

In [None]:

# Two normal samples with the same mean (0) but different spread:
# sigma = 1 versus sigma = 5, 100 draws each.
data1 = np.random.normal(loc=0, scale=1, size=100)
data2 = np.random.normal(loc=0, scale=5, size=100)

for label, sample in (("Std Dev Data1:", data1), ("Std Dev Data2:", data2)):
    print(label, np.std(sample))


### Visualize covariance using heatmap

In [None]:

# 2x2 covariance matrix of the two samples (variances on the diagonal,
# covariance off the diagonal), rendered as an annotated heatmap with
# two-decimal cell labels.
cov_matrix = np.cov(data1, data2)

fig, ax = plt.subplots()
sns.heatmap(cov_matrix, annot=True, fmt=".2f", ax=ax)
ax.set_title('Covariance Heatmap')
plt.show()


### Correlation matrix using seaborn

In [None]:

# Put both samples side by side so pandas can compute their pairwise
# correlation matrix (DataFrame.corr defaults to Pearson).
df_corr = pd.DataFrame({'data1': data1, 'data2': data2})
corr_matrix = df_corr.corr()

# Annotated heatmap of the correlation matrix on an explicit Axes.
fig, ax = plt.subplots()
sns.heatmap(corr_matrix, annot=True, ax=ax)
ax.set_title('Correlation Matrix')
plt.show()


### Visualize skewness and kurtosis

In [None]:

# Histogram of the simulated sample with a KDE curve overlaid, to show
# the distribution's shape.
fig, ax = plt.subplots()
sns.histplot(sim_data, kde=True, ax=ax)
ax.set_title('Histogram with KDE')
plt.show()

# Numeric counterparts of the picture above: sample skewness and
# (Fisher / excess) kurtosis.
for label, moment in (("Skewness:", stats.skew), ("Kurtosis:", stats.kurtosis)):
    print(label, moment(sim_data))


### Pearson and Spearman correlation coefficients

In [None]:

# Both SciPy routines return a (statistic, p-value) pair; only the
# coefficient is reported here.
pearson_corr, _pearson_p = stats.pearsonr(data1, data2)
spearman_corr, _spearman_p = stats.spearmanr(data1, data2)

for label, coefficient in (
    ("Pearson Correlation:", pearson_corr),
    ("Spearman Correlation:", spearman_corr),
):
    print(label, coefficient)
