# Measures of Central Tendency

## Importing the Necessary Libraries

In [None]:
import numpy as np
# SciPy is a library built on top of NumPy, offering more scientific functions
from scipy import stats

## Setting up the data

In [None]:
# Sample house prices, expressed in thousands of dollars.
# The final entry (2500) is a deliberate outlier standing in for a mansion.
house_prices = np.array(
    [150, 200, 175, 220, 180, 250, 175, 190, 210, 2500]
)
print(f"Dataset of house prices (in $1000s): \n{house_prices}")

Dataset of house prices (in $1000s): 
[ 150  200  175  220  180  250  175  190  210 2500]


## Mean

In [None]:
# A single extreme value (the 2500 entry) drags the arithmetic mean upward.
mean_price = house_prices.mean()
print(
    f"Mean (Average) Price: ${mean_price:,.2f}k",
    "Notice how the single high value pulls the mean up significantly.",
    sep="\n",
)

Mean (Average) Price: $425.00k
Notice how the single high value pulls the mean up significantly.


## Median

In [None]:
# The median is the middle of the ordered values, so the outlier barely moves it;
# it is a better indicator of the "typical" house price than the mean.
median_price = np.median(house_prices)
print(
    f"Median (Middle) Price: ${median_price:,.2f}k",
    "The median provides a more realistic 'center' for this skewed dataset.",
    sep="\n",
)

Median (Middle) Price: $195.00k
The median provides a more realistic 'center' for this skewed dataset.


## Mode

In [None]:
# The mode is the price that occurs most often in the dataset.
# stats.mode() returns a result object exposing `.mode` (the value) and
# `.count` (how many times it occurs); keepdims=False gives scalars for 1-D input.
mode_result = stats.mode(house_prices, keepdims=False)
print(
    f"Mode (Most Frequent) Price: ${mode_result.mode}k",
    f"This price appeared {mode_result.count} times in the dataset.",
    sep="\n",
)

Mode (Most Frequent) Price: $175k
This price appeared 2 times in the dataset.


# Measures of Dispersion

## Importing the Necessary Libraries

In [None]:
import numpy as np

## Setting up the data

In [None]:
# Test scores for the two classes being compared (five scores each).
scores_A, scores_B = (
    np.array([74, 75, 75, 76, 75]),
    np.array([50, 60, 75, 90, 100]),
)

## Class A Analysis

In [None]:
# Compute every dispersion statistic for Class A first, then report them.
mean_A = scores_A.mean()
range_A = np.ptp(scores_A)    # peak-to-peak: largest score minus smallest score
variance_A = scores_A.var()   # NumPy default ddof=0, i.e. population variance
std_dev_A = scores_A.std()    # square root of the variance above

print("--- Analysis of Class A ---")
print(f"Mean Score: {mean_A}")
print(f"Range of Scores: {range_A}")
print(f"Variance: {variance_A:.2f}")
print(f"Standard Deviation: {std_dev_A:.2f}")
print("Result: Very low dispersion. Scores are tightly clustered around the mean.")

--- Analysis of Class A ---
Mean Score: 75.0
Range of Scores: 2
Variance: 0.40
Standard Deviation: 0.63
Result: Very low dispersion. Scores are tightly clustered around the mean.


## Class B Analysis

In [None]:
# Dispersion statistics for Class B, computed up front.
mean_B = scores_B.mean()
range_B = np.ptp(scores_B)    # peak-to-peak: max - min
variance_B = scores_B.var()   # population variance (NumPy default ddof=0)
std_dev_B = scores_B.std()    # population standard deviation

# Emit the whole report in one call; sep="\n" puts each item on its own line.
print(
    "--- Analysis of Class B ---",
    f"Mean Score: {mean_B}",
    f"Range of Scores: {range_B}",
    f"Variance: {variance_B:.2f}",
    f"Standard Deviation: {std_dev_B:.2f}",
    "Result: Very high dispersion. Scores are widely spread out.",
    sep="\n",
)

--- Analysis of Class B ---
Mean Score: 75.0
Range of Scores: 50
Variance: 340.00
Standard Deviation: 18.44
Result: Very high dispersion. Scores are widely spread out.


# Measures of Shape

## Importing the Necessary Libraries

In [None]:
import numpy as np
from scipy import stats

## Positively Skewed Data Analysis

In [None]:
# Most values sit at the lower end; one large value (150) forms a long right tail.
data_positive_skew = np.array([20, 25, 30, 32, 35, 38, 40, 45, 50, 150])

print("--- 1. Positively Skewed Data ---")
mean_pos = np.mean(data_positive_skew)
# np.median orders the values internally, so the array is passed as-is
# (pre-sorting would only convert it to a Python list for no benefit).
median_pos = np.median(data_positive_skew)
# stats.skew > 0 means the right tail is the longer one.
skew_pos = stats.skew(data_positive_skew)
print(f"Mean: {mean_pos:.2f}, Median: {median_pos:.2f}")
print(f"Skewness: {skew_pos:.2f}")
print("Result: Mean > Median and Skewness is positive, indicating a right tail.\n")

--- 1. Positively Skewed Data ---
Mean: 46.50, Median: 36.50
Skewness: 2.39
Result: Mean > Median and Skewness is positive, indicating a right tail.



## Negatively Skewed Data Analysis

In [None]:

# Most values sit at the higher end; one very low value (10) forms a long left tail.
data_negative_skew = np.array([10, 85, 90, 92, 94, 95, 96, 98, 99, 100])

print("--- 2. Negatively Skewed Data ---")
mean_neg = data_negative_skew.mean()
median_neg = np.median(data_negative_skew)
skew_neg = stats.skew(data_negative_skew)  # negative => longer left tail
print(
    f"Mean: {mean_neg:.2f}, Median: {median_neg:.2f}",
    f"Skewness: {skew_neg:.2f}",
    "Result: Mean < Median and Skewness is negative, indicating a left tail.\n",
    sep="\n",
)

--- 2. Negatively Skewed Data ---
Mean: 85.90, Median: 94.50
Skewness: -2.53
Result: Mean < Median and Skewness is negative, indicating a left tail.



## Kurtosis Analysis

In [None]:
# A tightly packed core plus one extreme value on each side (-100 and 150)
# gives a sharper peak and fatter tails than a normal distribution.
data_high_kurtosis = np.array([-100, 0, 5, 10, 12, 15, 18, 20, 25, 30, 150])

# fisher=True (SciPy's default) reports *excess* kurtosis, i.e. kurtosis minus 3,
# so a normal distribution scores 0.
kurtosis_high = stats.kurtosis(data_high_kurtosis, fisher=True)

print(
    "--- 3. High Kurtosis Data (Leptokurtic) ---",
    f"Excess Kurtosis: {kurtosis_high:.2f}",
    "Result: Kurtosis is highly positive, indicating fat tails and a high chance of outliers.",
    sep="\n",
)

--- 3. High Kurtosis Data (Leptokurtic) ---
Excess Kurtosis: 2.34
Result: Kurtosis is highly positive, indicating fat tails and a high chance of outliers.
