In [1]:
import numpy as np

In [2]:
# Statistics with NumPy
# NumPy is a powerful library in Python that provides a wide range of tools for performing statistical operations. 
# It allows for efficient computation and manipulation of large arrays and matrices of numerical data, 
# which is critical for performing statistical analysis.

# Below, we'll cover some of the common statistical functions available in NumPy, along with examples.

# 1. Mean
# The mean (arithmetic average) is the sum of all elements divided by
# the number of elements.

# Function: np.mean() -- the ndarray method data.mean() used below is equivalent.
data = np.array([1, 2, 3, 4, 5])
mean_value = data.mean()
print(mean_value)

3.0


In [None]:
# 2. Median
# The median is the value in the middle once the data is sorted.
# For an even number of elements it is the average of the two middle values.

# Function: np.median()

data = np.array([1, 3, 5, 7, 9])
# axis=None (the default) takes the median over the whole array.
median_value = np.median(data, axis=None)
print(median_value)


In [4]:
# 3. Standard Deviation
# The standard deviation quantifies how spread out the values are.
# Small values mean the data clusters around the mean; large values mean
# the data is scattered over a wider range.

# Function: np.std()
# With its default arguments np.std() divides by N (population standard
# deviation); pass ddof=1 to divide by N - 1 instead.

data = np.array([1, 2, 3, 4, 5])
std_value = np.round(np.std(data), 2)  # keep two decimal places
print(std_value)

1.41


In [None]:
# 4. Variance
# The variance is the mean of the squared deviations from the mean,
# i.e. the square of the standard deviation.

# Function: np.var() -- the ndarray method data.var() used below is equivalent.
# By default the divisor is N; pass ddof=1 for the N - 1 (sample) variance.

data = np.array([1, 2, 3, 4, 5])
variance_value = data.var()
print(variance_value)


In [None]:
# 5. Percentiles
# The p-th percentile is the value below which p percent of the observations
# fall. The 50th percentile, for instance, is the median.

# Function: np.percentile()

data = np.array([1, 2, 3, 4, 5])
# q is given on a 0-100 scale, so q=50 asks for the median.
percentile_50 = np.percentile(data, q=50)
print(percentile_50)


In [5]:
# 6. Min and Max
# The minimum is the smallest element of the array;
# the maximum is the largest element.

# Functions: np.min() and np.max()
# (the ndarray methods data.min() / data.max() used below are equivalent)

data = np.array([1, 2, 3, 4, 5])
min_value, max_value = data.min(), data.max()
print(min_value, max_value)


1 5


In [6]:
# 7. Sum and Product
# The sum adds every element of the array together;
# the product multiplies every element together.

# Functions: np.sum() and np.prod()
# (the ndarray methods data.sum() / data.prod() used below are equivalent)

data = np.array([1, 2, 3, 4, 5])
sum_value, product_value = data.sum(), data.prod()
print(sum_value, product_value)

15 120


In [None]:
# 8. Correlation Coefficient
# The correlation coefficient describes how strongly, and in which direction,
# two arrays are linearly related.

# Function: np.corrcoef()
# For two 1-D inputs the result is a 2x2 matrix: ones on the diagonal and
# the x-y correlation in the off-diagonal entries.

x = np.array([1, 2, 3, 4, 5])
y = x[::-1].copy()  # 5, 4, 3, 2, 1 -- x reversed, as an independent array
correlation_matrix = np.corrcoef(x, y)
print(correlation_matrix)


In [None]:
# 9. Covariance
# Covariance measures how two variables vary together.
# A positive covariance means they tend to rise together;
# a negative one means one tends to rise while the other falls.

# Function: np.cov()
# Unlike np.var(), np.cov() divides by N - 1 by default (pass bias=True
# or ddof=0 to divide by N instead).

x, y = np.array([1, 2, 3, 4, 5]), np.array([5, 4, 3, 2, 1])
covariance_matrix = np.cov(x, y)
print(covariance_matrix)


In [7]:
# 10. Histogram
# A histogram summarizes the distribution of data by counting how many
# values fall into each of a set of ranges (bins).
# np.histogram() only computes those counts and the bin edges; it does not
# draw anything.

# Function: np.histogram()

data = np.array([1, 1, 2, 3, 3, 3, 4, 4, 5])
# Returns (counts per bin, bin edges); 5 bins give 6 edges.
hist, bins = np.histogram(data, bins=5)
print(hist, bins, sep="\n")


[2 1 3 2 1]
[1.  1.8 2.6 3.4 4.2 5. ]


In [None]:
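# 11. Mode
# The mode is the value that appears most frequently in a dataset.
# NumPy has no dedicated function for the mode; you can use scipy.stats.mode() for this.
# With NumPy alone, np.unique(data, return_counts=True) returns each distinct value
# together with its count, and the value with the largest count is the mode.

# Function (via SciPy): scipy.stats.mode()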

