In [1]:
import numpy as np
import pandas as pd

In [2]:
# Synthetic porosity data from a sandstone core (%)
porosity_data = np.array([
12.5, 14.2, 13.8, 11.9, 15.1, 13.0, 14.5, 12.8, 16.0, 13.5,
14.0, 12.2, 15.5, 13.3, 14.8, 11.5, 16.2, 13.9, 14.7, 12.0,
17.1, 13.1, 14.9, 12.6, 15.8, 13.6, 14.3, 11.8, 16.5, 13.2,
15.0, 12.9, 15.3, 13.7, 14.6, 11.7, 16.1, 13.4, 14.4, 12.1
])
print(f"Porosity Data (n={len(porosity_data)}):\n{porosity_data}\n")

Porosity Data (n=40):
[12.5 14.2 13.8 11.9 15.1 13.  14.5 12.8 16.  13.5 14.  12.2 15.5 13.3
 14.8 11.5 16.2 13.9 14.7 12.  17.1 13.1 14.9 12.6 15.8 13.6 14.3 11.8
 16.5 13.2 15.  12.9 15.3 13.7 14.6 11.7 16.1 13.4 14.4 12.1]



In [3]:
# Calculate measures of central tendency
mean_porosity = np.mean(porosity_data)
median_porosity = np.median(porosity_data)

In [5]:
# For mode, we can use scipy.stats, but for simple arrays, we can count
from collections import Counter
counts = Counter(porosity_data)
mode_porosity = [key for key, value in counts.items() if value ==
max(counts.values())]
print(f"Mean Porosity: {mean_porosity:.2f}%")
print(f"Median Porosity: {median_porosity:.2f}%")
print(f"Mode Porosity: {mode_porosity}\% (if multiple, all are listed)")

Mean Porosity: 13.94%
Median Porosity: 13.85%
Mode Porosity: [np.float64(12.5), np.float64(14.2), np.float64(13.8), np.float64(11.9), np.float64(15.1), np.float64(13.0), np.float64(14.5), np.float64(12.8), np.float64(16.0), np.float64(13.5), np.float64(14.0), np.float64(12.2), np.float64(15.5), np.float64(13.3), np.float64(14.8), np.float64(11.5), np.float64(16.2), np.float64(13.9), np.float64(14.7), np.float64(12.0), np.float64(17.1), np.float64(13.1), np.float64(14.9), np.float64(12.6), np.float64(15.8), np.float64(13.6), np.float64(14.3), np.float64(11.8), np.float64(16.5), np.float64(13.2), np.float64(15.0), np.float64(12.9), np.float64(15.3), np.float64(13.7), np.float64(14.6), np.float64(11.7), np.float64(16.1), np.float64(13.4), np.float64(14.4), np.float64(12.1)]\% (if multiple, all are listed)


In [6]:
# Calculate measures of dispersion
range_porosity = np.max(porosity_data) - np.min(porosity_data)
variance_porosity = np.var(porosity_data)
std_dev_porosity = np.std(porosity_data)
print(f"Range: {range_porosity:.2f}%")
print(f"Variance: {variance_porosity:.2f}")
print(f"Standard Deviation: {std_dev_porosity:.2f}%")

Range: 5.60%
Variance: 2.09
Standard Deviation: 1.45%


In [7]:
# Using pandas describe() for a quick summary
porosity_series = pd.Series(porosity_data)
print("\nPandas describe() output:")
print(porosity_series.describe())


Pandas describe() output:
count    40.00000
mean     13.93750
std       1.46352
min      11.50000
25%      12.87500
50%      13.85000
75%      14.92500
max      17.10000
dtype: float64


 Notice how pandas.Series.describe() gives you a quick overview of many of
these statistics. It's a fantastic first step to understand your data at a glance. For our
porosity data, the mean and median are quite close, suggesting a relatively symmetrical
distribution. The standard deviation tells us that, on average, individual porosity values
deviate by about 1.3% from the mean.
