In [2]:
import numpy as np
np.__version__

'1.26.4'

In [23]:
import pandas as pd
pd.__version__

'2.2.2'

In [52]:
def gradient_rgba(val, max_val, rgb=(0, 0, 255), alpha=0.4):
    '''
    Apply a semi-transparent gradient to the value based on the range [0, max_val].

    @params:
    val: The current value to apply the gradient.
    max_val: The maximum value in the range.
    rgb: The RGB color of the gradient.
    alpha: The base-transparency of the gradient.
    '''
    r, g, b = rgb
    intensity = val / max_val
    return f'background-color: rgba({r}, {g}, {b}, {intensity * alpha})'

### Simple Frequency-Distribution

In [7]:
# Em um desafio, um programa deveria ter o mínimo de caracteres. 30 programadores participaram
# do desafio. Foram registrados, aproximadamente, em milhares, as seguintes quantidades:
raw = np.array([1, 4, 5, 2, 1, 2, 5, 3, 2, 3, 3, 1, 2, 2, 1, 3, 5, 4, 2, 1, 2, 3, 2, 4, 4, 2, 1, 3, 4, 2])
rol = np.sort(raw)
print('raw:', raw)
print('rol:', rol)

raw: [1 4 5 2 1 2 5 3 2 3 3 1 2 2 1 3 5 4 2 1 2 3 2 4 4 2 1 3 4 2]
rol: [1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 5 5 5]


In [25]:
classes, f = np.unique(raw, return_counts=True) # f=frequencies [# of occurrences]
f_r = f / f.sum()  # relative frequencies
f_percent = f_r * 100
F = np.add.accumulate(f) # F = cumulative frequencies
F_percent = np.add.accumulate(f_percent)

print('f:', f)
print('f%:', f_percent)
print('F:', F)
print('F%:', F_percent)

f: [ 6 10  6  5  3]
f%: [20.         33.33333333 20.         16.66666667 10.        ]
F: [ 6 16 22 27 30]
F%: [ 20.          53.33333333  73.33333333  90.         100.        ]


In [54]:
# Create a DataFrame
freq_table = pd.DataFrame({
    "Class": classes,
    "f": f,
    "Relative Frequency": f_r,
    "Relative Frequency (%)": f_percent,
    "Cumulative Frequency": F,
    "Cumulative Frequency (%)": F_percent
})

max_val = freq_table["Relative Frequency (%)"].max()

# Display the frequency table:
(freq_table.style
          .format({"Relative Frequency": "{:.1f}",
                   "Relative Frequency (%)": "{:.1f}%"})  # 1 decimal for percentage
          .map(lambda x: gradient_rgba(x, max_val), subset=["Relative Frequency (%)"])  # color by value
        .hide(axis="index")  # Hide the index column
    )

Class,f,Relative Frequency,Relative Frequency (%),Cumulative Frequency,Cumulative Frequency (%)
1,6,0.2,20.0%,6,20.0
2,10,0.3,33.3%,16,53.333333
3,6,0.2,20.0%,22,73.333333
4,5,0.2,16.7%,27,90.0
5,3,0.1,10.0%,30,100.0


### Interval Frequency-Distribution

In [97]:
# Os dados a seguir referem-se ao clock (Hz) de 40 processadores observados em um estudo.
raw = [
    2.1, 2.8, 3.3, 3.3, 4.4, 1.2, 3.1, 3.2, 1.1, 2.7,
    2.5, 1.1, 2.9, 1.5, 1.4, 3.6, 2.6, 3.3, 4.2, 3.3,
    2.4, 2.9, 1.3, 3.5, 2.0, 2.8, 2.8, 3.1, 2.7, 2.8,
    2.8, 3.5, 3.5, 3.4, 2.4, 2.5, 2.7, 2.8, 2.3, 2.5
]
rol = np.sort(raw)
print('raw:', raw)
print('rol:', rol)

raw: [2.1, 2.8, 3.3, 3.3, 4.4, 1.2, 3.1, 3.2, 1.1, 2.7, 2.5, 1.1, 2.9, 1.5, 1.4, 3.6, 2.6, 3.3, 4.2, 3.3, 2.4, 2.9, 1.3, 3.5, 2.0, 2.8, 2.8, 3.1, 2.7, 2.8, 2.8, 3.5, 3.5, 3.4, 2.4, 2.5, 2.7, 2.8, 2.3, 2.5]
rol: [1.1 1.1 1.2 1.3 1.4 1.5 2.  2.1 2.3 2.4 2.4 2.5 2.5 2.5 2.6 2.7 2.7 2.7
 2.8 2.8 2.8 2.8 2.8 2.8 2.9 2.9 3.1 3.1 3.2 3.3 3.3 3.3 3.3 3.4 3.5 3.5
 3.5 3.6 4.2 4.4]


In [98]:
LS = rol[-1] # Superior Limit
LI = rol[0] # Inferior Limit
AT = LS - LI # Total Amplitude
k = np.sqrt(np.size(raw)).astype(np.int32) + 1 # Number of classes
h = AT / k # Class amplitude

print(f'Total Amplitude (variation) in [{LI}, {LS}]:', AT)
print('#of classes:', k)
print('Interval for each class:', h)

Total Amplitude (variation) in [1.1, 4.4]: 3.3000000000000003
#of classes: 7
Interval for each class: 0.4714285714285715


In [135]:
f = np.zeros(k, dtype=np.int32)
for i in range(k):
    l_i = LI + i * h       # Inferior limit
    l_s = l_i + h          # Superior limit
    x_m = (l_i + l_s) / 2  # midpoint
    if i == k - 1:
        # Last class
        f[i] = np.sum((rol >= l_i) & (rol <= l_s))
    else:
        f[i] = np.sum((rol >= l_i) & (rol < l_s))
print(f)
f.sum()

[ 6  1  7 12  8  4  2]


40

In [136]:
f_r = f / f.sum()  # relative frequencies
f_percent = f_r * 100
F = np.add.accumulate(f) # F = cumulative frequencies
F_percent = np.add.accumulate(f_percent)

print('f%:', f_percent)
print('F:', F)
print('F%:', F_percent)

f%: [15.   2.5 17.5 30.  20.  10.   5. ]
F: [ 6  7 14 26 34 38 40]
F%: [ 15.   17.5  35.   65.   85.   95.  100. ]


In [144]:
limits = np.linspace(LI, LS, k + 1)

#x_m = (limits[:-1] + limits[1:]) / 2  # Modo direto
points = np.column_stack((limits[:-1], limits[1:]))
x_m = points.mean(axis=1)

# Formata cada par como string "li,ls ˫ li,ls"
classes = [f"{li:.2f} ˫ {ls:.2f}" for li, ls in points]
classes

['1.10 ˫ 1.57',
 '1.57 ˫ 2.04',
 '2.04 ˫ 2.51',
 '2.51 ˫ 2.99',
 '2.99 ˫ 3.46',
 '3.46 ˫ 3.93',
 '3.93 ˫ 4.40']

In [162]:
# Create a DataFrame
freq_table = pd.DataFrame({
        "Class": classes,
        "f": f,
        "Midpoint": x_m,
        "Relative Frequency": f_r,
        "Relative Frequency (%)": f_percent,
        "Cumulative Frequency": F,
        "Cumulative Frequency (%)": F_percent
    })

freq_table_total = pd.concat([
    freq_table,
    pd.DataFrame({
        "Class": ["Total"],
        "f": [f.sum()],
        "Midpoint": [np.nan],
        "Relative Frequency": [np.sum(f_r)],
        "Relative Frequency (%)": [np.sum(f_percent)],
        "Cumulative Frequency": [np.nan],
        "Cumulative Frequency (%)": [np.nan]
    })
], ignore_index=True)

# Get max for gradient
max_val = freq_table_total.loc[freq_table_total["Class"] != "Total", "Relative Frequency (%)"].max()

# Style and display the frequency table
(
freq_table_total.style
    .format({
        "Midpoint":  lambda v: "-" if pd.isna(v) else f"{v:.2f}",
        "Relative Frequency": "{:.3f}",
        "Relative Frequency (%)": "{:.1f}%",
        "Cumulative Frequency": lambda v: "-" if pd.isna(v) else f"{v:.0f}",
        "Cumulative Frequency (%)": lambda v: "-" if pd.isna(v) else f"{v:.1f}",
    })
    # Color gradient only for non-total rows
    .map(lambda x: (
        "" if pd.isna(x) or x == freq_table_total.loc[freq_table_total["Class"]=="Total", "Relative Frequency (%)"].values[0]
        else gradient_rgba(x, max_val, (50, 20, 200))
    ), subset=["Relative Frequency (%)"])
    # Bold total row
    .apply(lambda row: ["font-weight: bold;" if row["Class"] == "Total" else "" for _ in row], axis=1)   # color by value
    .hide(axis="index")  # Hide the index column
)


Class,f,Midpoint,Relative Frequency,Relative Frequency (%),Cumulative Frequency,Cumulative Frequency (%)
1.10 ˫ 1.57,6,1.34,0.15,15.0%,6,15.0
1.57 ˫ 2.04,1,1.81,0.025,2.5%,7,17.5
2.04 ˫ 2.51,7,2.28,0.175,17.5%,14,35.0
2.51 ˫ 2.99,12,2.75,0.3,30.0%,26,65.0
2.99 ˫ 3.46,8,3.22,0.2,20.0%,34,85.0
3.46 ˫ 3.93,4,3.69,0.1,10.0%,38,95.0
3.93 ˫ 4.40,2,4.16,0.05,5.0%,40,100.0
Total,40,-,1.0,100.0%,-,-
