# Item # 5

In [2]:
import numpy as np
import itertools
import pandas as pd

#given data
x = np.array([6, 9, 12, 15, 18])
p_x = np.array([0.1, 0.2, 0.4, 0.2, 0.1])

#  mean (μ)
mu = np.sum(x * p_x)

#  variance (σ²)
var = np.sum(((x - mu) ** 2) * p_x)

# generating sample sizes
samples = list(itertools.product(x, repeat=2))

# means and probabilities for each sample
sample_means = []
probabilities = []

for sample in samples:
    mean = np.mean(sample)
    prob = p_x[x == sample[0]] * p_x[x == sample[1]]
    sample_means.append(mean)
    probabilities.append(prob)

results = pd.DataFrame({
    'Sample Mean': sample_means,
    'Probability': probabilities
})

# probabilities for the same sample means
aggregated_results = results.groupby('Sample Mean').sum().reset_index()

# Display the results
print(f"Mean (μ): {mu}")
print(f"Variance (σ²): {var}\n")
print(aggregated_results)

Mean (μ): 12.000000000000002
Variance (σ²): 10.8

   Sample Mean             Probability
0          6.0  [0.010000000000000002]
1          7.5   [0.04000000000000001]
2          9.0   [0.12000000000000002]
3         10.5   [0.20000000000000007]
4         12.0   [0.26000000000000006]
5         13.5   [0.20000000000000007]
6         15.0   [0.12000000000000002]
7         16.5   [0.04000000000000001]
8         18.0  [0.010000000000000002]


# Item#4

In [3]:
# given data
masses = np.array([20.48, 35.97, 62.34])
std_devs = np.array([0.21, 0.46, 0.54])

# mean
mean_sum = np.sum(masses)

# standard dev
std_dev_sum = np.sqrt(np.sum(std_devs**2))

print(f"Mean of the sum of the masses: {mean_sum:.2f} g")
print(f"Standard deviation of the sum of the masses: {std_dev_sum:.2f} g")


Mean of the sum of the masses: 118.79 g
Standard deviation of the sum of the masses: 0.74 g


# Item#1

In [4]:
data = {
    'Grade Range': ['90-100', '80-89', '70-79', '60-69', '50-59', '40-49', '30-39'],
    'Number of Students': [9, 32, 43, 21, 11, 3, 1]
}

# conversion to dataframe
df = pd.DataFrame(data)

# cumulative frequency
df['Cumulative Frequency'] = df['Number of Students'].cumsum()

students_total = df['Number of Students'].sum()

# defining the quartile positions
Q1_position = students_total * 0.25
Q2_position = students_total * 0.50
Q3_position = students_total * 0.75

# find the quartiles based on cumulative frequency
def find_quartile_position(position):
    for index, row in df.iterrows():
        if row['Cumulative Frequency'] >= position:
            return row['Grade Range']

Q1 = find_quartile_position(Q1_position)
Q2 = find_quartile_position(Q2_position)
Q3 = find_quartile_position(Q3_position)

print(f"Total Number of Students: {total_students}")
print(f"Q1 (25th Percentile) falls in the grade range: {Q1}")
print(f"Q2 (50th Percentile / Median) falls in the grade range: {Q2}")
print(f"Q3 (75th Percentile) falls in the grade range: {Q3}")

Total Number of Students: 120
Q1 (25th Percentile) falls in the grade range: 80-89
Q2 (50th Percentile / Median) falls in the grade range: 70-79
Q3 (75th Percentile) falls in the grade range: 60-69


# Item#3

In [5]:
data = np.array([6, 2, 8, 7, 5])

# mean and standard deviation
mean = np.mean(data)
std_dev = np.std(data)

#  z-scores
z_scores = (data - mean) / std_dev

#  mean and standard deviation of z-scores
mean_z = np.mean(z_scores)
std_dev_z = np.std(z_scores)

print(f"Original Mean: {mean:.2f}")
print(f"Original Standard Deviation: {std_dev:.2f}")
print(f"Z-scores: {z_scores}")
print(f"Mean of Z-scores: {mean_z:.2f}")
print(f"Standard Deviation of Z-scores: {std_dev_z:.2f}")


Original Mean: 5.60
Original Standard Deviation: 2.06
Z-scores: [ 0.19425717 -1.74831455  1.16554303  0.6799001  -0.29138576]
Mean of Z-scores: 0.00
Standard Deviation of Z-scores: 1.00


# Item#2

In [6]:
statistics_mean = 78
statistics_std_dev = 8.0
algebra_mean = 73
algebra_std_dev = 7.6

# absolute dispersion 
absolute_dispersion_stats = statistics_std_dev
absolute_dispersion_algebra = algebra_std_dev

#  relative dispersion 
cv_statistics = (absolute_dispersion_stats / statistics_mean) * 100
cv_algebra = (absolute_dispersion_algebra / algebra_mean) * 100

# determining which subject has greater absolute and relative dispersion
greater_absolute_dispersion = "Statistics" if absolute_dispersion_stats > absolute_dispersion_algebra else "Algebra"
greater_relative_dispersion = "Algebra" if cv_algebra > cv_statistics else "Statistics"

print(f"Absolute Dispersion:")
print(f"Statistics: {absolute_dispersion_stats}")
print(f"Algebra: {absolute_dispersion_algebra}\n")
print(f"Relative Dispersion (Coefficient of Variation):")
print(f"Statistics: {cv_statistics:.2f}%")
print(f"Algebra: {cv_algebra:.2f}%\n")
print(f"Greater Absolute Dispersion: {greater_absolute_dispersion}")
print(f"Greater Relative Dispersion: {greater_relative_dispersion}")

Absolute Dispersion:
Statistics: 8.0
Algebra: 7.6

Relative Dispersion (Coefficient of Variation):
Statistics: 10.26%
Algebra: 10.41%

Greater Absolute Dispersion: Statistics
Greater Relative Dispersion: Algebra
