In [46]:
import pandas as pd
from IPython.display import display

# Raw observations for the exercise: one list of values per named distribution shape.
# The dict keys become the DataFrame column names used by every later cell.
sample = {
    "Normal": [67, 70, 63, 65, 68, 60, 70, 64, 69, 61, 66, 65, 71, 62, 66, 68, 64, 67, 62, 66, 65, 63, 66, 65, 63, 69, 62, 67, 59, 66, 65, 63, 65, 60, 67, 64, 68, 61, 69, 65, 62, 67, 70, 64, 63, 68, 64, 65, 61, 66],
    "Skewed-right": [31, 43, 30, 30, 38, 26, 29, 55, 46, 26, 29, 57, 34, 34, 36, 40, 28, 26, 66, 63, 30, 33, 24, 35, 34, 40, 24, 29, 24, 27, 35, 33, 75, 38, 34, 85, 29, 40, 41, 35, 26, 34, 19, 23, 28, 26, 31, 25, 22, 28],
    "Skewed-left": [102, 55, 70, 95, 73, 79, 60, 73, 89, 85, 72, 92, 76, 93, 76, 97, 10, 70, 85, 25, 83, 58, 10, 92, 82, 87, 104, 75, 80, 66, 93, 90, 84, 73, 98, 79, 35, 71, 90, 71, 63, 58, 82, 72, 93, 44, 65, 77, 81, 77],
    "Uniform": [12.1, 12.1, 12.4, 12.1, 12.1, 12.2, 12.2, 12.2, 11.9, 12.2, 12.3, 12.3, 11.7, 12.3, 12.3, 12.4, 12.4, 12.1, 12.4, 12.4, 12.5, 11.8, 12.5, 12.5, 12.5, 11.6, 11.6, 12.0, 11.6, 11.6, 11.7, 12.3, 11.7, 11.7, 11.7, 11.8, 12.5, 11.8, 11.8, 11.8, 11.9, 11.9, 11.9, 12.2, 11.9, 12.0, 11.9, 12.0, 12.0, 12.0]
}

# NOTE: despite its name, df_normal holds ALL four distributions (one column each),
# not only the "Normal" sample.
df_normal = pd.DataFrame(sample)

def compute_moments_1(column):
    """Return the first four raw moments (moments about the origin) of ``column``.

    The k-th raw moment is the average of ``x ** k`` over all values. Only the
    first one is a familiar summary statistic (the mean); the 2nd-4th raw
    moments are NOT the variance, skewness or kurtosis.

    Parameters
    ----------
    column : sized iterable of numbers (e.g. a list or a pandas Series)

    Returns
    -------
    tuple
        ``(1st, 2nd, 3rd, 4th)`` raw moments, each rounded to 2 decimals.

    Raises
    ------
    ZeroDivisionError
        If ``column`` is empty.
    """
    count = len(column)

    def raw_moment(order):
        # Average of the values raised to the requested power.
        return sum(value ** order for value in column) / count

    first = sum(column) / count
    return (
        round(first, 2),
        round(raw_moment(2), 2),
        round(raw_moment(3), 2),
        round(raw_moment(4), 2),
    )

# Raw moments of every distribution, keyed by its column name.
moments1 = {
    name: compute_moments_1(df_normal[name])
    for name in df_normal.columns
}

# Each dict entry becomes one column labelled by the index below;
# transposing puts one distribution per row.
moments1_df = pd.DataFrame(
    moments1,
    index=['1st moment', '2nd moment', '3rd moment', '4th moment'],
).T
display(moments1_df)

Unnamed: 0,1st moment,2nd moment,3rd moment,4th moment
Normal,65.12,4248.92,277770.92,18194173.64
Skewed-right,35.48,1437.72,68292.44,3797594.04
Skewed-left,74.2,5925.4,489458.8,41396161.48
Uniform,12.06,145.43,1755.16,21194.59


Upon inspection, this table of raw moments (moments about the origin) illustrates how the distributions differ: the first moment is the mean, while the higher raw moments only translate into variance, skewness, and kurtosis once they are re-centered on the mean. The normal distribution is relatively balanced, while the skewed distributions show pronounced tendencies toward lower or higher values, indicating their unique characteristics and behaviors in terms of data dispersion and concentration.

In [91]:
def compute_moments_mean(column):
    """Return the mean and the first four moments about the mean of ``column``.

    The k-th moment about the mean is the average of ``(x - mean) ** k``.
    The 2nd one is the population variance (divisor N, not N - 1).

    Parameters
    ----------
    column : sized iterable of numbers (e.g. a list or a pandas Series)

    Returns
    -------
    tuple
        ``(mean, 1st, 2nd, 3rd, 4th)``, each rounded to 2 decimals.

    Raises
    ------
    ZeroDivisionError
        If ``column`` is empty.
    """
    count = len(column)
    center = sum(column) / count

    # Deviations from the mean, computed once and reused for every order.
    offsets = [value - center for value in column]

    def central(order):
        return sum(offset ** order for offset in offsets) / count

    # The first moment about the mean is zero up to floating-point error.
    first = sum(offsets) / count

    return tuple(
        round(stat, 2)
        for stat in (center, first, central(2), central(3), central(4))
    )

# Mean and moments about the mean for every distribution, keyed by column name.
moments2 = {
    name: compute_moments_mean(df_normal[name])
    for name in df_normal.columns
}

# Each dict entry becomes one column labelled by the index below;
# transposing puts one distribution per row.
moments2_df = pd.DataFrame(
    moments2,
    index=[
        'Mean',
        '1st moment about the mean',
        '2nd moment about the mean',
        '3rd moment about the mean',
        '4th moment about the mean',
    ],
).T
display(moments2_df)

Unnamed: 0,Mean,1st moment about the mean,2nd moment about the mean,3rd moment about the mean,4th moment about the mean
Normal,65.12,-0.0,8.31,-0.47,160.95
Skewed-right,35.48,0.0,178.89,4588.13,210642.88
Skewed-left,74.2,-0.0,419.76,-12498.26,927289.75
Uniform,12.06,-0.0,0.08,0.0,0.01


The normal distribution is tightly clustered around its mean of 65.12 (its second moment about the mean is only 8.31), indicating that most values are close to this central point.

In addition, the skewed-left distribution has the highest variance (419.76), revealing a much broader spread of values around its mean of 74.2 (close to 75), whereas the normal distribution's lower variance (8.31) indicates a tighter concentration around its own mean.

The skewed-left distribution's large negative third moment about the mean (-12498.26) indicates negative skewness: a long tail of values lying well below 75 (its mean is 74.2), emphasizing the imbalance. Finally, its large fourth moment about the mean points to high kurtosis and suggests that there are notable extreme values present, indicating that, while many values cluster together, some are much lower.

In [89]:
# Pool every observation from all four columns into a single 1-D array.
all_data = df_normal.to_numpy().flatten()

def compute_moments_about_value(data, value):
    """Return the first four moments of ``data`` about an arbitrary ``value``.

    The k-th moment about ``value`` is the average of ``(x - value) ** k``.
    This is the general helper that the cell's driver code calls as
    ``compute_moments_about_value(all_data, 75)``.

    Parameters
    ----------
    data : sized iterable of numbers (e.g. a list or a NumPy array)
    value : number
        The reference point the deviations are measured from.

    Returns
    -------
    tuple
        ``(1st, 2nd, 3rd, 4th)`` moments about ``value``, unrounded.

    Raises
    ------
    ZeroDivisionError
        If ``data`` is empty.
    """
    n = len(data)
    # Deviations from the reference point, computed once for all four orders.
    deviations = [x - value for x in data]
    return tuple(sum(d ** k for d in deviations) / n for k in (1, 2, 3, 4))


def compute_moments_about_75(data):
    """Return the first four moments of ``data`` about the fixed value 75.

    Thin wrapper around ``compute_moments_about_value`` kept so existing
    callers that use the 75-specific name keep working unchanged.
    """
    return compute_moments_about_value(data, 75)

# Moments of the pooled sample about 75.
# This previously called compute_moments_about_value(all_data, 75), a name with no
# definition in the cells shown, so a fresh-kernel run stopped here with NameError.
# compute_moments_about_75, defined just above, performs the same computation.
moments3 = compute_moments_about_75(all_data)

# moments3 is a 4-tuple: build a single 'Value' column labelled by moment,
# then transpose so the moments run across the columns.
moments3_df = pd.DataFrame(
    moments3,
    index=['1st moment about 75', '2nd moment about 75', '3rd moment about 75', '4th moment about 75'],
    columns=['Value'],
).T

display(moments3_df)

Unnamed: 0,1st moment about 75,2nd moment about 75,3rd moment about 75,4th moment about 75
Value,-28.286,1557.2665,-85614.38306,5070851.0


In [90]:
distributions = ['Normal', 'Skewed-right', 'Skewed-left', 'Uniform']

verification_results_list = []

for dist in distributions:
    # Moments of this distribution's column about the fixed point 75.
    a1, a2, a3, a4 = compute_moments_about_75(df_normal[dist].values)

    # Recover the moments about the mean from the moments about 75:
    #   mu2 = a2 - a1^2
    #   mu3 = a3 - 3*a1*a2 + 2*a1^3
    #   mu4 = a4 - 4*a1*a3 + 6*a1^2*a2 - 3*a1^4
    checks = {
        'Distribution': dist,
        '2nd Moment Check': round(a2 - a1**2, 2),
        '3rd Moment Check': round(a3 - 3 * a1 * a2 + 2 * (a1**3), 2),
        '4th Moment Check': round(a4 - 4 * a1 * a3 + 6 * (a1**2) * a2 - 3 * (a1**4), 2),
    }
    verification_results_list.append(checks)

# One record per distribution -> one row per distribution.
verification_results = pd.DataFrame(verification_results_list)

display(verification_results)

Unnamed: 0,Distribution,2nd Moment Check,3rd Moment Check,4th Moment Check
0,Normal,8.31,-0.47,160.95
1,Skewed-right,178.89,4588.13,210642.88
2,Skewed-left,419.76,-12498.26,927289.75
3,Uniform,0.08,0.0,0.01


These are the values of the 2nd to 4th moments about the mean recovered by the verification. For comparison, the answer from part 2 is shown below:

In [88]:
# Re-display the moments-about-the-mean table (moments2_df, built in an earlier cell)
# so it can be compared against the verification table above.
display(moments2_df)

Unnamed: 0,Mean,1st moment about the mean,2nd moment about the mean,3rd moment about the mean,4th moment about the mean
Normal,65.12,-0.0,8.31,-0.47,160.95
Skewed-right,35.48,0.0,178.89,4588.13,210642.88
Skewed-left,74.2,-0.0,419.76,-12498.26,927289.75
Uniform,12.06,-0.0,0.08,0.0,0.01


Upon investigation, comparing both results shows that the moments obtained in the check are identical to the answers in part 2 (the 1st moment about the mean is not part of the check because it is always equal to 0). Hence, the relationship between the moments about 75 and the moments about the mean is consistent, and the results are verified.