In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from astropy.stats import biweight_midvariance
from scipy.stats import gaussian_kde
from ipywidgets import interact
import ipywidgets as widgets
from scipy.optimize import minimize
from scipy.stats import gmean, hmean, trim_mean, mstats, zscore, norm
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.cluster import KMeans
from sklearn.experimental import enable_iterative_imputer
from sklearn.decomposition import PCA
from scipy import stats
from sklearn.impute import SimpleImputer, IterativeImputer
from scipy.spatial import distance

data_frame = pd.read_csv("C:\\Users\\david\\OneDrive\\Documents\\GitHub\\Projects\\Billionaires Statistics Dataset.csv")

# Store columns in Standard Variables
numeric_column = 'finalWorth'
weighted_column = 'age'
window_size = 3


# Peform any standard Manipulations on the Column
data_frame[numeric_column] = data_frame[numeric_column].round(2) * 1000000

# Normalize the weighted_column to create weights
total_weighted_column = data_frame[weighted_column].sum()
data_frame['weights'] = data_frame[weighted_column] / total_weighted_column

# Check for NaN values in DataFrame
nan_in_numeric = data_frame[numeric_column].isna().any()
nan_in_weights = data_frame['weights'].isna().any()
zero_in_numeric = (data_frame[numeric_column] == 0).any()
print(f"NaN in numeric_column: {nan_in_numeric}, NaN in weights: {nan_in_weights}, Zero in numeric_column: {zero_in_numeric}, if either are true, this will present issue with Weighted Means")


data_frame.head()


# finalWorth: The final net worth of the billionaire in U.S. dollars. *1000000
# age: The age of the billionaire. (used for Weighted Mean)


# Discriptive Statistics - Measuring Central Tendency (One Value Output)
- Trimmed Mean
- Winsorized Mean
- Winsorized Trimmed Mean
- Truncated Mean
- Tukey's Biweight Mean
- Adaptive Mean
- Skewness-Adjusted Mean
- Winsorized Harmonic Mean
- Outlier-Adjusted Mean
- Asymmetric Trimmed Mean
- Arithmetic Mean
- Median
- Mode
- Geometric Mean
- Harmonic Mean
- Geometric-Harmonic Mean
- Distribution-Adjusted Mean
- Multiple Imputation Mean
- Percentile Mean
- Pseudomedian
- Quadratic Mean (Root Mean Square)
- Lehmer Mean
- Hölder's Mean
- Identric Mean
- Hyperbolic Mean
- Heronian Mean
- Logarithmic Mean
- Isotonic Mean
- Logarithmic-Exponential Mean
- Entropy-Weighted Mean
- Regularized Mean
- Biased Mean
- Damped Mean
- Augmented Mean
- Median-Adjusted Mean
- Non-Linear Mean
- Bootstrapped Mean
- Imputed Mean
- Trimean (Tukey's Trimean)
- Midrange
- Mid-Mean
- Median of Means
- Mode-Based Mean
- Interquartile Mean
- Jackknife Mean
- Stable Mean
- Decaying Mean
- Recursive Mean
- Kernel Density Mean
- Cesàro Mean
- Three-Point Central Tendency


In [44]:
# Trimmed Mean
percentage_to_trim = 0.2  # Set the percentage to trim

def calculate_trimmed_mean(series, percentage):
    return stats.trim_mean(series, percentage)

trimmed_mean = calculate_trimmed_mean(data_frame[numeric_column], percentage_to_trim)
print(f"The Trimmed Mean of the numeric_column with {percentage_to_trim*100}% trimming is: ${trimmed_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Winsorized mean
# Limits are the percentage of values to be trimmed on both sides of the distribution
winsorized_mean = mstats.winsorize(data_frame[numeric_column], limits=[0.1, 0.1]).mean()
print(f"The Winsorized Mean of the numeric_column is, with Limit 10%: ${winsorized_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Winsorized Trimmed Mean
percentage_to_winsorize = 0.2  # Set the percentage to trim

def calculate_winsorized_mean(series, percentage):
    winsorized_data = stats.mstats.winsorize(series, limits=percentage)
    return winsorized_data.mean()

winsorized_trimmed_mean = calculate_winsorized_mean(data_frame[numeric_column], percentage_to_winsorize)
print(f"The Winsorized Trimmed Mean of the numeric_column with {percentage_to_winsorize*100}% Winsorization is: ${winsorized_trimmed_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Truncated Mean
def calculate_truncated_mean(series, lower_percentage, upper_percentage):
    sorted_series = sorted(series)
    lower_index = int(len(sorted_series) * lower_percentage)
    upper_index = int(len(sorted_series) * (1 - upper_percentage))
    truncated_data = sorted_series[lower_index:upper_index]
    return np.mean(truncated_data)

truncated_mean = calculate_truncated_mean(data_frame[numeric_column], 0.1, 0.1)
print(f"The Truncated Mean of the numeric_column is: ${truncated_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Tukey's Biweight Mean
def tukeys_biweight_mean(series, tuning_constant=5.0):
    M = np.mean(series)
    u = (series - M) / (tuning_constant * np.std(series))
    u_squared = u ** 2
    weights = (1 - u_squared) ** 2 * (np.abs(u) <= 1)
    return np.sum(weights * series) / np.sum(weights)

biweight_mean = tukeys_biweight_mean(data_frame[numeric_column])
print(f"The Tukey's Biweight Mean of the numeric_column is: ${biweight_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Adaptive Mean
adaptive_mean = (trimmed_mean + truncated_mean) / 2
print(f"The Adaptive Mean of the numeric_column is: ${adaptive_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Skewness-Adjusted Mean
skewness = stats.skew(data_frame[numeric_column])
skewness_adjusted_mean = np.mean(data_frame[numeric_column]) + skewness
print(f"The Skewness-Adjusted Mean of the numeric_column is: ${skewness_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Winsorized Harmonic Mean
winsorized_data = mstats.winsorize(data_frame[numeric_column], limits=[0.1, 0.1])
winsorized_harmonic_mean = stats.hmean(winsorized_data.data)
print(f"The Winsorized Harmonic Mean of the numeric_column is: ${winsorized_harmonic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Outlier-Adjusted Mean
z_scores = np.abs(stats.zscore(data_frame[numeric_column]))
outlier_adjusted_mean = np.mean(data_frame[numeric_column][z_scores < 3])
print(f"The Outlier-Adjusted Mean of the numeric_column is: ${outlier_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Asymmetric Trimmed Mean
asymmetric_trimmed_mean = calculate_truncated_mean(data_frame[numeric_column], 0.1, 0.2)
print(f"The Asymmetric Trimmed Mean of the numeric_column is: ${asymmetric_trimmed_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Arithmetic Mean
arithmetic_mean = data_frame[numeric_column].mean()
print(f"The arithmetic mean of the numeric_column is: ${arithmetic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Median
median = data_frame[numeric_column].median()
print(f"The median of the numeric_column is: ${median:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Mode
mode = data_frame[numeric_column].mode()
print(f"The mode of the numeric_column is: ${mode[0]:,.2f}")

# If you want to print all modes
for mode in mode:
    print(f"A mode of the numeric_column is: ${mode:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Geometric Mean
geometric_mean = gmean(data_frame[numeric_column])
print(f"The Geometric Mean of the numeric_column is: ${geometric_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Harmonic Mean
harmonic_mean = hmean(data_frame[numeric_column])
print(f"The Harmonic Mean of the numeric_column is: ${harmonic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Geometric-Harmonic Mean
geometric_harmonic_mean = 1 / hmean(1 / gmean(data_frame[numeric_column]))
print(f"The Geometric-Harmonic Mean of the numeric_column is: ${geometric_harmonic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Distribution-Adjusted Mean
data_transformed = np.log(data_frame[numeric_column])
mean_transformed = np.mean(data_transformed)
distribution_adjusted_mean = np.exp(mean_transformed)
print(f"The Distribution-Adjusted Mean is: ${distribution_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Multiple Imputation Mean
data_with_nan = data_frame[numeric_column].values.reshape(-1, 1)
imputer = IterativeImputer(sample_posterior=True, random_state=0)
data_imputed = imputer.fit_transform(data_with_nan)
multiple_imputation_mean = np.mean(data_imputed)
print(f"The Multiple Imputation Mean is: ${multiple_imputation_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Percentile Mean
# Calculate the mean of the middle 50% of the data (between the 25th and 75th percentiles)
percentile_25 = np.percentile(data_frame[numeric_column], 25)
percentile_75 = np.percentile(data_frame[numeric_column], 75)
percentile_mean = np.mean(data_frame[numeric_column][(data_frame[numeric_column] >= percentile_25) & (data_frame[numeric_column] <= percentile_75)])

print(f"The Percentile Mean is: ${percentile_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Pseudomedian
# Calculate the pseudomedian
pseudomedian = np.percentile(data_frame[numeric_column], 50, interpolation='midpoint')
print(f"The Pseudomedian is: ${pseudomedian:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Quadratic Mean (Root Mean Square)
quadratic_mean = np.sqrt((data_frame[numeric_column] ** 2).mean())
print(f"The Quadratic Mean is: ${quadratic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Lehmer Mean
# You need to define a parameter 'p' for Lehmer Mean
p = 2  # Example value
lehmer_mean = ((data_frame[numeric_column] ** p).mean()) / (data_frame[numeric_column].mean())
print(f"The Lehmer Mean (p={p}) is: {lehmer_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Hölder's Mean
# You need to define a parameter 'p' for Hölder's Mean
p = 2  # Example value
holders_mean = ((data_frame[numeric_column] ** p).mean())**(1/p)
print(f"Hölder's Mean (p={p}) is: {holders_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Identric Mean
identric_mean = (data_frame[numeric_column] * (1/np.log(data_frame[numeric_column]/data_frame[numeric_column].mean()))).mean()
print(f"The Identric Mean is: {identric_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Hyperbolic Mean
hyperbolic_mean = len(data_frame) / (1/data_frame[numeric_column]).sum()
print(f"The Hyperbolic Mean is: {hyperbolic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Heronian Mean
heronian_mean = 2 * (data_frame[numeric_column] * data_frame[numeric_column].mean()) / (data_frame[numeric_column] + data_frame[numeric_column].mean())
print(f"The Heronian Mean is: {heronian_mean.mean():,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Logarithmic Mean
logarithmic_mean = ((data_frame[numeric_column] - data_frame[numeric_column].mean()) / (np.log(data_frame[numeric_column]) - np.log(data_frame[numeric_column].mean()))).mean()
print(f"The Logarithmic Mean is: {logarithmic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Isotonic Mean
# You need to define a parameter 'p' for Isotonic Mean
p = 2  # Example value
isotonic_mean = ((data_frame[numeric_column] ** (1/p)).mean())**p
print(f"The Isotonic Mean (p={p}) is: {isotonic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Logarithmic-Exponential Mean
log_exp_mean = np.exp((np.log(data_frame[numeric_column])).mean())
print(f"The Logarithmic-Exponential Mean is: {log_exp_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Entropy-Weighted Mean
probabilities = data_frame[numeric_column] / data_frame[numeric_column].sum()
entropy_weighted_mean = -np.sum(probabilities * np.log(probabilities)) * data_frame[numeric_column].mean()
print(f"The Entropy-Weighted Mean is: {entropy_weighted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Regularized Mean
# You need to define a regularization parameter 'lambda_'
lambda_ = 0.1  # Example value
regularized_mean = (data_frame[numeric_column].mean()) / (1 + lambda_)
print(f"The Regularized Mean (lambda={lambda_}) is: {regularized_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Biased Mean
# Note: The sample mean is an unbiased estimator of the population mean.
# If you want to calculate a biased estimate, you might adjust the degrees of freedom.
biased_mean = data_frame[numeric_column].mean()
print(f"The Biased Mean is: {biased_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Damped Mean
# You need to define a damping factor 'alpha'
alpha = 0.9  # Example value
damped_mean = alpha * data_frame[numeric_column].mean() + (1 - alpha) * data_frame[numeric_column].iloc[0]
print(f"The Damped Mean (alpha={alpha}) is: {damped_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Augmented Mean (trimmed mean)
augmented_mean = stats.trim_mean(data_frame[numeric_column], proportiontocut=0.05)
print(f"The Augmented Mean of the numeric_column is: {augmented_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Median-Adjusted Mean (Mean adjusted by subtracting the median)
median_adjusted_mean = data_frame[numeric_column].mean() - data_frame[numeric_column].median()
print(f"The Median-Adjusted Mean of the numeric_column is: {median_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Non-Linear Mean
non_linear_mean = np.exp(data_frame[numeric_column].apply(np.log).mean())
print(f"The Non-Linear Mean (logarithmic) of the numeric_column is: {non_linear_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Bootstrapped Mean
bootstrap_means = [data_frame[numeric_column].sample(frac=1, replace=True).mean() for _ in range(1000)]
bootstrapped_mean = np.mean(bootstrap_means)
print(f"The Bootstrapped Mean of the numeric_column is: {bootstrapped_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Imputed Mean
imputed_data = data_frame[numeric_column].fillna(data_frame[numeric_column].mean())
imputed_mean = imputed_data.mean()
print(f"The Imputed Mean of the numeric_column is: {imputed_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Trimean (Tukey's Trimean)
Q1 = np.percentile(data_frame[numeric_column], 25)
Q3 = np.percentile(data_frame[numeric_column], 75)
median = np.median(data_frame[numeric_column])
trimean = (Q1 + 2 * median + Q3) / 4
print(f"The Trimean of the numeric_column is: {trimean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Midrange
max_value = data_frame[numeric_column].max()
min_value = data_frame[numeric_column].min()
midrange = (max_value + min_value) / 2
print(f"The Midrange of the numeric_column is: {midrange:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Mid-Mean (similar to Midrange)
mid_mean = midrange
print(f"The Mid-Mean of the numeric_column is: {mid_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Median of Means (assuming equal-sized groups)
group_size = 5  # Arbitrary group size
groups = [data_frame[numeric_column][i:i + group_size] for i in range(0, len(data_frame[numeric_column]), group_size)]
median_of_means = np.median([g.mean() for g in groups if len(g) > 0])
print(f"The Median of Means of the numeric_column is: {median_of_means:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Mode-Based Mean
mode_result = stats.mode(data_frame[numeric_column])

if np.isscalar(mode_result[0]):
    mode_value = mode_result[0]
else:
    mode_value = mode_result[0][0]

mode_based_mean = data_frame[data_frame[numeric_column] == mode_value][numeric_column].mean()
print(f"The Mode-Based Mean of the numeric_column is: {mode_based_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Interquartile Mean
Q1 = np.percentile(data_frame[numeric_column], 25)
Q3 = np.percentile(data_frame[numeric_column], 75)
interquartile_data = data_frame[(data_frame[numeric_column] >= Q1) & (data_frame[numeric_column] <= Q3)]
interquartile_mean = interquartile_data[numeric_column].mean()
print(f"The Interquartile Mean of the numeric_column is: {interquartile_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Jackknife Mean
# Jackknife Mean
jackknife_means = [data_frame[numeric_column][data_frame.index != i].mean() for i in data_frame.index]
jackknife_mean = np.mean(jackknife_means)
print(f"The Jackknife Mean of the numeric_column is: {jackknife_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Stable Mean
winsorized_mean = mstats.winsorize(data_frame[numeric_column], limits=[0.05, 0.05]).mean()
print(f"The Stable Mean (Winsorized) of the numeric_column is: {winsorized_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Decaying Mean
decaying_mean = data_frame[numeric_column].ewm(span=50).mean().iloc[-1]  # span is a parameter that can be adjusted
print(f"The Decaying Mean of the numeric_column is: {decaying_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Recursive Mean
recursive_mean = data_frame[numeric_column].expanding().mean().iloc[-1]
print(f"The Recursive Mean of the numeric_column is: {recursive_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Three-Point Central Tendency
mean_value = data_frame[numeric_column].mean()
median_value = data_frame[numeric_column].median()
mode_value = data_frame[numeric_column].mode()[0]  # Assuming unimodal for simplicity
three_point_central_tendency = (mean_value + median_value + mode_value) / 3
print(f"The Three-Point Central Tendency of the numeric_column is: {three_point_central_tendency:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Kernel Density Mean
kernel = gaussian_kde(data_frame[numeric_column])
weights = kernel(data_frame[numeric_column])
kernel_density_mean = np.average(data_frame[numeric_column], weights=weights)
print(f"The Kernel Density Mean of the numeric_column is: {kernel_density_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Cesàro Mean
cesaro_sum = np.cumsum(data_frame[numeric_column])
cesaro_mean = cesaro_sum / np.arange(1, len(data_frame[numeric_column]) + 1)
final_cesaro_mean = cesaro_mean.iloc[-1]
print(f"The Cesàro Mean of the numeric_column is: {final_cesaro_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------


# -----------------------------------------Data Visualization---------------------------------------------------

def plot_boxplot():
    # Create the boxplot
    fig = go.Figure()

    fig.add_trace(go.Box(
        x=data_frame[numeric_column],
        name=numeric_column,
        orientation='h',
        hoverinfo='none',
        marker=dict(color='rgba(200,200,200,0.6)'),
        line=dict(color='rgba(150,150,150,0.9)')
    ))

    # Initialize variables to keep track of label positions
    positions = []

    # Add lines and annotations for various statistical measures
    shapes = []
    annotations = []
    modern_colors = ['rgba(237,230,0,0.8)', 'rgba(0,222,4,0.8)', 'rgba(255,133,27,0.8)', 'rgba(0,128,128,0.8)', 'rgba(128,0,128,0.8)']
    measures = [arithmetic_mean, median, mode, trimmed_mean, winsorized_mean, winsorized_trimmed_mean, truncated_mean, biweight_mean, adaptive_mean, skewness_adjusted_mean, winsorized_harmonic_mean, outlier_adjusted_mean, asymmetric_trimmed_mean]
    labels = ['Mean', 'Median', 'Mode', 'Trimmed Mean', 'Winsorized Mean', 'Winsorized Trimmed Mean', 'Truncated Mean', 'Tukey\'s Biweight Mean', 'Adaptive Mean', 'Skewness-Adjusted Mean', 'Winsorized Harmonic Mean', 'Outlier-Adjusted Mean', 'Asymmetric Trimmed Mean']

    for value, color, label in zip(measures, modern_colors * (len(measures) // len(modern_colors) + 1), labels):
        y_position = 0.5  # Initialize y_position

        # Find a y_position that doesn't overlap
        while any(abs(pos - y_position) < 0.05 for pos in positions):
            y_position += 0.05

        positions.append(y_position)  # Store this position

        # Add shape and annotation
        shapes.append(
            dict(type='line', x0=value, x1=value, y0=0, y1=1, yref='paper', line=dict(color=color, width=2))
        )
        annotations.append(
            dict(x=value, y=y_position, xref='x', yref='paper', text=label, showarrow=False, xanchor='left', font=dict(color=color))
        )

    # Dark mode layout settings
    dark_layout = go.Layout(
        paper_bgcolor='rgba(10,10,10,1)',
        plot_bgcolor='rgba(15,15,15,1)',
        xaxis=dict(showgrid=False, tickfont=dict(color='white')),
        yaxis=dict(showgrid=False, tickfont=dict(color='white')),
        shapes=shapes,
        annotations=annotations
    )

    fig.update_layout(dark_layout, template='plotly')
    fig.show()

# Call the function to display the plot
plot_boxplot()


The Trimmed Mean of the numeric_column with 20.0% trimming is: $2,581,944,444.44
The Winsorized Mean of the numeric_column is, with Limit 10%: $3,237,007,575.76
The Winsorized Trimmed Mean of the numeric_column with 20.0% Winsorization is: $2,849,166,666.67
The Truncated Mean of the numeric_column is: $2,896,259,469.70
The Tukey's Biweight Mean of the numeric_column is: $3,592,044,073.78
The Adaptive Mean of the numeric_column is: $2,739,101,957.07
The Skewness-Adjusted Mean of the numeric_column is: $4,623,787,888.79
The Winsorized Harmonic Mean of the numeric_column is: $2,193,520,937.02
The Outlier-Adjusted Mean of the numeric_column is: $3,703,150,211.29
The Asymmetric Trimmed Mean of the numeric_column is: $2,393,506,493.51
The arithmetic mean of the numeric_column is: $4,623,787,878.79
The median of the numeric_column is: $2,300,000,000.00
The mode of the numeric_column is: $1,200,000,000.00
A mode of the numeric_column is: $1,200,000,000.00
The Geometric Mean of the numeric_colu

# Discriptive Statistics - Measuring Central Tendency (Column Output)
- Rolling Geometric Mean
- Exponentially Weighted Geometric Mean
- Moving Average
- Exponential Moving Average
- Running Mean
- Seasonal Decomposition
- Z-Score Mean
- Simple Moving Median

In [None]:
# Rolling Geometric Mean
window_size = 3  # Example window size
rolling_geometric_mean = data_frame[numeric_column].rolling(window=window_size).apply(gmean, raw=True)
rolling_geometric_mean_str = rolling_geometric_mean.apply("${:,.2f}".format)
print("The Rolling Geometric Mean of the numeric_column is:")
print(rolling_geometric_mean_str)

# --------------------------------------------------------------------------------------------------------------

# Exponentially Weighted Geometric Mean
span = 3  # Example span for exponential weighting
exponentially_weighted_geometric_mean_calc = np.exp(pd.Series(np.log(data_frame[numeric_column])).ewm(span=span).mean())
exponentially_weighted_geometric_mean = exponentially_weighted_geometric_mean_calc.apply("${:,.2f}".format)
print(f"The Exponentially Weighted Geometric Mean of the numeric_column is:")
print(exponentially_weighted_geometric_mean)

# --------------------------------------------------------------------------------------------------------------

# Moving Average
moving_average = data_frame[numeric_column].rolling(window=window_size).mean()
print(f"The Moving Average of the numeric_column is:\n{moving_average.apply('${:,.2f}'.format)}")

# --------------------------------------------------------------------------------------------------------------

# Exponential Moving Average
span = 3
exponential_moving_average = data_frame[numeric_column].ewm(span=span).mean()
print(f"\nThe Exponential Moving Average of the numeric_column is:\n{exponential_moving_average.apply('${:,.2f}'.format)}")

# --------------------------------------------------------------------------------------------------------------

# Running Mean
running_mean = data_frame[numeric_column].expanding().mean()
print(f"\nThe Running Mean of the numeric_column is:\n{running_mean.apply('${:,.2f}'.format)}")

# --------------------------------------------------------------------------------------------------------------

# Seasonal Decomposition
# Note: Seasonal Decomposition requires a time series with a frequency
data_frame.index = pd.date_range(start='2022-01-01', periods=len(data_frame), freq='D')
decomposition = seasonal_decompose(data_frame[numeric_column], model='additive')
seasonal_mean = decomposition.trend + decomposition.seasonal
print(f"\nThe Seasonal Decomposition of the numeric_column is:\n{seasonal_mean.apply('${:,.2f}'.format)}")

# --------------------------------------------------------------------------------------------------------------

# Z-Score Mean
z_scores = zscore(data_frame[numeric_column])
z_score_mean = np.mean(z_scores)
print(f"\nThe Z-Score Mean of the numeric_column is: ${z_score_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Simple Moving Median
simple_moving_median = data_frame[numeric_column].rolling(window=window_size).median()
print(f"\nThe Simple Moving Median of the numeric_column is:\n{simple_moving_median.apply('${:,.2f}'.format)}")


# Discriptive Statistics - Measuring Central Tendency (Context Required)
- Projection Mean
- Cluster-Based Mean
- Geometric Median
- Bi-weight Midvariance
- Harrell-Davis Quantile Mean
- Rank-Ordered Mean
- Spliced Mean
- Chained Mean
- Quasi-Harmonic Mean
- Calibrated Mean
- Fractional Mean
- Local Mean
- Contraharmonic Mean
- Spectral Mean
- Circular Mean
- Sinusoidal Mean
- Complex Mean
- Joint Mean
- Copula Mean
- Stochastic Averages
- Bayesian Mean
- Pareto Mean
- Pooled Mean
- Harmonic-Power Mean
- Windowed Harmonic Mean
- Central Moment Mean
- Spectral Decomposition Mean
- Range-Adjusted Mean
- Risk-Adjusted Mean
- Moment-Adjusted Mean
- Normalized Mean
- Relative Mean

In [None]:
# Cluster-Based Mean
# kmeans = KMeans(n_clusters=3)
# data_frame['cluster'] = kmeans.fit_predict(data_frame)
# cluster_based_means = data_frame.groupby('cluster')[numeric_column].mean()
#
# for cluster, mean in cluster_based_means.iteritems():
#     print(f"The Cluster-Based Mean for cluster {cluster} of the numeric_column is: {mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Projection Mean
# pca = PCA(n_components=1)
# projected_data = pca.fit_transform(data_frame.drop(columns=[numeric_column]))
# projection_mean = np.mean(projected_data)
# print(f"The Projection Mean of the numeric_column is: {projection_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Geometric Median
# Complex optimization Problem

# --------------------------------------------------------------------------------------------------------------

# Bi-weight Midvariance
biweight_midvariance_result = biweight_midvariance(data_frame[numeric_column])
print(f"The Bi-weight Midvariance of the numeric_column is: {biweight_midvariance_result:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Harrell-Davis Quantile Mean
# Complex

# --------------------------------------------------------------------------------------------------------------

# Rank-Ordered Mean
# Rank-Ordered Mean
ranks = data_frame[numeric_column].rank()
rank_ordered_mean = ranks.mean()
print(f"The Rank-Ordered Mean of the numeric_column is: {rank_ordered_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Spliced Mean
# Requires definition of how the data is spliced.

# --------------------------------------------------------------------------------------------------------------

# Chained Mean
# In the context of multiple imputation, this would involve a series of imputations where the mean of one is used to estimate the next.

# --------------------------------------------------------------------------------------------------------------

# Quasi-Harmonic Mean
# This would require a specific definition or weighting to calculate.

# --------------------------------------------------------------------------------------------------------------

# Calibrated Mean
# Requires calibration data or a model to adjust the mean accordingly.

# --------------------------------------------------------------------------------------------------------------

# Fractional Mean
# Requires a specific definition or weighting scheme.

# --------------------------------------------------------------------------------------------------------------

# Local Mean
# Typically applied to a sliding window of data points. Requires a window size and possibly a weighting scheme.

# --------------------------------------------------------------------------------------------------------------

# Contraharmonic Mean
squared_sum = np.sum(np.square(data_frame[numeric_column]))
sum_of_values = np.sum(data_frame[numeric_column])
contraharmonic_mean = squared_sum / sum_of_values if sum_of_values != 0 else float('nan')
print(f"The Contraharmonic Mean of the numeric_column is: {contraharmonic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Spectral Mean
fft_values = np.fft.fft(data_frame[numeric_column])
spectral_mean = np.mean(np.abs(fft_values))
print(f"The Spectral Mean of the numeric_column is: {spectral_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Circular Mean
angular_data = np.deg2rad(data_frame[numeric_column])  # Convert to radians if your data is in degrees
circular_mean = np.angle(np.mean(np.exp(1j * angular_data)))
print(f"The Circular Mean of the numeric_column is: {np.rad2deg(circular_mean):,.2f} degrees")

# --------------------------------------------------------------------------------------------------------------

# Sinusoidal Mean
sinusoidal_mean = np.mean(np.sin(data_frame[numeric_column]))
print(f"The Sinusoidal Mean of the numeric_column is: {sinusoidal_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Complex Mean
complex_mean = np.mean(data_frame[numeric_column].apply(lambda x: complex(x)))  # Assuming the data can be converted to complex numbers
print(f"The Complex Mean of the numeric_column is: {complex_mean}")

# --------------------------------------------------------------------------------------------------------------

# Joint Mean (mean of multiple columns)
# joint_mean = data_frame.mean(axis=1)  # Assuming data_frame has only the relevant numeric columns
# print(f"The Joint Mean of the numeric_column is: {joint_mean.mean():,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Copula Mean
# This would require a fitted copula model. Generally, the mean would be the average of samples generated from the copula or the expectation under the copula's density.

# Stochastic Averages
# Financial Data

# --------------------------------------------------------------------------------------------------------------

# Bayesian Mean
# Need Prior Distribution

# --------------------------------------------------------------------------------------------------------------

# Pareto Mean
# Validate the Pareto distribution first

# --------------------------------------------------------------------------------------------------------------

# Pooled Mean
# Need two+ groups (Meta Analysis)

# --------------------------------------------------------------------------------------------------------------

# Windowed Harmonic Mean
# You need to define the window size 'w' for Windowed Harmonic Mean
w = 5  # Example value
windowed_harmonic_mean = (1 / (data_frame[numeric_column].rolling(window=w).mean())).mean()
print(f"The Windowed Harmonic Mean (window size={w}) is: {windowed_harmonic_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Harmonic-Power Mean
# You need to define parameters 'p' and 'q' for Harmonic-Power Mean
p = 2  # Example value
q = -1 # Example value
harmonic_power_mean = ((data_frame[numeric_column] ** p).mean())**(1/q)
print(f"The Harmonic-Power Mean (p={p}, q={q}) is: {harmonic_power_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Central Moment Mean
# You need to define a parameter 'k' for the Central Moment
k = 3  # Example value
central_moment_mean = ((data_frame[numeric_column] - data_frame[numeric_column].mean()) ** k).mean()
print(f"The Central Moment Mean (k={k}) is: {central_moment_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Spectral Decomposition Mean
data = data_frame[numeric_column].values
length = len(data)
sqrt_length = int(length ** 0.5)

if sqrt_length ** 2 == length:
    data_matrix = data.reshape((sqrt_length, sqrt_length))
    eigenvalues, _ = np.linalg.eig(data_matrix)
    spectral_decomposition_mean = np.mean(eigenvalues)
    print(f"The Spectral Decomposition Mean is: {spectral_decomposition_mean:,.2f}")
else:
    print("The length of the data is not a perfect square, so it cannot be reshaped into a square matrix.")

# --------------------------------------------------------------------------------------------------------------

# Range-Adjusted Mean
range_adjusted_mean = (data_frame[numeric_column].mean() - data_frame[numeric_column].min()) / (data_frame[numeric_column].max() - data_frame[numeric_column].min())
print(f"The Range-Adjusted Mean is: {range_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Risk-Adjusted Mean
# This calculation depends on the context and the definition of 'risk'. Here's a basic example:
risk = data_frame[numeric_column].std()  # Example: Using standard deviation as a measure of risk
risk_adjusted_mean = data_frame[numeric_column].mean() / risk
print(f"The Risk-Adjusted Mean is: {risk_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Moment-Adjusted Mean
# You need to define a parameter 'moment'
moment = 3  # Example value
moment_adjusted_mean = ((data_frame[numeric_column] ** moment).mean())**(1/moment)
print(f"The Moment-Adjusted Mean (moment={moment}) is: {moment_adjusted_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Relative Mean
relative_mean = data_frame[numeric_column].mean() / data_frame[numeric_column].std()
print(f"The Relative Mean of the numeric_column is: {relative_mean:,.2f}")

# --------------------------------------------------------------------------------------------------------------

# Normalized Mean (Mean of normalized data, scaled 0-1)
normalized_mean_calc = (data_frame[numeric_column] - data_frame[numeric_column].min()) / (data_frame[numeric_column].max() - data_frame[numeric_column].min())
normalized_mean = normalized_mean_calc.mean()
print(f"The Normalized Mean of the numeric_column is: {normalized_mean:,.2f}")



# Weighted Measures (In Progress)
- Weighted Mean
- Weighted Geometric Mean
- Weighted Harmonic Mean
- Weighted Median
- Weighted Running Mean (Window_Size)
- Weighted Cluster Mean
- Weighted Circular Mean (Requires Circular Data)
- Weighted Mid-Mean (Left Off) Review Weighted Logic
- Bounded Weighted Mean
- Double Weighted Mean
- Distance-Weighted Mean
- Variance-Weighted Mean
- Alpha-Trimmed Mean
- Adaptive Weighted Mean
- Adjusted Weighted Mean
- Time-weighted Mean
- Weighted Moving Average
- Moving Weighted Median
- Weighted Pseudomedian

In [None]:
# Weighted Mean
weighted_mean = (data_frame[numeric_column] * data_frame['weights']).sum()

# Output the weighted mean, formatted as requested
weighted_mean_output = f"The Weighted Mean of numeric_column using the weighted_column is: {weighted_mean}"
print(weighted_mean_output)

# --------------------------------------------------------------------------------------------------------------

# Weighted Geometric Mean
def weighted_geometric_mean_function(df, value_col, weight_col):
    product = np.prod(df[value_col] ** df[weight_col])
    wgm = product ** (1 / df[weight_col].sum())
    return wgm

weighted_geometric_mean = weighted_geometric_mean_function(data_frame, numeric_column, 'weights')
print("The Weighted Geometric Mean of the numeric_column using the weighted_column is:", weighted_geometric_mean)

# --------------------------------------------------------------------------------------------------------------

# Weighted Harmonic Mean
def weighted_harmonic_mean_calc(df, value_col, weight_col):
    denominator = sum(df[weight_col] / df[value_col])
    whm = sum(df[weight_col]) / denominator
    return whm

weighted_harmonic_mean = weighted_harmonic_mean_calc(data_frame, numeric_column, 'weights')
print("The Weighted Harmonic Mean of the numeric_column using the weighted_column is:", weighted_harmonic_mean)

# --------------------------------------------------------------------------------------------------------------

# Weighted Median
def weighted_median_calc(df, value_col, weight_col):
    df_sorted = df.sort_values(value_col)
    total_weight = df_sorted[weight_col].sum()
    cumsum_weight = df_sorted[weight_col].cumsum()
    median_weight = total_weight / 2

    return df_sorted.loc[cumsum_weight >= median_weight, value_col].iloc[0]

weighted_median = weighted_median_calc(data_frame, numeric_column, 'weights')
print("The Weighted Median of the numeric_column using the weighted_column is:", weighted_median)

# --------------------------------------------------------------------------------------------------------------

# Weighted Running Mean
window_size = 3  # You can change this based on your requirements, depends on the size of the dataset and goal

# Function to calculate weighted running mean with dynamic weights
def custom_weighted_running_mean(row_indices, df, numeric_col, weight_col):
    values = df.loc[row_indices, numeric_col].values
    weights = df.loc[row_indices, weight_col].values
    if len(values) == len(weights):
        return np.sum(values * weights) / np.sum(weights)
    else:
        return np.nan

weighted_running_mean_dynamic = []

# Loop through the DataFrame to apply the custom rolling window function
for i in range(len(data_frame)):
    start_idx = max(0, i - window_size + 1)
    end_idx = i + 1
    row_indices = range(start_idx, end_idx)
    weighted_mean = custom_weighted_running_mean(row_indices, data_frame, numeric_column, 'weights')
    weighted_running_mean_dynamic.append(weighted_mean)

# Add the calculated weighted running mean to the DataFrame
data_frame['weighted_running_mean_dynamic'] = weighted_running_mean_dynamic

# --------------------------------------------------------------------------------------------------------------

# Weighted Cluster Mean
# Perform standard KMeans clustering on the numeric_column
standard_cluster = data_frame[[numeric_column]].values  # Using only the numeric_column for clustering
kmeans = KMeans(n_clusters=3, random_state=0).fit(standard_cluster)
initial_centers = kmeans.cluster_centers_

# Function to calculate the weighted mean of a cluster
def weighted_cluster_mean(cluster_points, cluster_weights):
    return np.average(cluster_points, axis=0, weights=cluster_weights)

# Update cluster centers using weighted mean
new_centers = np.array([weighted_cluster_mean(standard_cluster[kmeans.labels_ == i], data_frame['weights'][kmeans.labels_ == i]) for i in range(3)])

# --------------------------------------------------------------------------------------------------------------

# Weighted Circular Mean
# Sample angular data in radians and corresponding weights
angles = np.array([0, np.pi/4, np.pi/2, 3*np.pi/4, np.pi])  # Sample angles in radians
weights = np.array([1, 1, 2, 1, 1])  # Sample weights

# Function to calculate the weighted circular mean
def weighted_circular_mean(angles, weights):
    weighted_sin = np.sum(np.sin(angles) * weights)
    weighted_cos = np.sum(np.cos(angles) * weights)
    total_weight = np.sum(weights)
    return np.arctan2(weighted_sin / total_weight, weighted_cos / total_weight)

# Calculate the weighted circular mean
weighted_circular_mean_result = weighted_circular_mean(angles, weights)
weighted_circular_mean_result, np.degrees(weighted_circular_mean_result)  # Result in radians and degrees

# --------------------------------------------------------------------------------------------------------------

# Weighted Moving Average
weights = np.array([0.1, 0.2, 0.7])  # Example weights
weighted_moving_average = data_frame[numeric_column].rolling(window=window_size).apply(lambda x: np.sum(weights * x), raw=True)
print(f"\nThe Weighted Moving Average of the numeric_column is:\n{weighted_moving_average.apply('${:,.2f}'.format)}")

# --------------------------------------------------------------------------------------------------------------

# Moving Weighted Median

# --------------------------------------------------------------------------------------------------------------

# Weighted Pseudomedian
