# Comprehensive Ranking Module

# Import Data

In [1]:
import pandas as pd
# Workbook that holds the trainer dataset.
# NOTE(review): the file name contains a space before ".xlsx" — confirm it matches the file on disk.
file_path = 'PT_Dataset .xlsx'

# Load the already-normalised indicators from the 'Normonized' sheet.
df_normonized = pd.read_excel(io=file_path, sheet_name='Normonized')

# Weight Allocation

TOPSIS + Entropy method

In [2]:
import numpy as np

# The seven indicator columns that feed both the entropy weights and TOPSIS
features = [
    'videos_count_norm',
    'total_likes_norm',
    'workout_recommendations_norm',
    'athlete_rating_norm',
    'retention_rate_norm',
    'profile_completeness_norm',
    'cert_score'
]
X = df_normonized[features].values

# Step 1: Entropy Method Weights
# Turn every column into proportions that sum to 1 (data already scaled to [0,1]).
# A column whose sum is not strictly positive would produce 0/0 = NaN here;
# np.nansum would then report an entropy of 0 and give that information-free
# column the largest divergence. Such columns are treated as uniform instead
# and their divergence is forced to 0 below.
n_rows = len(X)
col_sums = X.sum(axis=0)
has_mass = col_sums > 0
P = np.where(has_mass, X / np.where(has_mass, col_sums, 1.0), 1.0 / max(n_rows, 1))

# Entropy calculation
eps = 1e-12  # to avoid log(0)
E = -np.nansum(P * np.log(P + eps), axis=0) / np.log(len(X))
d = np.where(has_mass, 1 - E, 0.0)  # divergence; 0 for columns without any mass
d_total = d.sum()
if d_total > 0:
    w = d / d_total  # entropy weights
else:
    # No column discriminates between rows: fall back to equal weights
    # instead of dividing by zero.
    w = np.full(d.shape, 1.0 / d.size)

# Step 2: TOPSIS Composite Score
# Ideal best and worst solutions (column-wise maximum / minimum)
Z_plus = X.max(axis=0)
Z_minus = X.min(axis=0)
# Weighted Euclidean distance: the weights multiply the squared differences
D_plus = np.sqrt(((X - Z_plus) ** 2 * w).sum(axis=1))
D_minus = np.sqrt(((X - Z_minus) ** 2 * w).sum(axis=1))
# Closeness coefficient. When a row is at zero distance from both the ideal
# and the anti-ideal point, 0/0 is replaced by the neutral value 0.5 so the
# integer cast of the rank below cannot fail on NaN.
denom = D_plus + D_minus
score = np.divide(D_minus, denom, out=np.full_like(denom, 0.5), where=denom > 0)

# result: keep the loaded frame untouched and attach score, rank and weights to a copy
df_result = df_normonized.copy()
df_result['topsis_score'] = score
# Rank 1 = highest score; tied scores share the smallest rank (method='min')
df_result['topsis_rank'] = df_result['topsis_score'].rank(ascending=False, method='min').astype(int)

# Store each feature's weight (rounded to 4 decimals) as a constant column
for i, col in enumerate(features):
    df_result[f'weight_{col}'] = round(w[i], 4)


In [3]:
# One row per feature with its entropy weight, rounded to 3 decimals for display
weights_table = pd.DataFrame({'feature': features, 'weight': np.round(w, 3)})

# Identification columns plus TOPSIS score and rank, best rank first;
# the score is rounded to 3 decimals for display only (df_result keeps the full value)
ranking_table = (
    df_result[['trainer_id', 'name', 'topsis_score', 'topsis_rank']]
    .assign(topsis_score=lambda table: table['topsis_score'].round(3))
    .sort_values('topsis_rank')
)

In [4]:
weights_table

Unnamed: 0,feature,weight
0,videos_count_norm,0.132
1,total_likes_norm,0.078
2,workout_recommendations_norm,0.228
3,athlete_rating_norm,0.061
4,retention_rate_norm,0.285
5,profile_completeness_norm,0.038
6,cert_score,0.177


In [5]:
ranking_table

Unnamed: 0,trainer_id,name,topsis_score,topsis_rank
229,230,Trainer 230,0.917,1
839,840,Trainer 840,0.908,2
194,195,Trainer 195,0.900,3
915,916,Trainer 916,0.891,4
284,285,Trainer 285,0.889,5
...,...,...,...,...
100,101,Trainer 101,0.405,996
123,124,Trainer 124,0.404,997
667,668,Trainer 668,0.394,998
881,882,Trainer 882,0.369,999


# Optimized Weight

Introduce bonus points with a fixed weight of 20%, rescale the individual feature weights accordingly, and re-rank the trainers by the resulting final score.

In [None]:
# Re-read the 'Normonized' sheet; this rebinds df_normonized to a freshly loaded frame
file_path_latest = 'PT_Dataset .xlsx'
df_normonized = pd.read_excel(io=file_path_latest, sheet_name='Normonized')

# Feature columns used for the bonus points (same seven columns, same order, as the TOPSIS cell)
features = [
    'videos_count_norm',
    'total_likes_norm',
    'workout_recommendations_norm',
    'athlete_rating_norm',
    'retention_rate_norm',
    'profile_completeness_norm',
    'cert_score',
]

bonus points calculation

In [3]:
def calc_bonus_points(group_df, feature_cols=None, lower_q=0.3, upper_q=0.7, all_round_bonus=5):
    """Compute integer bonus points for every row of one group.

    For each feature column a row earns +1 when its value is at or above the
    group's ``upper_q`` quantile and -1 when it is at or below the group's
    ``lower_q`` quantile (a value that satisfies both, e.g. in a constant
    column, nets 0). A row whose value is at or above the group median in
    *every* feature column additionally earns ``all_round_bonus`` points.

    Parameters
    ----------
    group_df : pandas.DataFrame
        Rows of a single group; must contain the feature columns.
    feature_cols : list of str, optional
        Columns to score. Defaults to the module-level ``features`` list.
    lower_q, upper_q : float, default 0.3 and 0.7
        Quantile levels used as the penalty and reward thresholds.
    all_round_bonus : int, default 5
        Extra points for rows at or above the median in all features.

    Returns
    -------
    pandas.Series
        Integer bonus points indexed like ``group_df``.
    """
    cols = list(features if feature_cols is None else feature_cols)
    values = group_df[cols].to_numpy()

    # One vectorised call yields all three thresholds per column; the rows of
    # the result follow the order of the requested quantile levels.
    q_low, q_mid, q_high = group_df[cols].quantile([lower_q, 0.5, upper_q]).to_numpy()

    # +1 per feature in the top band, -1 per feature in the bottom band
    bonus = (values >= q_high).sum(axis=1) - (values <= q_low).sum(axis=1)
    # Extra reward for rows that reach the median in every single feature
    bonus = bonus + all_round_bonus * (values >= q_mid).all(axis=1)
    return pd.Series(bonus.astype(int), index=group_df.index)

# Apply group-wise processing.
# Only the feature columns are handed to calc_bonus_points: it reads nothing
# else, and selecting them keeps the grouping column 'Group' out of the frames
# passed to apply (pandas warns when apply operates on grouping columns).
# The returned Series keep the original row index, so the assignment aligns by index.
df_normonized['bonus_points'] = (
    df_normonized.groupby('Group', group_keys=False)[features].apply(calc_bonus_points)
)

df_normonized[['trainer_id', 'name', 'Group', 'bonus_points']].head(10)


  df_normonized['bonus_points'] = df_normonized.groupby('Group', group_keys=False).apply(calc_bonus_points)


Unnamed: 0,trainer_id,name,Group,bonus_points
0,1,Trainer 1,A,3
1,2,Trainer 2,A,-1
2,3,Trainer 3,C,0
3,4,Trainer 4,C,3
4,5,Trainer 5,C,0
5,6,Trainer 6,A,3
6,7,Trainer 7,B,5
7,8,Trainer 8,B,-1
8,9,Trainer 9,C,-1
9,10,Trainer 10,C,1


In [4]:
# Min-Max: rescale the raw bonus points linearly so the smallest value maps to 0 and the largest to 1
bonus_min = df_normonized['bonus_points'].min()
bonus_max = df_normonized['bonus_points'].max()
bonus_range = bonus_max - bonus_min
if bonus_range > 0:
    df_normonized['bonus_points_norm'] = (df_normonized['bonus_points'] - bonus_min) / bonus_range
else:
    # All bonus points are identical: the formula would be 0/0 = NaN and would
    # poison every final score, so the bonus contributes nothing instead.
    df_normonized['bonus_points_norm'] = 0.0

# show
df_normonized[['trainer_id', 'name', 'Group', 'bonus_points', 'bonus_points_norm']].head(10)

Unnamed: 0,trainer_id,name,Group,bonus_points,bonus_points_norm
0,1,Trainer 1,A,3,0.526316
1,2,Trainer 2,A,-1,0.315789
2,3,Trainer 3,C,0,0.368421
3,4,Trainer 4,C,3,0.526316
4,5,Trainer 5,C,0,0.368421
5,6,Trainer 6,A,3,0.526316
6,7,Trainer 7,B,5,0.631579
7,8,Trainer 8,B,-1,0.315789
8,9,Trainer 9,C,-1,0.315789
9,10,Trainer 10,C,1,0.421053


In [5]:
# Fixed weights of the final score: seven feature weights plus 0.200 for the
# normalised bonus points.
# NOTE(review): these eight values add up to 0.999, not 1.0 — confirm that is intended.
weight_dict = {
    'videos_count_norm': 0.106,
    'total_likes_norm': 0.062,
    'workout_recommendations_norm': 0.182,
    'athlete_rating_norm': 0.049,
    'retention_rate_norm': 0.228,
    'profile_completeness_norm': 0.030,
    'cert_score': 0.142,
    'bonus_points_norm': 0.200
}

# Final score = weighted sum of the columns, accumulated left to right in the
# order the weights are listed above
weighted_terms = [df_normonized[column] * weight for column, weight in weight_dict.items()]
df_normonized['final_score'] = sum(weighted_terms[1:], weighted_terms[0])

# Rank on the unrounded score (1 = best, ties share the smallest rank),
# then round the stored score to 3 decimals
df_normonized['final_rank'] = (
    df_normonized['final_score'].rank(method='min', ascending=False).astype(int)
)
df_normonized['final_score'] = df_normonized['final_score'].round(3)

# Result table: identification, score, rank and group, best rank first
final_cols = ['trainer_id', 'name', 'final_score', 'final_rank', 'Group']
df_result_final = df_normonized.loc[:, final_cols].sort_values('final_rank')

df_result_final.head(10)

Unnamed: 0,trainer_id,name,final_score,final_rank,Group
229,230,Trainer 230,0.956,1,B
839,840,Trainer 840,0.921,2,A
915,916,Trainer 916,0.907,3,B
284,285,Trainer 285,0.906,4,C
578,579,Trainer 579,0.901,5,A
747,748,Trainer 748,0.891,6,A
721,722,Trainer 722,0.874,7,C
194,195,Trainer 195,0.871,8,B
592,593,Trainer 593,0.868,9,A
749,750,Trainer 750,0.861,10,B
