In [1]:
!apt-get install -y -qq glpk-utils
!pip install pulp
import pandas as pd
import numpy as np
from pulp import LpProblem, LpVariable, lpSum, LpMaximize, LpStatus, GLPK
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns

Selecting previously unselected package libsuitesparseconfig5:amd64.
(Reading database ... 126371 files and directories currently installed.)
Preparing to unpack .../libsuitesparseconfig5_1%3a5.10.1+dfsg-4build1_amd64.deb ...
Unpacking libsuitesparseconfig5:amd64 (1:5.10.1+dfsg-4build1) ...
Selecting previously unselected package libamd2:amd64.
Preparing to unpack .../libamd2_1%3a5.10.1+dfsg-4build1_amd64.deb ...
Unpacking libamd2:amd64 (1:5.10.1+dfsg-4build1) ...
Selecting previously unselected package libcolamd2:amd64.
Preparing to unpack .../libcolamd2_1%3a5.10.1+dfsg-4build1_amd64.deb ...
Unpacking libcolamd2:amd64 (1:5.10.1+dfsg-4build1) ...
Selecting previously unselected package libglpk40:amd64.
Preparing to unpack .../libglpk40_5.0-1_amd64.deb ...
Unpacking libglpk40:amd64 (5.0-1) ...
Selecting previously unselected package glpk-utils.
Preparing to unpack .../glpk-utils_5.0-1_amd64.deb ...
Unpacking glpk-utils (5.0-1) ...
Setting up libsuitesparseconfig5:amd64 (1:5.10.1+dfsg-4b

In [2]:
# Fetch the player table straight from the GitHub raw-content URL.
# The first physical line of the CSV is dropped before parsing, so the
# second line is what pandas treats as the header row.
url = 'https://raw.githubusercontent.com/PhuphaB/Dissertation/main/ESPN_Top_40_Players.csv'
data = pd.read_csv(url, skiprows=1)

In [3]:
# Replace the CSV's own headers with short, code-friendly names.
# The list is positional: entry k becomes the name of column k, and pandas
# raises if its length differs from the number of columns in `data`.
renamed_columns = [
    'ESPN_Ranking',
    'Player',
    'Team',
    'Team_IG_Followers_M',
    'PER',
    'Minutes_Played',
    'Clutch_PPG',
    'Win_Shares',
    'True_Shooting_Pct',
    'IG_Growth_Pct',
    'IG_Engagement_Pct',
    'Player_IG_Followers_M',
    'Google_Trends_Score',
    'All_Star_Votes_M',
]
data.columns = renamed_columns

# Preview the first rows to confirm the new names landed on the right columns
data.head()

Unnamed: 0,ESPN_Ranking,Player,Team,Team_IG_Followers_M,PER,Minutes_Played,Clutch_PPG,Win_Shares,True_Shooting_Pct,IG_Growth_Pct,IG_Engagement_Pct,Player_IG_Followers_M,Google_Trends_Score,All_Star_Votes_M
0,2,Luka Doncic,Los Angeles Lakers,25.32,24.1,1769,1.9,5.9,58.7,24.0,7.12,10.49,5.58,2.03
1,3,Giannis Antetokounmpo,Milwaukee Bucks,5.5,30.5,2289,2.9,11.5,62.5,1.26,1.79,15.99,32.74,4.44
2,4,Shai Gilgeous-Alexander,Oklahoma City Thunder,6.16,30.7,2598,3.0,16.7,63.7,69.19,10.73,4.01,41.52,3.15
3,5,Jayson Tatum,Boston Celtics,8.46,21.7,2624,3.0,9.5,58.2,10.6,5.22,7.3,32.02,3.61
4,6,Stephen Curry,Golden State Warriors,32.72,21.5,2252,3.6,7.9,61.8,6.2,0.77,58.11,29.29,2.34


In [4]:
def run_vrs_dea_with_ar(inputs_df, outputs_df, ar_constraint=False, ar_lower_bound=0.05,
                        player_names=None):
    """
    Solves one VRS multiplier-form DEA linear program per player and returns
    each player's optimal weights, optionally under an Assurance Region (AR)
    constraint.

    For player i the program maximises ``sum_r u_r * y_ir - u0`` subject to

    * ``sum_s v_s * x_is == 1``
    * ``sum_r u_r * y_jr - sum_s v_s * x_js - u0 <= 0`` for every player j
    * ``u_r >= ar_lower_bound * sum(u)`` for every output r (only when
      ``ar_constraint`` is true)

    with ``u, v >= 0`` and ``u0`` unrestricted in sign.

    NOTE(review): this function was originally described as "output-oriented".
    Normalising the weighted inputs to 1 while maximising weighted outputs is
    the form usually labelled input-oriented -- confirm which is intended.

    Parameters
    ----------
    inputs_df, outputs_df : pandas.DataFrame
        One row per player, one column per input / output. Both frames must
        have the same number of rows, in the same player order.
    ar_constraint : bool
        When true, adds the AR lower bound on every output weight.
    ar_lower_bound : float
        Minimum share of the total output weight each output weight must take.
    player_names : sequence of str, optional
        Name for each row, in row order. When omitted, falls back to the
        notebook-global ``data['Player']`` (the original behaviour).

    Returns
    -------
    dict
        Maps each player name to ``{'u': [...], 'v': [...], 'u0': float}``
        when the solver reports an optimal solution, otherwise to ``None``.

    Raises
    ------
    ValueError
        If the inputs, outputs and names do not all have the same row count.
    """
    inputs = inputs_df.values
    outputs = outputs_df.values

    if player_names is None:
        # Backward-compatible fallback to the notebook-level frame.
        player_names = data['Player'].values
    player_names = list(player_names)

    num_players = len(player_names)
    if inputs.shape[0] != num_players or outputs.shape[0] != num_players:
        # Without this check a mismatch either raises an opaque IndexError or
        # silently pairs names with the wrong rows.
        raise ValueError(
            f"Row count mismatch: {inputs.shape[0]} input rows, "
            f"{outputs.shape[0]} output rows, {num_players} player names."
        )

    num_inputs = inputs.shape[1]
    num_outputs = outputs.shape[1]
    optimal_weights = {}

    for i in range(num_players):
        prob = LpProblem(f"VRS_AR_DEA_Player_{i}", LpMaximize)
        u = [LpVariable(f"u_{r}_{i}", lowBound=0) for r in range(num_outputs)]
        v = [LpVariable(f"v_{s}_{i}", lowBound=0) for s in range(num_inputs)]
        u0 = LpVariable(f"u0_{i}")  # VRS intercept, free in sign

        # Objective: weighted outputs of the evaluated player, net of the intercept
        prob += lpSum(u[r] * outputs[i, r] for r in range(num_outputs)) - u0
        # Normalisation: the evaluated player's weighted inputs equal 1
        prob += lpSum(v[s] * inputs[i, s] for s in range(num_inputs)) == 1

        # Under these weights no player may score above 1
        for j in range(num_players):
            prob += lpSum(u[r] * outputs[j, r] for r in range(num_outputs)) - \
                    lpSum(v[s] * inputs[j, s] for s in range(num_inputs)) - u0 <= 0

        if ar_constraint:
            # Stop any output weight collapsing to zero: each must be at least
            # `ar_lower_bound` of the total output weight.
            sum_of_u_weights = lpSum(u)
            for r in range(num_outputs):
                prob += u[r] >= ar_lower_bound * sum_of_u_weights

        prob.solve(GLPK(msg=False))

        if LpStatus[prob.status] == 'Optimal':
            optimal_weights[player_names[i]] = {
                'u': [var.value() for var in u],
                'v': [var.value() for var in v],
                # Kept so callers can reconstruct the VRS score; existing
                # consumers that only read 'u' and 'v' are unaffected.
                'u0': u0.value(),
            }
        else:
            optimal_weights[player_names[i]] = None

    return optimal_weights

def calculate_cross_efficiency(inputs_df, outputs_df, all_weights, player_names=None):
    """
    Calculates the cross-efficiency matrix and the final average scores.

    Entry ``[i, j]`` of the matrix is player j's weighted outputs divided by
    player j's weighted inputs, using the weights of player i (the evaluator).
    A zero weighted-input total yields a score of 0.

    NOTE(review): only the 'u' and 'v' weights are used in the ratio; a VRS
    intercept, if present in the weight dict, is not applied -- confirm that
    this is the intended cross-efficiency definition.

    Parameters
    ----------
    inputs_df, outputs_df : pandas.DataFrame
        One row per player, one column per input / output, in the same order.
    all_weights : dict
        Maps player name to ``{'u': [...], 'v': [...]}`` or to ``None`` when
        no weights are available for that player.
    player_names : sequence of str, optional
        Name for each row, in row order. When omitted, falls back to the
        notebook-global ``data['Player']`` (the original behaviour).

    Returns
    -------
    (matrix_df, scores_df)
        ``matrix_df`` is the evaluator-by-evaluatee score matrix; rows for
        evaluators without weights stay at 0. ``scores_df`` has the columns
        'Player' and 'Cross_Efficiency_Score', where the score is the column
        mean over evaluators that actually have weights.

    Raises
    ------
    ValueError
        If the inputs, outputs and names do not all have the same row count.
    """
    inputs = inputs_df.to_numpy(dtype=float)
    outputs = outputs_df.to_numpy(dtype=float)

    if player_names is None:
        # Backward-compatible fallback to the notebook-level frame.
        player_names = data['Player'].values
    player_names = list(player_names)

    num_players = len(player_names)
    if inputs.shape[0] != num_players or outputs.shape[0] != num_players:
        raise ValueError(
            f"Row count mismatch: {inputs.shape[0]} input rows, "
            f"{outputs.shape[0]} output rows, {num_players} player names."
        )

    cross_efficiency_matrix = np.zeros((num_players, num_players))
    has_weights = np.zeros(num_players, dtype=bool)

    for i, evaluator_name in enumerate(player_names):
        weights = all_weights.get(evaluator_name)
        if weights is None:
            continue
        has_weights[i] = True
        evaluator_u = np.asarray(weights['u'], dtype=float)
        evaluator_v = np.asarray(weights['v'], dtype=float)

        # Score every evaluatee at once with this evaluator's weights
        weighted_outputs = outputs @ evaluator_u
        weighted_inputs = inputs @ evaluator_v
        # Entries with a zero denominator keep the row's initial 0
        np.divide(
            weighted_outputs,
            weighted_inputs,
            out=cross_efficiency_matrix[i],
            where=weighted_inputs != 0,
        )

    # Average only over evaluators that produced weights; averaging in their
    # all-zero placeholder rows would drag every player's score down.
    if has_weights.any():
        final_cross_scores = cross_efficiency_matrix[has_weights].mean(axis=0)
    else:
        final_cross_scores = np.zeros(num_players)

    scores_df = pd.DataFrame({'Player': player_names, 'Cross_Efficiency_Score': final_cross_scores})
    matrix_df = pd.DataFrame(cross_efficiency_matrix, index=player_names, columns=player_names)
    return matrix_df, scores_df

In [5]:
# Definitive Model: choose which columns of `data` act as DEA inputs and
# which act as DEA outputs.
input_cols = ['PER', 'True_Shooting_Pct', 'Clutch_PPG']
output_cols = [
    'IG_Growth_Pct',
    'IG_Engagement_Pct',
    'Player_IG_Followers_M',
    'All_Star_Votes_M',
    'Google_Trends_Score',
]
model1_inputs = data.loc[:, input_cols]
model1_outputs = data.loc[:, output_cols]

# Step 1: solve the per-player VRS DEA programs with the Assurance Region
# constraint enabled, so every output weight is at least 5% of the total
# output weight.
print("--- Running VRS DEA with AR constraint to calculate optimal weights ---")
model1_vrs_ar_weights = run_vrs_dea_with_ar(
    model1_inputs, model1_outputs, ar_constraint=True, ar_lower_bound=0.05
)
print("Optimal weights calculated.\n")

# Step 2: score every player with every other player's weights
print("--- Calculating Cross-Efficiency Matrix and Scores ---")
matrix_df, scores_df = calculate_cross_efficiency(
    model1_inputs, model1_outputs, model1_vrs_ar_weights
)
print("Cross-efficiency analysis complete.\n")

--- Running VRS DEA with AR constraint to calculate optimal weights ---
Optimal weights calculated.

--- Calculating Cross-Efficiency Matrix and Scores ---
Cross-efficiency analysis complete.



In [6]:
# --- Full matrix: print it in its entirety, then write it to CSV ---
print("\n--- Full Cross-Efficiency Matrix (VRS Definitive Model with AR) ---")
print("Rows = The player whose weights are being used (Evaluator)")
print("Columns = The player being scored (Evaluatee)\n")
# Lift pandas' row/column display limits so nothing is elided. This changes
# the global display options for the rest of the session.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)
print(matrix_df.round(4))
matrix_df.to_csv('definitive_model_vrs_ar_cross_efficiency_full_matrix.csv')
print("\nFull matrix saved to 'definitive_model_vrs_ar_cross_efficiency_full_matrix.csv'")


# --- Ranking: highest cross-efficiency score gets rank 1 ---
print("\n--- Final Cross-Efficiency Ranked Scores (VRS Definitive Model with AR) ---")
# method='min' gives tied players the same (lowest) rank
score_ranks = scores_df['Cross_Efficiency_Score'].rank(method='min', ascending=False)
scores_df['Rank'] = score_ranks.astype(int)  # adds the column to scores_df in place
scores_df_sorted = scores_df.sort_values(by='Rank')
ranking_table = tabulate(scores_df_sorted, headers='keys', tablefmt='fancy_grid', showindex=False)
print(ranking_table)
scores_df_sorted.to_csv('definitive_model_vrs_ar_cross_efficiency_ranking.csv', index=False)
print("\nFinal ranking saved to 'definitive_model_vrs_ar_cross_efficiency_ranking.csv'")


--- Full Cross-Efficiency Matrix (VRS Definitive Model with AR) ---
Rows = The player whose weights are being used (Evaluator)
Columns = The player being scored (Evaluatee)

                         Luka Doncic  Giannis Antetokounmpo  \
Luka Doncic                   0.0570                 0.0357   
Giannis Antetokounmpo         0.0358                 0.1076   
Shai Gilgeous-Alexander       0.0845                 0.0749   
Jayson Tatum                  0.0358                 0.1076   
Stephen Curry                 0.0325                 0.1053   
Lebron James                  0.1093                 0.1026   
Joel Embiid                   0.0119                 0.0156   
Kevin Durant                  0.0358                 0.1076   
Anthony Edwards               0.2520                 0.1993   
Victor Wembenyama             0.4723                 0.1612   
Jalen Brunson                 0.0748                 0.0360   
Anthony Davis                 0.0210                 0.0281   
Jaylen