In [13]:
!pip install pulp
import pandas as pd
import numpy as np
from pulp import LpProblem, LpVariable, lpSum, LpMaximize, LpStatus, GLPK
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns



In [14]:
# Load the ESPN Top-40 player table from the CSV hosted on GitHub.
url = 'https://raw.githubusercontent.com/PhuphaB/Dissertation/main/ESPN_Top_40_Players.csv'
# skiprows=[0] drops the first physical line of the file, so the *second* line is parsed as the header.
# NOTE(review): the head() preview of this frame starts at ESPN_Ranking 2 - confirm that the first line
# really is a title/banner row; if it is the true header, the rank-1 player is being consumed as the header.
data = pd.read_csv(url, skiprows=[0]) # Skip the first row

In [15]:
# Swap the CSV header for short snake_case labels.
# The assignment is positional, so the order below must match the column order of the file.
data.columns = [
    'ESPN_Ranking',
    'Player',
    'Team',
    'Team_IG_Followers_M',
    'PER',
    'Minutes_Played',
    'Clutch_PPG',
    'Win_Shares',
    'True_Shooting_Pct',
    'IG_Growth_Pct',
    'IG_Engagement_Pct',
    'Player_IG_Followers_M',
    'Google_Trends_Score',
    'All_Star_Votes_M',
]

# Preview the first rows to confirm the new labels landed on the right columns
data.head()

Unnamed: 0,ESPN_Ranking,Player,Team,Team_IG_Followers_M,PER,Minutes_Played,Clutch_PPG,Win_Shares,True_Shooting_Pct,IG_Growth_Pct,IG_Engagement_Pct,Player_IG_Followers_M,Google_Trends_Score,All_Star_Votes_M
0,2,Luka Doncic,Los Angeles Lakers,25.32,24.1,1769,1.9,5.9,58.7,24.0,7.12,10.49,5.58,2.03
1,3,Giannis Antetokounmpo,Milwaukee Bucks,5.5,30.5,2289,2.9,11.5,62.5,1.26,1.79,15.99,32.74,4.44
2,4,Shai Gilgeous-Alexander,Oklahoma City Thunder,6.16,30.7,2598,3.0,16.7,63.7,69.19,10.73,4.01,41.52,3.15
3,5,Jayson Tatum,Boston Celtics,8.46,21.7,2624,3.0,9.5,58.2,10.6,5.22,7.3,32.02,3.61
4,6,Stephen Curry,Golden State Warriors,32.72,21.5,2252,3.6,7.9,61.8,6.2,0.77,58.11,29.29,2.34


In [16]:
def run_dea(inputs_df, outputs_df, ar_constraint=False, return_weights=False, ar_lower_bound=0.05,
            player_names=None):
    """
    Score every player (DMU) with a DEA multiplier model, solving one LP per player.

    For the player under evaluation the LP maximises that player's weighted output sum,
    subject to (a) the player's weighted input sum being normalised to 1 and (b) no
    player's weighted outputs exceeding its weighted inputs (CCR multiplier form).
    All weights are non-negative, so an optimal score is never greater than 1.

    Parameters
    ----------
    inputs_df : pandas.DataFrame
        One row per player, one column per input measure.
    outputs_df : pandas.DataFrame
        One row per player (same row order as ``inputs_df``), one column per output measure.
    ar_constraint : bool, default False
        When True, each output weight must be at least ``ar_lower_bound`` times the sum
        of all output weights.
    return_weights : bool, default False
        When True, also return the optimal weights found for each player.
    ar_lower_bound : float, default 0.05
        Minimum share of the total output weight for each output (used only when
        ``ar_constraint`` is True).
    player_names : sequence of str, optional
        Labels for the rows of ``inputs_df`` / ``outputs_df``. When omitted, the
        notebook-level ``data['Player']`` column is used, which is only valid if the
        frames have exactly the same rows, in the same order, as ``data``.
        Names should be unique: results are keyed by name, so a repeated name
        overwrites the earlier entry.

    Returns
    -------
    pandas.DataFrame
        Columns ``Player`` and ``Efficiency_Score`` (score is None/NaN when the LP did
        not reach an optimal status).
    (pandas.DataFrame, dict)
        Only when ``return_weights`` is True: the frame above plus a dict mapping each
        player name to ``{'u': [...output weights...], 'v': [...input weights...]}``,
        or to None when the LP was not solved to optimality.

    Raises
    ------
    ValueError
        If the two frames have different row counts, or if the number of names does
        not match the number of rows.
    """
    # Cast to float so every LP coefficient is a plain floating-point number.
    inputs = np.asarray(inputs_df.values, dtype=float)
    outputs = np.asarray(outputs_df.values, dtype=float)

    if inputs.shape[0] != outputs.shape[0]:
        raise ValueError(
            f"inputs_df has {inputs.shape[0]} rows but outputs_df has {outputs.shape[0]}; "
            "both must describe the same players in the same order."
        )

    if player_names is None:
        # Backward-compatible default: labels come from the notebook-level frame.
        player_names = data['Player'].values
    player_names = list(player_names)
    num_players = len(player_names)

    # Previously a mismatch here surfaced as an IndexError mid-loop, or silently
    # attached the wrong names to the scores; fail early with a clear message instead.
    if num_players != inputs.shape[0]:
        raise ValueError(
            f"Got {num_players} player names for {inputs.shape[0]} rows; "
            "pass player_names matching the rows of inputs_df/outputs_df."
        )

    num_inputs = inputs.shape[1]
    num_outputs = outputs.shape[1]

    efficiency_scores = {}
    optimal_weights = {}

    for i in range(num_players):
        prob = LpProblem(f"DEA_Player_{i}", LpMaximize)
        # u: output weights, v: input weights (fresh variables for each player's LP)
        u = [LpVariable(f"u_{r}_{i}", lowBound=0) for r in range(num_outputs)]
        v = [LpVariable(f"v_{s}_{i}", lowBound=0) for s in range(num_inputs)]

        # Objective: weighted outputs of the player being evaluated
        prob += lpSum(u[r] * outputs[i, r] for r in range(num_outputs))
        # Normalisation: weighted inputs of the evaluated player equal 1
        prob += lpSum(v[s] * inputs[i, s] for s in range(num_inputs)) == 1

        # Under these weights no player may have outputs exceeding inputs
        for j in range(num_players):
            prob += lpSum(u[r] * outputs[j, r] for r in range(num_outputs)) - \
                    lpSum(v[s] * inputs[j, s] for s in range(num_inputs)) <= 0

        if ar_constraint:
            # Each output weight carries at least `ar_lower_bound` of the total output weight
            sum_of_u_weights = lpSum(u)
            for r in range(num_outputs):
                prob += u[r] >= ar_lower_bound * sum_of_u_weights

        prob.solve()

        name = player_names[i]
        if LpStatus[prob.status] == 'Optimal':
            efficiency_scores[name] = prob.objective.value()
            if return_weights:
                optimal_weights[name] = {
                    'u': [var.value() for var in u],
                    'v': [var.value() for var in v]
                }
        else:
            # Keep the player in the results but flag that no optimal solution was found
            efficiency_scores[name] = None
            if return_weights:
                optimal_weights[name] = None

    results_df = pd.DataFrame(list(efficiency_scores.items()), columns=['Player', 'Efficiency_Score'])

    if return_weights:
        return results_df, optimal_weights
    return results_df

# Model 4 (Without Cumulative Statistics)

In [17]:
# Model 4 specification: input and output columns fed to the DEA
input_cols_m4 = [
    'PER',
    'True_Shooting_Pct',
    'Clutch_PPG',
]
output_cols_m4 = [
    'IG_Growth_Pct',
    'IG_Engagement_Pct',
    'Player_IG_Followers_M',
    'All_Star_Votes_M',
    'Google_Trends_Score',
]

# Column subsets of the master table, in the order listed above
model4_inputs, model4_outputs = data[input_cols_m4], data[output_cols_m4]

In [18]:
# Solve Model 4 with the assurance-region constraint on, keeping each player's optimal weights
model4_scores, model4_weights = run_dea(
    model4_inputs,
    model4_outputs,
    ar_constraint=True,
    return_weights=True,
)

# Order the score table from most to least efficient and renumber the rows
model4_scores = model4_scores.sort_values(by='Efficiency_Score', ascending=False)
model4_scores = model4_scores.reset_index(drop=True)

In [19]:
# Print the Model 4 score table, then persist it as CSV
print("\n--- Results: Model 4 (Standard AR Scores) ---")
print(
    tabulate(
        model4_scores,
        headers='keys',
        tablefmt='fancy_grid',
        showindex=False,
    )
)
model4_scores.to_csv(path_or_buf='model_4_with_ar_scores.csv', index=False)
print("\nStandard AR scores saved to 'model_4_with_ar_scores.csv'")


--- Results: Model 4 (Standard AR Scores) ---
╒═════════════════════════╤════════════════════╕
│ Player                  │   Efficiency_Score │
╞═════════════════════════╪════════════════════╡
│ Tyrese Haliburton       │           1        │
├─────────────────────────┼────────────────────┤
│ Victor Wembenyama       │           1        │
├─────────────────────────┼────────────────────┤
│ Devin Booker            │           1        │
├─────────────────────────┼────────────────────┤
│ Anthony Edwards         │           1        │
├─────────────────────────┼────────────────────┤
│ Chet Holmgren           │           1        │
├─────────────────────────┼────────────────────┤
│ Lebron James            │           1        │
├─────────────────────────┼────────────────────┤
│ Jrue Holiday            │           1        │
├─────────────────────────┼────────────────────┤
│ Karl-Anthony Towns      │           0.981675 │
├─────────────────────────┼────────────────────┤
│ Anthony Davis       

In [20]:
# Flatten each player's optimal weights into one row (u_<output>, v_<input>) and save as CSV
weights_data_m4 = []
for player, w in model4_weights.items():
    if not w:
        # No weights were stored for this player, so there is nothing to export
        continue
    row = {'Player': player}
    row.update({f'u_{col}': w['u'][i] for i, col in enumerate(output_cols_m4)})
    row.update({f'v_{col}': w['v'][i] for i, col in enumerate(input_cols_m4)})
    weights_data_m4.append(row)

weights_df_m4 = pd.DataFrame(weights_data_m4)
weights_df_m4.to_csv(path_or_buf='model_4_with_ar_weights.csv', index=False)
print("Optimal weights saved to 'model_4_with_ar_weights.csv'\n")

Optimal weights saved to 'model_4_with_ar_weights.csv'



# Cross-Efficiency Matrix

In [21]:
def calculate_cross_efficiency(inputs_df, outputs_df, all_weights, player_names=None):
    """
    Build the cross-efficiency matrix and each player's average cross-efficiency score.

    Row i of the matrix holds the scores every player receives when rated with the
    weights of player i (weighted outputs / weighted inputs). A player's final score
    is the mean of its column over the evaluators that actually have weights.

    Parameters
    ----------
    inputs_df : pandas.DataFrame
        One row per player, one column per input measure.
    outputs_df : pandas.DataFrame
        One row per player (same row order as ``inputs_df``), one column per output measure.
    all_weights : dict
        Maps player name to ``{'u': [...output weights...], 'v': [...input weights...]}``,
        or to None / missing when no weights are available for that player.
    player_names : sequence of str, optional
        Labels for the rows of the two frames. When omitted, the notebook-level
        ``data['Player']`` column is used, which is only valid if the frames have
        exactly the same rows, in the same order, as ``data``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``matrix_df``: evaluator-by-evaluated score matrix indexed and labelled by
        player name; rows of evaluators without weights are NaN.
        ``scores_df``: columns ``Player`` and ``Cross_Efficiency_Score``.

    Raises
    ------
    ValueError
        If the two frames have different row counts, or if the number of names does
        not match the number of rows.
    """
    inputs = np.asarray(inputs_df.values, dtype=float)
    outputs = np.asarray(outputs_df.values, dtype=float)

    if inputs.shape[0] != outputs.shape[0]:
        raise ValueError(
            f"inputs_df has {inputs.shape[0]} rows but outputs_df has {outputs.shape[0]}; "
            "both must describe the same players in the same order."
        )

    if player_names is None:
        # Backward-compatible default: labels come from the notebook-level frame.
        player_names = data['Player'].values
    player_names = list(player_names)
    num_players = len(player_names)

    if num_players != inputs.shape[0]:
        raise ValueError(
            f"Got {num_players} player names for {inputs.shape[0]} rows; "
            "pass player_names matching the rows of inputs_df/outputs_df."
        )

    # NaN marks "this evaluator produced no scores". A zero row would be
    # indistinguishable from a genuine score of 0 and would drag the column means down.
    cross_efficiency_matrix = np.full((num_players, num_players), np.nan)
    has_weights = np.zeros(num_players, dtype=bool)

    for i, evaluator_name in enumerate(player_names):
        weights = all_weights.get(evaluator_name)
        if weights is None:
            continue
        evaluator_u = np.asarray(weights['u'], dtype=float)
        evaluator_v = np.asarray(weights['v'], dtype=float)

        # Rate every player at once with this evaluator's weights
        weighted_outputs = (outputs * evaluator_u).sum(axis=1)
        weighted_inputs = (inputs * evaluator_v).sum(axis=1)

        # A zero weighted input yields a score of 0 rather than a division error
        row_scores = np.zeros(num_players)
        np.divide(weighted_outputs, weighted_inputs, out=row_scores, where=weighted_inputs != 0)

        cross_efficiency_matrix[i] = row_scores
        has_weights[i] = True

    # Average only over evaluators that contributed a row of scores
    if has_weights.any():
        final_cross_scores = cross_efficiency_matrix[has_weights].mean(axis=0)
    else:
        final_cross_scores = np.full(num_players, np.nan)

    scores_df = pd.DataFrame({'Player': player_names, 'Cross_Efficiency_Score': final_cross_scores})
    matrix_df = pd.DataFrame(cross_efficiency_matrix, index=player_names, columns=player_names)
    return matrix_df, scores_df

In [22]:
# Cross-evaluate every player under every other player's Model 4 weights
m4_matrix_df, m4_scores_df = calculate_cross_efficiency(
    inputs_df=model4_inputs,
    outputs_df=model4_outputs,
    all_weights=model4_weights,
)

In [23]:
# Print the cross-efficiency matrix rounded to 3 decimals; the CSV keeps full precision
print("\n--- Full Cross-Efficiency Matrix (Model 4) ---")
print(m4_matrix_df.round(decimals=3))
m4_matrix_df.to_csv(path_or_buf='model_4_cross_efficiency_matrix.csv')
print("\nFull matrix saved to 'model_4_cross_efficiency_matrix.csv'")


--- Full Cross-Efficiency Matrix (Model 4) ---
                         Luka Doncic  Giannis Antetokounmpo  \
Luka Doncic                    0.569                  0.444   
Giannis Antetokounmpo          0.212                  0.627   
Shai Gilgeous-Alexander        0.445                  0.435   
Jayson Tatum                   0.193                  0.625   
Stephen Curry                  0.158                  0.458   
Lebron James                   0.075                  0.085   
Joel Embiid                    0.221                  0.615   
Kevin Durant                   0.189                  0.597   
Anthony Edwards                0.250                  0.198   
Victor Wembenyama              0.431                  0.179   
Jalen Brunson                  0.425                  0.187   
Anthony Davis                  0.463                  0.459   
Jaylen Brown                   0.189                  0.597   
Devin Booker                   0.157                  0.562   
Tyrese 

In [24]:
# Rank players by average cross-efficiency (ties share the lowest rank), then show and save the table
print("\n--- Final Cross-Efficiency Ranked Scores ---")
m4_scores_df = (
    m4_scores_df
    .assign(
        Rank=lambda scores: scores['Cross_Efficiency_Score']
        .rank(method='min', ascending=False)
        .astype(int)
    )
    .sort_values(by='Cross_Efficiency_Score', ascending=False)
    .reset_index(drop=True)
)
print(
    tabulate(
        m4_scores_df,
        headers='keys',
        tablefmt='fancy_grid',
        showindex=False,
    )
)
m4_scores_df.to_csv(path_or_buf='cross_efficiency_ranking.csv', index=False)
print("\nFinal ranking saved to 'cross_efficiency_ranking.csv'")


--- Final Cross-Efficiency Ranked Scores ---
╒═════════════════════════╤══════════════════════════╤════════╕
│ Player                  │   Cross_Efficiency_Score │   Rank │
╞═════════════════════════╪══════════════════════════╪════════╡
│ Anthony Edwards         │                 0.932979 │      1 │
├─────────────────────────┼──────────────────────────┼────────┤
│ Jrue Holiday            │                 0.913156 │      2 │
├─────────────────────────┼──────────────────────────┼────────┤
│ Lebron James            │                 0.76655  │      3 │
├─────────────────────────┼──────────────────────────┼────────┤
│ Victor Wembenyama       │                 0.746328 │      4 │
├─────────────────────────┼──────────────────────────┼────────┤
│ Shai Gilgeous-Alexander │                 0.707993 │      5 │
├─────────────────────────┼──────────────────────────┼────────┤
│ Kawhi Leonard           │                 0.655204 │      6 │
├─────────────────────────┼──────────────────────────┼────