# Jupyter notebook to calculate the optimal combination of weights for the performance metrics (pm)

In [13]:
import pandas as pd
from scipy.optimize import nnls
from sklearn.metrics import mean_squared_error

In [2]:
# Actual end-of-season ranks: one row per team, one "<year> Actual Rank" column per season.
ranking_csv = '/Users/kaspervanderhorst/Desktop/performance metric/team ranking.csv'
# The file uses 99 to mark a team that was not playing that year (the league
# expanded in 2022); every 99 is swapped for 0 right after loading.
ranking = pd.read_csv(ranking_csv).replace(to_replace=99, value=0)
ranking.head()

Unnamed: 0,Team,2018 Actual Rank,2019 Actual Rank,2020 Actual Rank,2021 Actual Rank,2022 Actual Rank,2023 Actual Rank
0,Kolkata Night Riders,3,5,5,4,7,7
1,Mumbai Indians,5,1,1,5,10,4
2,Kings XI Punjab,7,6,6,6,6,8
3,Sunrisers Hyderbad,1,4,3,8,8,10
4,Rajasthan Royals,4,7,8,7,2,5


In [3]:
# Pull the 'Team' column out of the ranking table as a plain Python list and show it.
teams = ranking.loc[:, 'Team'].to_list()
print(teams)

['Kolkata Night Riders', 'Mumbai Indians', 'Kings XI Punjab', 'Sunrisers Hyderbad', 'Rajasthan Royals', 'Delhi Capitals', 'Royal Challengers Bangalore', 'Chennai Super Kings', 'Gujarat Titans', 'Lucknow Super Giants']


In [4]:
# Ranks assigned by the three performance metrics (rank_pm1..rank_pm3), one row per team and year.
predicted_rankings = pd.read_csv(
    '/Users/kaspervanderhorst/Desktop/performance metric/predicted_rankings.csv'
).drop(columns=['Unnamed: 0'], errors='ignore')  # remove the 'Unnamed: 0' column only when the file has one

# Still to do after this cell: the teams that were not there in a given year
# (Lucknow Super Giants and Gujarat Titans) need rows of their own,
# with rank 0 for each of those years.
predicted_rankings.head()

Unnamed: 0,team_name,rank_pm1,rank_pm2,rank_pm3,year
0,Kolkata Night Riders,1,2,2,2018
1,Mumbai Indians,4,1,7,2018
2,Kings XI Punjab,3,6,3,2018
3,Sunrisers Hyderbad,2,3,5,2018
4,Rajasthan Royals,6,5,8,2018


In [6]:
# Pad `predicted_rankings` with all-zero placeholder rows for the two expansion
# teams in the seasons 2018-2021.
# The cell is safe to re-run: a (team, year) pair that already has a row is
# skipped, so repeated execution does not append duplicate placeholder rows.
years = predicted_rankings['year'].unique()

new_teams = ["Lucknow Super Giants", "Gujarat Titans"]

# (team, year) pairs that already have a row in the table.
existing_pairs = set(zip(predicted_rankings['team_name'], predicted_rankings['year']))

new_rows = [
    {
        'team_name': team,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    }
    for year in range(2018, 2022)  # seasons 2018..2021 inclusive
    for team in new_teams
    if (team, year) not in existing_pairs
]

# Explicit columns keep the frame well-formed even when there is nothing to add.
new_teams_df = pd.DataFrame(
    new_rows, columns=['team_name', 'rank_pm1', 'rank_pm2', 'rank_pm3', 'year']
)

# Only concatenate when placeholder rows are actually missing; this avoids
# concatenating an empty frame on a re-run.
if not new_teams_df.empty:
    predicted_rankings = pd.concat([predicted_rankings, new_teams_df], ignore_index=True)

predicted_rankings.head()

Unnamed: 0,team_name,rank_pm1,rank_pm2,rank_pm3,year
0,Kolkata Night Riders,1,2,2,2018
1,Mumbai Indians,4,1,7,2018
2,Kings XI Punjab,3,6,3,2018
3,Sunrisers Hyderbad,2,3,5,2018
4,Rajasthan Royals,6,5,8,2018


The expansion teams get a predicted rank of zero because I gave them an actual rank of 0 as well. The model has no intercept, so whatever the weights are, the prediction for those rows is 0·w1 + 0·w2 + 0·w3 = 0 and their residual is always zero. They therefore won't influence the fitted weights, but every year still has matching dimensions.

In [10]:
# Reshape actual ranking data for merging: one row per (Team, Year) instead of
# one column per season.
ranking_melted = ranking.melt(id_vars=['Team'],
                              value_vars=['2018 Actual Rank', '2019 Actual Rank', '2020 Actual Rank', '2021 Actual Rank', '2022 Actual Rank', '2023 Actual Rank'],
                              var_name='Year', value_name='Actual Rank')

# Extract the year from the 'Year' column (e.g. '2018 Actual Rank' -> 2018).
# The pattern is a raw string so that `\d` is not treated as an (invalid) string
# escape; expand=False returns a Series rather than a one-column DataFrame.
ranking_melted['Year'] = ranking_melted['Year'].str.extract(r'(\d+)', expand=False).astype(int)

# Merge metrics rankings and actual rankings (inner join on team name and year)
data = pd.merge(predicted_rankings, ranking_melted, left_on=['team_name', 'year'], right_on=['Team', 'Year'])

# An inner join silently discards metric rows whose (team_name, year) has no
# counterpart in the actual rankings (e.g. a team name spelled differently in
# the two files), so report when that happens instead of losing rows unnoticed.
n_unmatched = len(predicted_rankings) - len(data)
if n_unmatched > 0:
    print(f"Warning: {n_unmatched} row(s) of predicted_rankings had no matching actual ranking and were dropped by the merge")

# Drop unnecessary columns ('Team' and 'year' duplicate 'team_name' and 'Year')
data = data.drop(columns=['Team', 'year'])

print(data)

                      team_name  rank_pm1  rank_pm2  rank_pm3  Year  \
0          Kolkata Night Riders         1         2         2  2018   
1                Mumbai Indians         4         1         7  2018   
2               Kings XI Punjab         3         6         3  2018   
3            Sunrisers Hyderbad         2         3         5  2018   
4              Rajasthan Royals         6         5         8  2018   
5                Delhi Capitals         7         4         4  2018   
6   Royal Challengers Bangalore         5         7         1  2018   
7           Chennai Super Kings         8         8         6  2018   
8          Kolkata Night Riders         1         1         1  2019   
9           Chennai Super Kings         2         2         2  2019   
10               Mumbai Indians         3         3         3  2019   
11               Delhi Capitals         4         4         4  2019   
12              Kings XI Punjab         5         5         5  2019   
13    

In [15]:
# Fit non-negative weights w so that
#   w1*rank_pm1 + w2*rank_pm2 + w3*rank_pm3  ~  Actual Rank
# over all rows of `data` (no intercept term), then evaluate the fit.
X = data[['rank_pm1', 'rank_pm2', 'rank_pm3']]
y = data['Actual Rank']

# Use Non-Negative Least Squares (NNLS) to fit the model.
# nnls returns the weight vector and the 2-norm of the residual (X @ w - y).
weights, residual = nnls(X, y)

print("Weights: ", weights)
print("Residual: ", residual)

# Use the model to predict rankings.
# NOTE(review): this rebinds `predicted_rankings`, which earlier cells use for the
# metrics table; those cells need the CSV reloaded before they can be re-run.
predicted_rankings = X.dot(weights)

# Evaluate the model over every row (placeholder rows included)
mse = mean_squared_error(y, predicted_rankings)
print("Mean Squared Error: ", mse)

# Placeholder rows (all three metric ranks and the actual rank equal to 0) have
# zero error whatever the weights are, so they pull the mean above towards zero.
# Also report the error over the remaining rows only.
is_placeholder = (X == 0).all(axis=1) & (y == 0)
if is_placeholder.any() and not is_placeholder.all():
    mse_real = mean_squared_error(y[~is_placeholder], predicted_rankings[~is_placeholder])
    print("Mean Squared Error (excluding placeholder rows): ", mse_real)

# Print the actual and predicted rankings
result = data[['Year', 'Actual Rank', 'team_name']].copy()
result['Predicted Rank'] = predicted_rankings
print(result)

Weights:  [0.56436405 0.21142287 0.        ]
Residual:  20.212269778381522
Mean Squared Error:  7.427924538074088
    Year  Actual Rank                    team_name  Predicted Rank
0   2018            3         Kolkata Night Riders        0.987210
1   2018            5               Mumbai Indians        2.468879
2   2018            7              Kings XI Punjab        2.961629
3   2018            1           Sunrisers Hyderbad        1.762997
4   2018            4             Rajasthan Royals        4.443299
5   2018            8               Delhi Capitals        4.796240
6   2018            6  Royal Challengers Bangalore        4.301780
7   2018            2          Chennai Super Kings        6.206295
8   2019            5         Kolkata Night Riders        0.775787
9   2019            2          Chennai Super Kings        1.551574
10  2019            1               Mumbai Indians        2.327361
11  2019            3               Delhi Capitals        3.103148
12  2019       