In [1]:
import pandas as pd
from scipy.optimize import nnls
from sklearn.metrics import mean_squared_error

In [2]:
predicted_points = pd.read_csv('predicted_rankings_inverted.csv')
if 'Unnamed: 0' in predicted_points.columns:
    predicted_points = predicted_points.drop(columns=['Unnamed: 0'])
predicted_points.head()

Unnamed: 0,team_name,rank_pm1,rank_pm2,rank_pm3,year
0,Chennai Super Kings,7,5,8,2008
1,Rajasthan Royals,5,7,6,2008
2,Royal Challengers Bangalore,1,1,3,2008
3,Kolkata Knight Riders,2,2,5,2008
4,Delhi Capitals,4,6,7,2008


In [3]:
points = pd.read_csv('actual_points.csv')
#Replaces 99 with 0 points for teams that did not play in that particular year
points = points.replace(99, 0)
points.head()

Unnamed: 0,Team,points_2008,points_2009,points_2010,points_2011,points_2012,points_2013,points_2014,points_2015,points_2016,points_2017,points_2018,points_2019,points_2020,points_2021,points_2022,points_2023
0,Kochi Tuskers Kerala,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0
1,Gujarat Titans,0,0,0,0,0,0,0,0,18,8,0,0,0,0,20,20
2,Lucknow Super Giants,0,0,0,9,8,8,0,0,10,18,0,0,0,0,18,17
3,Chennai Super Kings,16,17,14,18,17,22,18,18,0,0,18,18,12,18,8,17
4,Mumbai Indians,14,11,20,18,20,22,14,16,14,20,12,18,18,14,8,16


In [4]:
years = predicted_points['year'].unique()
    
team_rr = "Rajasthan Royals"
team_csk = "Chennai Super Kings"  
team_gt = "Gujarat Titans"
team_lsg = "Lucknow Super Giants"
team_ktk = "Kochi Tuskers Kerala"

new_rows = []
#Adding the disqualified teams only for the years they were nt disqualified
for year in range(2016, 2017):
    new_rows.append({
        'team_name': team_rr,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })
    new_rows.append({
        'team_name': team_csk,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })


#LSG did not exist between 08-10 and 14-15 and 17-21
for year in range(2008, 2010):
        new_rows.append({
        'team_name': team_lsg, 
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })

for year in range(2014,2015):
        new_rows.append({
        'team_name': team_lsg,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })
# LSG and GT did not exist between 2018 and 2021
for year in range(2018,2021):
        new_rows.append({
        'team_name': team_lsg,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })
        new_rows.append({
        'team_name': team_gt,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })
        
# Gujarat Titans Inactive Years
for year in range(2008,2015):
        new_rows.append({
        'team_name': team_gt,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })
        

#Adding Kochi Tuskers Kerala for the year they were active
for year in range(2008, 2010):
    new_rows.append({
        'team_name': team_ktk,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })
    
for year in range(2012, 2023):
    new_rows.append({
        'team_name': team_ktk,
        'rank_pm1': 0,
        'rank_pm2': 0,
        'rank_pm3': 0,
        'year': year
    })

new_teams_df = pd.DataFrame(new_rows)

predicted_points = pd.concat([predicted_points, new_teams_df], ignore_index=True)

predicted_points.head()


Unnamed: 0,team_name,rank_pm1,rank_pm2,rank_pm3,year
0,Chennai Super Kings,7,5,8,2008
1,Rajasthan Royals,5,7,6,2008
2,Royal Challengers Bangalore,1,1,3,2008
3,Kolkata Knight Riders,2,2,5,2008
4,Delhi Capitals,4,6,7,2008


In [5]:
points_melted = points.melt(id_vars=['Team'], 
                              value_vars=['points_2008', 'points_2009', 'points_2010', 'points_2011', 'points_2012', 'points_2013', 'points_2014', 'points_2015', 'points_2016', 'points_2017', 'points_2018', 'points_2019', 'points_2020', 'points_2021', 'points_2022', 'points_2023'],
                              var_name='Year', value_name='Actual Points')

# Extract the year from the 'Year' column
points_melted['Year'] = points_melted['Year'].str.extract('(\d+)').astype(int)

# Merge metrics rankings and actual rankings
data = pd.merge(predicted_points, points_melted, left_on=['team_name', 'year'], right_on=['Team', 'Year'])

# Drop unnecessary columns
data = data.drop(columns=['Team', 'year'])

print(data)

                       team_name  rank_pm1  rank_pm2  rank_pm3  Year  \
0            Chennai Super Kings         7         5         8  2008   
1               Rajasthan Royals         5         7         6  2008   
2    Royal Challengers Bangalore         1         1         3  2008   
3                 Delhi Capitals         4         6         7  2008   
4                Kings XI Punjab         8         8         4  2008   
..                           ...       ...       ...       ...   ...   
147         Kochi Tuskers Kerala         0         0         0  2018   
148         Kochi Tuskers Kerala         0         0         0  2019   
149         Kochi Tuskers Kerala         0         0         0  2020   
150         Kochi Tuskers Kerala         0         0         0  2021   
151         Kochi Tuskers Kerala         0         0         0  2022   

     Actual Points  
0               16  
1               22  
2                8  
3               15  
4               20  
..       

In [6]:
X = data[['rank_pm1', 'rank_pm2', 'rank_pm3']]
y = data['Actual Points']

# Use Non-Negative Least Squares (NNLS) to fit the model
weights, residual = nnls(X, y)

print("Weights: ", weights)
print("Residual: ", residual)

# Use the model to predict rankings
predicted_points = X.dot(weights)

# Evaluate the model
mse = mean_squared_error(y, predicted_points)
print("Mean Squared Error: ", mse)

# Print the actual and predicted rankings
result = data[['Year', 'Actual Points', 'team_name']].copy()
result['Predicted Points'] = predicted_points
print(result)

Weights:  [0.55152392 1.15864359 1.08330233]
Residual:  45.407634673847326
Mean Squared Error:  13.564824254431468
     Year  Actual Points                    team_name  Predicted Points
0    2008             16          Chennai Super Kings         18.320304
1    2008             22             Rajasthan Royals         17.367939
2    2008              8  Royal Challengers Bangalore          4.960074
3    2008             15               Delhi Capitals         16.741073
4    2008             20              Kings XI Punjab         18.014549
..    ...            ...                          ...               ...
147  2018              0         Kochi Tuskers Kerala          0.000000
148  2019              0         Kochi Tuskers Kerala          0.000000
149  2020              0         Kochi Tuskers Kerala          0.000000
150  2021              0         Kochi Tuskers Kerala          0.000000
151  2022              0         Kochi Tuskers Kerala          0.000000

[152 rows x 4 column

In [7]:
import os
directory = 'C:/Users/julia/Downloads/Performance Metric Phase III (2)/Performance Metric Phase III'
filename = 'points_predicted_rankings.csv'
file_path = os.path.join(directory, filename)
result.to_csv(file_path)