In [117]:
import csv
import math

# Loads the CSV file
def load_data(filename):
    my_list = []
    with open(filename) as player_data:
        player_data_store = csv.reader(player_data, delimiter=',')
        next(player_data)
        for row in player_data_store:
                my_list.append(row)
        return my_list

In [118]:
# Helper methods for Performance Metric 2
def calc_bowling_tournament_average():
    sum = 0
    for i in range(len(player_list)):
        if not (math.isnan(player_list[i].bowling_avg)):
            sum += player_list[i].bowling_avg
        
    return sum / len(player_list)
    

def calc_batting_tournament_average():
    sum = 0
    for i in range(len(player_list)):
        if not (math.isnan(player_list[i].batting_avg)):
            sum += player_list[i].batting_avg
        
    return sum / len(player_list)
    

def calc_tournament_economy():
    sum = 0
    for i in range(len(player_list)):
        if not (math.isnan(player_list[i].bowling_sr)):
            sum += player_list[i].bowling_sr
        
    return sum / len(player_list)


def calc_tournament_strike_rate():
    sum = 0
    for i in range(len(player_list)):
        if not (math.isnan(player_list[i].batting_sr)):
            sum += player_list[i].batting_sr
    
    return sum / len(player_list)
    

In [119]:
def metric1(batting_average, batting_strike_rate, bowling_average, economy_rate):
    if batting_average == 0 or batting_strike_rate == 0 or math.isnan(batting_average) or math.isnan(bowling_average):
        batting_metric = 0
    else: batting_metric = (batting_average * batting_strike_rate)/100
        
    if bowling_average == 0 or economy_rate == 0 or math.isnan(bowling_average) or math.isnan(economy_rate):
        bowling_metric = 0
    else: bowling_metric = (bowling_average * economy_rate)/100
        
    return batting_metric + bowling_metric

def metric2(batting_player_average, player_strike_rate, bowling_player_average, player_economy, runs, wickets):
    
    batting_tournament_average = calc_batting_tournament_average()
    tournament_strike_rate = calc_tournament_strike_rate()
    bowling_tournament_average = calc_tournament_strike_rate()
    tournament_economy = calc_tournament_economy()
    
    batting_metric = ((batting_player_average/batting_tournament_average) + (player_strike_rate/tournament_strike_rate)**2) * runs
    
    if bowling_player_average == 0:
        bowling_metric = 0
    else:
        bowling_metric = ((bowling_tournament_average/bowling_player_average) + (tournament_economy/player_economy)**2) * wickets
    
    
    if math.isnan(batting_metric):
        batting_metric = 0

    if math.isnan(bowling_metric):
        bowling_metric = 0
    
    return batting_metric + bowling_metric

def metric3(batting_average, batting_strike_rate, bowling_strike_rate, bowling_average, economy):
    alpha = 0.75
    batting_metric = batting_average-alpha*(batting_strike_rate**alpha)
    bowling_metric = bowling_strike_rate-alpha*(bowling_average**alpha)

    if math.isnan(batting_metric):
        batting_metric = 0
        
    if math.isnan(bowling_metric):
        bowling_metric = 0
        
    return batting_metric + bowling_metric

In [140]:
def calc_pm_for_player(player):
    """
    Calculates all three performance metrics for each player object
    Input: Player object
    Output: Displays performance metric per player
    """
    
    pm1 = metric1(player.batting_avg, player.batting_sr, player.bowling_avg, player.economy)
    player.set_pm(pm1, 'pm1')
    pm2 = metric2(player.batting_avg, player.batting_sr, player.bowling_avg, player.economy, player.runs, player.wickets)
    player.set_pm(pm2, 'pm2')
    pm3 = metric3(player.batting_avg, player.batting_sr, player.bowling_sr, player.bowling_avg, player.economy)
    player.set_pm(pm3, 'pm3')
    #print(player.name)
    #player.display_performance_metrics()
    

In [141]:
# Creates player objects by reading the CSV file

import pandas as pd

player_data_2008 = pd.read_csv('player_stats08.csv')
player_data_2009 = pd.read_csv('player_stats09.csv')
player_data_2010 = pd.read_csv('player_stats10.csv')
player_data_2011 = pd.read_csv('player_stats11.csv')
player_data_2012 = pd.read_csv('player_stats12.csv')
player_data_2013 = pd.read_csv('player_stats13.csv')
player_data_2014 = pd.read_csv('player_stats14.csv')
player_data_2015 = pd.read_csv('player_stats15.csv')
player_data_2016 = pd.read_csv('player_stats16.csv')
player_data_2017 = pd.read_csv('player_stats17.csv')
player_data_2018 = pd.read_csv('player_stats18.csv')
player_data_2019 = pd.read_csv('player_stats19.csv')
player_data_2020 = pd.read_csv('player_stats20.csv')
player_data_2021 = pd.read_csv('player_stats21.csv')
player_data_2022 = pd.read_csv('player_stats22.csv')
player_data_2023 = pd.read_csv('player_stats23.csv')


years = 16

player_data_list = [player_data_2008, player_data_2009, player_data_2010, player_data_2011, player_data_2012, player_data_2013, player_data_2014, player_data_2015, player_data_2016, player_data_2017, player_data_2018, player_data_2019, player_data_2020, player_data_2021, player_data_2022, player_data_2023]
#code below very dumb but maps index in the list to year, key is the index value is the year
player_data_year_dict = {
    0 : 2008,
    1 : 2009,
    2 : 2010,
    3 : 2011,
    4 : 2012,
    5 : 2013,
    6 : 2014,
    7 : 2015,
    8 : 2016,
    9 : 2017,
    10: 2018,
    11: 2019,
    12: 2020,
    13: 2021,
    14: 2022,
    15: 2023
}

player_data_in_teams = []

# ensures that only players with a team are considered
for player_data in player_data_list:
    player_data_in_teams.append(player_data[player_data['team'] != 'None'])



In [142]:
class Player:
    
    def __init__(self, name, team, runs, wickets, bowled, conceded, batting_avg, batting_sr, bowling_avg, bowling_sr, economy):
        self.name = name
        self.team = team
        self.runs = runs
        self.wickets = wickets
        self.bowled = bowled
        self.conceded = conceded
        self.batting_avg = batting_avg
        self.batting_sr = batting_sr
        if self.wickets == 0:
            self.bowling_avg = 0
        else:
            self.bowling_avg = self.conceded/self.wickets
        self.bowling_sr = bowling_sr
        self.economy = economy
        self.pm1 = 0
        self.pm2 = 0
        self.pm3 = 0
        
        
    def set_pm(self, pm, label):
        """
        Initializes the three performance metrics for each Player object
        """
        if label == 'pm1':
            self.pm1 = pm
        if label == 'pm2':
            self.pm2 = pm
        if label == 'pm3':
            self.pm3 = pm

    def get_team(self):
        return self.team
            
        
    def display_performance_metrics(self):
        print(self.pm1)
        print(self.pm2)
        print(self.pm3)
        
    def get_team_name(self):
        return team
        

In [143]:
all_player_list = []

for player_data in player_data_in_teams:
    player_list = []
    for index, row in player_data.iterrows():
        # Creates a Player object using the column values from the CSV
        player = Player(
            name=row['player'], 
            team=row['team'], 
            runs=row['runs'], 
            wickets=row['wickets'], 
            bowled=row['overs bowled'], 
            conceded=row['runs conceded'], 
            batting_avg=row['batting average'], 
            batting_sr=row['batting SR'], 
            bowling_avg=row['bowling average'], 
            bowling_sr=row['bowling SR'], 
            economy=row['economy'], 
        )
        
         # Adds the Player object to the list
        player_list.append(player)
    all_player_list.append(player_list)

#now all_player_list is a matrix where each row represents the player data for 1 year
print(len(all_player_list))
    

16


In [144]:
class Team:

    def __init__(self, team_name):
        self.team_name = team_name
        self.players = []
        self.combined_pm1 = 0
        self.combined_pm2 = 0
        self.combined_pm3 = 0
        self.rank_pm1 = 0
        self.rank_pm2 = 0
        self.rank_pm3 = 0
        
        
    def get_team_name(self):
        return self.team_name

In [145]:
def find_team(team_name, teams):
    """
    Helper function that returns the team object based on the String team name out of the list of team objects
    Input: String team name, list of of objects of type Team
    Output: Corresponding Team object
    """
    for team in teams:
        if team.team_name == team_name:
            return team

In [146]:
# Prints each individual player and their three performance metrics
for year in range(years):
    for player in range(len(all_player_list[year])):
        calc_pm_for_player(all_player_list[year][player])

In [147]:
# The following piece of code creates the respective teams and adds all three performance metrics to a team based on  
# which players the team has 
def calc_pms(player_list): #players for a specific year
    list_of_team_names = [] # list of team names as strings
    team_list = [] # list of teams as objects of type Team
    #count = 0
    for i in range(len(player_list)): 
        team_name = player_list[i].team # retrieval of team name per individual player
        # Checks if team already exists
        if team_name not in list_of_team_names: 
            # If not a new team is created and the performance metrics are initialized from scratch
            team = Team(team_name)
            team.players.append(player_list[i])
            team.combined_pm1 = player_list[i].pm1
            team.combined_pm2 = player_list[i].pm2
            team.combined_pm3 = player_list[i].pm3
            team_list.append(team)
            list_of_team_names.append(team_name)
        if team_name in list_of_team_names: 
            # If it does then the performance metrics are simply summed to the already existing initialized values
            team = find_team(team_name, team_list)
            team.players.append(player_list[i])
            team.combined_pm1 += player_list[i].pm1
            team.combined_pm2 += player_list[i].pm2
            team.combined_pm3 += player_list[i].pm3
   
    # This is done until the performance metric of each individual player has been added to the correct team
    # Now team list contains all teams and the team object contains the performance metrics    
    return team_list


In [148]:
team_dict = {}
for i in range(years):
    #team_list = []
    team_dict[player_data_year_dict[i]] = calc_pms(all_player_list[i])# appends the team list for a certain year to the dic


In [158]:
def calc_all_ranks():
    rank_df_list = []
    
    for i in range(years):
        
        sorted_team_list_pm1 = sorted(team_dict[player_data_year_dict[i]], key=lambda x: x.combined_pm1, reverse=False)
    
        for rank, team in enumerate(sorted_team_list_pm1, start=1):
            team.rank_pm1 = rank
            
        sorted_team_list_pm2 = sorted(team_dict[player_data_year_dict[i]], key=lambda x: x.combined_pm2, reverse=False)
    
        for rank, team in enumerate(sorted_team_list_pm2, start=1):
            team.rank_pm2 = rank
            
        sorted_team_list_pm3 = sorted(team_dict[player_data_year_dict[i]], key=lambda x: x.combined_pm3, reverse=False)
    
        for rank, team in enumerate(sorted_team_list_pm3, start=1):
            team.rank_pm3 = rank
        #now for one year we have the expected rankings for all teams based on their performence metrics
        #make this into a df and append it to rank_df_list
        
        data = {
            'team_name': [],
            'rank_pm1': [],
            'rank_pm2': [],
            'rank_pm3': [],
            'year': []
        }

        for team in team_dict[player_data_year_dict[i]]:
            data['team_name'].append(team.get_team_name())
            print(team.get_team_name())
            data['rank_pm1'].append(team.rank_pm1)
            print(team.combined_pm1)
            data['rank_pm2'].append(team.rank_pm2)
            print(team.combined_pm2)
            data['rank_pm3'].append(team.rank_pm3)
            print(team.combined_pm3)
            data['year'].append(player_data_year_dict[i])
        
        year_rank_df = pd.DataFrame(data)
        
        rank_df_list.append(year_rank_df)

    
    return rank_df_list

final_list = calc_all_ranks()

Chennai Super Kings
663.846830596766
9951.743511861276
171.90463030385857
Rajasthan Royals
499.91553894184756
10438.179249132843
22.011114579422383
Royal Challengers Bangalore
429.3395006783217
5525.707901528211
11.314637767796677
Kolkata Knight Riders
462.8771725873016
6323.2057898581925
6.030688362431741
Delhi Capitals
465.8284981363636
10208.988133235038
64.81991242779749
Kings XI Punjab
897.6048146754587
12840.926907559213
229.16994761517455
Mumbai Indians
463.1254366060606
7387.948207988245
-66.20474709998005
Deccan Chargers
542.2398064444444
8248.183519379538
75.15208929739237
Rajasthan Royals
311.0887751904762
4222.830513048143
-52.62196610284357
Chennai Super Kings
446.225552043956
8344.465125087923
50.829000758816804
Deccan Chargers
457.47891769471096
8962.348310817684
122.64666979985121
Royal Challengers Bangalore
497.9219297594627
7029.727323716843
81.74900813427273
Kolkata Knight Riders
510.3124175238095
5294.942219543894
234.5163302076026
Delhi Daredevils
409.7081891629073

In [159]:
#now concatinate all the dfs into 1 and download it as a csv 
predicted_rankings_df = pd.concat(final_list, ignore_index=True)

In [160]:
print(predicted_rankings_df)

                       team_name  rank_pm1  rank_pm2  rank_pm3  year
0            Chennai Super Kings         7         5         7  2008
1               Rajasthan Royals         5         7         4  2008
2    Royal Challengers Bangalore         1         1         3  2008
3          Kolkata Knight Riders         2         2         2  2008
4                 Delhi Capitals         4         6         5  2008
..                           ...       ...       ...       ...   ...
132               Delhi Capitals         1         1         2  2023
133          Sunrisers Hyderabad         5         2         8  2023
134               Gujarat Titans        10        10        10  2023
135          Chennai Super Kings         7         9         7  2023
136               Mumbai Indians         6         7         4  2023

[137 rows x 5 columns]


In [161]:
import os
directory = '/Users/Lenovo/OneDrive/Desktop/Performance Metric Phase III'
filename = 'predicted_rankings_inverted.csv'
file_path = os.path.join(directory, filename)
predicted_rankings_df.to_csv(file_path)