In [213]:
import pandas as pd
import numpy as np
import seaborn as sns

In [214]:
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [215]:
match_file = "/content/gdrive/My Drive/ML_project/Matches_clean.csv"
ball_file ="/content/gdrive/My Drive/ML_project/ball_filtered.csv"
raw_match_file = "/content/gdrive/My Drive/ML_project/IPLMatches2008-2020.csv"

ball = pd.read_csv(ball_file)
matches = pd.read_csv(raw_match_file)
clean_matches = pd.read_csv(match_file)

# Append year column

In [224]:
matches['yr'] = matches['date'].apply(lambda d: int(d.split('-')[0]))
match_dfs={}
for yr in range(2008, 2020):
  match_dfs[yr] = matches.loc[matches['yr'] == yr]

id_yr = {}
for index, row in matches.iterrows():
  id_yr[row['id']] = row['yr']

# Calc Points - by Jahnvi


In [217]:
def retpos(ids):
  points={}
  lst=['bowled','run out','lbw','stumped','caught','caught and bowled','hit wicket']
  for index, row in ball.iterrows():
    if row['id'] in ids:
      if row['batsman_runs']==4:
        try:
          points[row['batsman']]+=2.5
        except:
          points[row['batsman']]=2.5
      
      elif row['batsman_runs']==6:
        try:
          points[row['batsman']]+=3.5
        except:
          points[row['batsman']]=3.5

      if row['total_runs']==0:
        try:
          points[row['bowler']]+=1
        except:
          points[row['bowler']]=1

      if row['dismissal_kind']=='caught' or row['dismissal_kind']=='stumped':
        try:
          points[row['fielder']]+=2.5
        except:
          points[row['fielder']]=2.5
      
      if row['dismissal_kind'] in lst:
        try:
          points[row['bowler']]+=3.5
        except:
          points[row['bowler']]=3.5
  return points    

In [225]:
points={}
for yr in match_dfs.keys():
  id_lst = list(match_dfs[yr]['id'])
  points[yr] = retpos(id_lst)

In [228]:
player_dict={}

for yr in points:
  for player in points[yr].keys():
    try:
      player_dict[player][yr]=points[yr][player]
    except:
      player_dict[player]={}
      player_dict[player][yr]=points[yr][player]

# Find Team Players


In [229]:
team_players = {}
for index, row in ball.iterrows():
  if row['batting_team'] not in team_players.keys():
    team_players[row['batting_team']] = {}
    for yr in range (2008, 2021):
      team_players[row['batting_team']][yr] = []
    
  if row['bowling_team'] not in team_players.keys():
    team_players[row['bowling_team']] = {}
    for yr in range (2008, 2021):
      team_players[row['bowling_team']][yr] = []

  team_players[row['batting_team']][id_yr[row['id']]].append(row['batsman'])
  team_players[row['batting_team']][id_yr[row['id']]].append(row['non_striker'])
  team_players[row['bowling_team']][id_yr[row['id']]].append(row['bowler'])

for team in team_players.keys():
  for yr in team_players[team].keys():
    team_players[team][yr] = set(team_players[team][yr])

# Clean Data


In [230]:
clean_matches['yr'] = clean_matches['date'].apply(lambda d: int(d.split('-')[0]))
match_dfs={}
for yr in range(2008, 2020):
  match_dfs[yr] = clean_matches.loc[clean_matches['yr'] == str(yr)]
clean_matches = clean_matches.drop(['Unnamed: 0', 'date'], axis=1)
clean_matches.columns

Index(['id', 'city', 'player_of_match', 'venue', 'neutral_venue', 'team1',
       'team2', 'toss_winner', 'toss_decision', 'winner', 'result',
       'result_margin', 'umpire1', 'umpire2', 'yr'],
      dtype='object')

# Year-wise points of teams

In [231]:
team_year_points = {}
for team in team_players.keys():
  team_year_points[team] = {}
  for yr in team_players[team].keys():
    total_points = 0
    for player in team_players[team][yr]:
      try:
        total_points += player_dict[player][yr]
      except KeyError:
        total_points += 0
    team_year_points[team][yr] = total_points
team_year_points

{'Chennai Super Kings': {2008: 2032.0,
  2009: 1799.5,
  2010: 2108.5,
  2011: 1955.5,
  2012: 2197.5,
  2013: 2324.5,
  2014: 2064.0,
  2015: 2319.0,
  2016: 0,
  2017: 0,
  2018: 2225.5,
  2019: 2181.5,
  2020: 0},
 'Deccan Chargers': {2008: 1738.0,
  2009: 2094.0,
  2010: 2004.5,
  2011: 1757.0,
  2012: 1773.5,
  2013: 0,
  2014: 0,
  2015: 0,
  2016: 0,
  2017: 0,
  2018: 0,
  2019: 0,
  2020: 0},
 'Delhi Capitals': {2008: 0,
  2009: 0,
  2010: 0,
  2011: 0,
  2012: 0,
  2013: 0,
  2014: 0,
  2015: 0,
  2016: 0,
  2017: 0,
  2018: 0,
  2019: 2152.5,
  2020: 0},
 'Delhi Daredevils': {2008: 1796.5,
  2009: 1862.5,
  2010: 1766.0,
  2011: 1623.5,
  2012: 2190.5,
  2013: 1765.0,
  2014: 1465.5,
  2015: 1706.0,
  2016: 1587.0,
  2017: 1703.0,
  2018: 1790.5,
  2019: 0,
  2020: 0},
 'Gujarat Lions': {2008: 0,
  2009: 0,
  2010: 0,
  2011: 0,
  2012: 0,
  2013: 0,
  2014: 0,
  2015: 0,
  2016: 1869.5,
  2017: 1677.0,
  2018: 0,
  2019: 0,
  2020: 0},
 'Kings XI Punjab': {2008: 2006.5,
  2

# Append team points in data

In [232]:
clean_matches['team1_points']=0.0
clean_matches['team2_points']=0.0

for i in range(clean_matches.shape[0]):
  clean_matches.at[i, 'team1_points'] =  float(team_year_points[clean_matches.iloc[i]['team1']][clean_matches.iloc[i]['yr']])
  clean_matches.at[i, 'team2_points'] =  float(team_year_points[clean_matches.iloc[i]['team2']][clean_matches.iloc[i]['yr']])

In [233]:
clean_matches.to_csv('matches_with_points.csv')