In [66]:
import json
import numpy
import requests
from bs4 import BeautifulSoup
from espncricinfo.exceptions import MatchNotFoundError, NoScorecardError
import pandas as pd

class Match(object):
    def __init__(self, match_id):
        self.match_id = match_id
        self.match_url = f"https://site.api.espn.com/apis/site/v2/sports/cricket/8048/summary?event={match_id}"
        self.json_url = f"https://www.espncricinfo.com/matches/engine/match/{match_id}.json"
        self.json = self.get_json()
        self.json_api = self.get_json_api()
        self.description = self.get_description()
        self.full_scorecard_url = self.get_full_scorecard_url()
        self.toss = self.get_toss()
        self.home_team = self.get_home_team()
        self.away_team = self.get_away_team()
        self.batting_first = self.get_batting_first()
        self.batting_second = self.get_batting_second()
        # self.team = self.get_team()
        self.team1_batting_df = self.get_team1_batting_df()
        self.team2_batting_df = self.get_team2_batting_df()
        self.team1_bowling_df = self.get_team1_bowling_df()
        self.team2_bowling_df = self.get_team2_bowling_df()
        self.team1_dismissals_df = self.decider1()
        self.team2_dismissals_df = self.decider2()
        self.team1dismiss = self.team1_dismissals_df
        self.team2dismiss = self.team2_dismissals_df
        self.team1batting = self.generate_batting_points(self.team1_batting_df, self.team1_dismissals_df)
        self.team2batting = self.generate_batting_points(self.team2_batting_df, self.team2_dismissals_df)
        self.team1bowling = self.generate_bowling_points(self.team1_bowling_df)
        self.team2bowling = self.generate_bowling_points(self.team2_bowling_df)
        self.dream11_points = self.get_dream11_points(self.team1batting, self.team2batting, self.team1bowling, self.team2bowling, self.team1dismiss, self.team2dismiss)
        
        
    def get_json(self):
        r = requests.get(self.json_url)
        if r.status_code == 404:
            raise MatchNotFoundError
        elif 'Scorecard not yet available' in r.text:
            raise NoScorecardError
        else:
            return r.json()
    
    def get_json_api(self):
        r = requests.get(self.match_url)
        if r.status_code == 404:
            raise MatchNotFoundError
        elif 'Scorecard not yet available' in r.text:
            raise NoScorecardError
        else:
            return r.json()
        
    def get_full_scorecard_url(self):
        url  = self.description
        matchName = url.split(",")[1].split(":")[1].strip().split(" ")[:-2]
        firstHalf = '-'.join(matchName).lower().replace("-v-", "-vs-")
        matchNo = url.split(",")[1].split(":")[0].replace(" ","-").lower()
        finalurl = firstHalf + matchNo + "-" + str(self.match_id)
        return f"https://www.espncricinfo.com/series/indian-premier-league-2024-1410320/{finalurl}/full-scorecard"
    
    def get_description(self):
        return self.json['description']
    
    def get_toss(self):
        return self.json_api['gameInfo']['venue']['fullName']
  
    def get_home_team(self):
        a = self.json_api['rosters'][0]['team']
        return {'name':a['abbreviation'], 'fullName' : a['displayName'], 'id' : a['id']}
    
    def get_away_team(self):
        a = self.json_api['rosters'][1]['team']
        return {'name':a['abbreviation'], 'fullName' : a['displayName'], 'id' : a['id']}
    
    # def get_team(self, team):
    #     team = []
    #     for i in self.json_api['rosters'][0]['roster']:
    #          team.append(i['athlete']['name'])
    #     return team
    
    def get_batting_first(self):
        a = self.json_api['matchcards'][0]
        teamName = a['teamName']
        otherTeamName = [self.home_team['name'],self.away_team['name']]
        otherTeamName.remove(teamName)
        inningsNumber = a['inningsNumber']
        headline = a['headline']
        if(headline == 'Batting'):
            if(inningsNumber == '1'):
                return teamName
            else:
                return otherTeamName[0]
        else:
            if(inningsNumber == '2'):
                return teamName
            else:
                return otherTeamName[0]

    def get_batting_second(self):
        a = self.json_api['matchcards'][0]
        teamName = a['teamName']
        otherTeamName = [self.home_team['name'],self.away_team['name']]
        otherTeamName.remove(teamName)
        inningsNumber = a['inningsNumber']
        headline = a['headline']
        if(headline == 'Batting'):
            if(inningsNumber == '1'):
                return otherTeamName[0]
            else:
                return teamName
        else:
            if(inningsNumber == '2'):
                return otherTeamName[0]
            else:
                return teamName

    def get_team1_batting_df(self):
        r = requests.get(self.full_scorecard_url)
        soup = BeautifulSoup(r.content, 'html.parser')
        df = pd.DataFrame(columns= ['Name', "Wicket", "Runs", "Balls", "Fours", "Sixes"])
        tables = soup.find_all('table')
        brows = tables[0].findAll('tr')[1:-4]
        for brow in brows:
            x  = brow.findAll('td')
            if len(x) == 1:
                continue
            name = x[0].text
            name2 = name.replace('(c)','')
            name3 = name2.replace('†', '')
            name3 = name3.strip()
            df.loc[len(df)] = [name3, x[1].text, x[2].text, x[3].text, x[5].text, x[6].text]
        df[["Runs", "Balls", "Fours", "Sixes"]] = df[["Runs", "Balls", "Fours", "Sixes"]].astype(int)
        df["Strike Rate"] = (df["Runs"] / df["Balls"] * 100).round(2)
        return df

    def get_team2_batting_df(self):
        r = requests.get(self.full_scorecard_url)
        soup = BeautifulSoup(r.content, 'html.parser')
        df = pd.DataFrame(columns= ['Name', "Wicket", "Runs", "Balls", "Fours", "Sixes"])
        tables = soup.find_all('table')
        brows = tables[2].findAll('tr')[1:-4]
        for brow in brows:
            x  = brow.findAll('td')
            if len(x) == 1:
                continue
            name = x[0].text
            name2 = name.replace('(c)','')
            name3 = name2.replace('†', '')
            name3 = name3.strip()
            df.loc[len(df)] = [name3, x[1].text, x[2].text, x[3].text, x[5].text, x[6].text]
        df[["Runs", "Balls", "Fours", "Sixes"]] = df[["Runs", "Balls", "Fours", "Sixes"]].astype(int)
        df["Strike Rate"] = (df["Runs"] / df["Balls"] * 100).round(2)
        return df
    
    def get_team2_bowling_df(self):
        table = pd.read_html(self.full_scorecard_url)
        df = table[1]
        df = df[~df['BOWLING'].str[0].str.isdigit()]
        df.rename(columns = {'O':'Overs', 'M' : 'Maidens', 'R' : 'Runs', 'W' : 'Wickets', 'ECON' : 'Economy'} , inplace= True)
        df[["Overs", "Economy",]] = df[["Overs","Economy"]].astype(float)
        df[["Maidens", "Runs", "Wickets", "0s", "4s", "6s", "WD", "NB"]] = df[["Maidens", "Runs", "Wickets", "0s", "4s", "6s", "WD", "NB"]].astype("int")
        return df
    
    def get_team1_bowling_df(self):
        table = pd.read_html(self.full_scorecard_url)
        df = table[3]
        df = df[~df['BOWLING'].str[0].str.isdigit()]
        df.rename(columns = {'O':'Overs', 'M' : 'Maidens', 'R' : 'Runs', 'W' : 'Wickets', 'ECON' : 'Economy'} , inplace= True)
        df[["Overs", "Economy",]] = df[["Overs","Economy"]].astype(float)
        df[["Maidens", "Runs", "Wickets", "0s", "4s", "6s", "WD", "NB"]] = df[["Maidens", "Runs", "Wickets", "0s", "4s", "6s", "WD", "NB"]].astype("int")
        return df
    
    def generate_batting_points(self,x, y):
        x = x.merge(y)
        x['Fantasy Points'] = x['Runs'] + x['Fours'] + x['Sixes']*2
        x['Fantasy Points'] = numpy.where(x['Runs'] >= 50, x['Fantasy Points'] + 8, x['Fantasy Points'])
        x['Fantasy Points'] = numpy.where(x['Runs'] >= 100, x['Fantasy Points'] + 8, x['Fantasy Points'])
        x['Fantasy Points'] = numpy.where((x['Runs'] == 0) & (x['Wicket'] != 'not out '), x['Fantasy Points'] - 2 , x['Fantasy Points'])
        x['Fantasy Points'] = numpy.where((x['Strike Rate'] <=70 ) & (x['Balls'] >= 10) & (x['Position'] != 'Bowler'), x['Fantasy Points'] - 2 , x['Fantasy Points'])
        x['Fantasy Points'] = numpy.where((x['Strike Rate'] <60) & (x['Balls'] >= 10) & (x['Position'] != 'Bowler'), x['Fantasy Points'] - 2 , x['Fantasy Points'])
        x['Fantasy Points'] = numpy.where((x['Strike Rate'] <50 ) & (x['Balls'] >= 10) & (x['Position'] != 'Bowler'), x['Fantasy Points'] - 2 , x['Fantasy Points'])
        x.drop(x.iloc[:, 1:11], inplace=True, axis=1)
        return x

    def generate_bowling_points(self, y):
        y['Fantasy Points'] = y['Wickets']*25 + y['Maidens']*8 
        y['Fantasy Points'] = numpy.where(y['Wickets'] >= 4, y['Fantasy Points'] + 8, y['Fantasy Points'])
        y['Fantasy Points'] = numpy.where(y['Wickets'] >= 5, y['Fantasy Points'] + 8, y['Fantasy Points'])
        y['Fantasy Points'] = numpy.where((y['Economy'] <= 6) & (y['Overs'] >=2) , y['Fantasy Points'] +2, y['Fantasy Points'])
        y['Fantasy Points'] = numpy.where((y['Economy'] <= 5) & (y['Overs'] >=2), y['Fantasy Points'] +2, y['Fantasy Points'])
        y.drop(y.iloc[:, 1:11], inplace=True, axis=1)
        y.rename(columns = {'BOWLING':'Name'}, inplace = True)
        return y
    
    def fielding1(self, team1points,team2dismiss):
        catcher = team2dismiss.loc[team2dismiss['Dismissal Type'] == 'c']
        catcher = catcher['Catcher/Runout']
        catcher = dict(zip(*numpy.unique(catcher, return_counts=True)))
        output = pd.DataFrame(columns= ["Name", "Fantasy Points"])
        for i in catcher:
            output.loc[len(output)] = [i, catcher[i] * 8]
        st = team2dismiss.loc[team2dismiss['Dismissal Type'] == 'st']
        st = st['Catcher/Runout']
        st = dict(zip(*numpy.unique(st, return_counts=True)))
        output3 = pd.DataFrame(columns= ["Name", "Fantasy Points"])
        for i in st:
            output3.loc[len(output3)] = [i, st[i] * 12]
        runout = team2dismiss.loc[team2dismiss['Dismissal Type'] == 'run out']
        runout = runout['Catcher/Runout']
        l = {}
        for i in runout:
            try:
                if len(i.split('/')) > 1:
                    l[i.split('/')[0]] = 6
                    l[i.split('/')[1]] = 6
                else:
                    l[i] = 12
            except:
                continue
        output2 = pd.DataFrame(columns= ["Name", "Fantasy Points"])
        for i in l:
            output2.loc[len(output2)] = [i, l[i]]
        a = pd.concat([team1points, output]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        b = pd.concat([a, output3]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        c = pd.concat([b, output2]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        return c

    def fielding2(self,team2points,team1dismiss):
        catcher = team1dismiss.loc[team1dismiss['Dismissal Type'] == 'c']
        catcher = catcher['Catcher/Runout']
        catcher = dict(zip(*numpy.unique(catcher, return_counts=True)))
        output = pd.DataFrame(columns= ["Name", "Fantasy Points"])
        for i in catcher:
            output.loc[len(output)] = [i, catcher[i] * 8]
        st = team1dismiss.loc[team1dismiss['Dismissal Type'] == 'st']
        st = st['Catcher/Runout']
        st = dict(zip(*numpy.unique(st, return_counts=True)))
        output3 = pd.DataFrame(columns= ["Name", "Fantasy Points"])
        for i in st:
            output3.loc[len(output3)] = [i, st[i] * 12]
        runout = team1dismiss.loc[team1dismiss['Dismissal Type'] == 'run out']
        runout = runout['Catcher/Runout']
        l = {}
        for i in runout:
            try:
                if len(i.split('/')) > 1:
                    l[i.split('/')[0]] = 6
                    l[i.split('/')[1]] = 6
                else:
                    l[i] = 12
            except:
                continue
        output2 = pd.DataFrame(columns= ["Name", "Fantasy Points"])
        for i in l:
            output2.loc[len(output2)] = [i, l[i]]
        a = pd.concat([team2points, output]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        b = pd.concat([a, output2]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        c = pd.concat([b, output3]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        return c

    def generate_points1(self,team1batting,team2dismiss, team1bowling):
        team1points = pd.concat([team1batting, team1bowling]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        return self.fielding1(team1points, team2dismiss)

    def generate_points2(self,team2batting,team1dismiss, team2bowling):
        team2points = pd.concat([team2batting, team2bowling]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
        return self.fielding2(team2points, team1dismiss)
                         
    def get_dream11_points(self, team1batting, team2batting, team1bowling, team2bowling, team1dismiss, team2dismiss):
        a = self.generate_points1(team1batting, team2dismiss, team1bowling)
        b = self.generate_points2(team2batting, team1dismiss, team2bowling)
        return pd.concat([a, b]).groupby(["Name"], as_index=False)["Fantasy Points"].sum().sort_values(by = "Fantasy Points", ascending = False)
    
    
    def get_team1_dismissals_df(self):
        df = pd.DataFrame(columns= ['Name', "Position", "Dismissal Type", "Bowler", "Catcher/Runout"])
        if self.home_team['name'] == self.batting_first:
            z = 0
        else:
            z = 1
        for i in self.json_api['rosters'][0]['roster']:
            name = i['athlete']['name']
            position = i['athlete']['position']['name']
            dismissalType = i['linescores'][z]['statistics']['categories'][0]['stats'][4]['value']
            try:
                if(len(i['linescores'][z]['statistics']['batting']['outDetails']['fielders']) > 1):
                    catcher = i['linescores'][z]['statistics']['batting']['outDetails']['fielders'][0]['athlete']['displayName'] + '/' + i['linescores'][0]['statistics']['batting']['outDetails']['fielders'][1]['athlete']['displayName']
                else:
                    catcher = i['linescores'][z]['statistics']['batting']['outDetails']['fielders'][0]['athlete']['displayName']
            except:
                catcher = None
            try:
                bowler = i['linescores'][z]['statistics']['batting']['outDetails']['bowler']['displayName']
            except:
                bowler = None
            df.loc[len(df)] = [name, position, dismissalType,bowler, catcher]
        return df
    
    def get_team2_dismissals_df(self):
        df = pd.DataFrame(columns= ['Name', "Position", "Dismissal Type", "Bowler", "Catcher/Runout"])
        if self.home_team['name'] == self.batting_first:
            z = 1
        else:
            z = 0
        for i in self.json_api['rosters'][1]['roster']:
            name = i['athlete']['name']
            position = i['athlete']['position']['name']
            dismissalType = i['linescores'][z]['statistics']['categories'][0]['stats'][4]['value']
            try:
                if(len(i['linescores'][z]['statistics']['batting']['outDetails']['fielders']) > 1):
                    catcher = i['linescores'][z]['statistics']['batting']['outDetails']['fielders'][0]['athlete']['displayName'] + '/' + i['linescores'][0]['statistics']['batting']['outDetails']['fielders'][1]['athlete']['displayName']
                else:
                    catcher = i['linescores'][z]['statistics']['batting']['outDetails']['fielders'][0]['athlete']['displayName']
            except:
                catcher = None
            try:
                bowler = i['linescores'][z]['statistics']['batting']['outDetails']['bowler']['displayName']
            except:
                bowler = None
            df.loc[len(df)] = [name, position, dismissalType,bowler, catcher]
        return df
    def decider1(self):
        if self.home_team['name'] == self.batting_first:
            return self.get_team1_dismissals_df()
        else:
            return self.get_team2_dismissals_df()
        
    def decider2(self):
        if self.home_team['name'] == self.batting_first:
            return self.get_team2_dismissals_df()
        else:
            return self.get_team1_dismissals_df()





class IPL(object):
    def __init__(self):
        # self.match_id = season
        self.season_url = self.get_season_url()
        self.match_list = self.get_match_list()

    def get_season_url(self):
        return "https://www.espncricinfo.com/series/indian-premier-league-2024-1410320/match-schedule-fixtures-and-results"

    def get_match_list(self):
        r = requests.get(self.season_url)
        soup = BeautifulSoup(r.content, 'html.parser')
        matches = soup.find_all("div", {"class": "ds-p-0"})
        matches = matches[0]
        matches = matches.findAll('a')
        matchesList = []
        for match in matches:
            link = match.get('href')
            if(link.endswith("full-scorecard")):
                matchesList.append(link)
        df = pd.DataFrame(columns= ['Match Number', 'Home Team', 'Away Team', 'Match ID', 'Match Type', 'scoreboard url'])
        for i in matchesList:
            print(i)
            matchType = 'league'
            url = "https://www.espncricinfo.com" + i
            d = i.split("/")
            d = d[3]
            z = d.split("-")
            matchID = z[-1]
            z = z[:-2]
            matchNumber = z[-1]
            try:
                matchNumber = int(''.join(filter(str.isdigit, matchNumber)))
            except:
                qa = d.split("-")[-3:-1]
                try:
                    if(int(qa[1])):
                        matchNumber = qa[0]
                except:
                    matchNumber = qa[1]
            z = z[:-1]
            teams = ' '.join(z)
            teams = teams.split('vs')
            homeTeam = teams[0]
            awayTeam = teams[1]
            df.loc[len(df)] = [matchNumber, homeTeam, awayTeam, matchID, matchType, url]
        return df
        

In [67]:
a = IPL()
matchList = a.match_list

/series/indian-premier-league-2024-1410320/chennai-super-kings-vs-royal-challengers-bengaluru-1st-match-1422119/full-scorecard
/series/indian-premier-league-2024-1410320/punjab-kings-vs-delhi-capitals-2nd-match-1422120/full-scorecard
/series/indian-premier-league-2024-1410320/kolkata-knight-riders-vs-sunrisers-hyderabad-3rd-match-1422121/full-scorecard
/series/indian-premier-league-2024-1410320/rajasthan-royals-vs-lucknow-super-giants-4th-match-1422122/full-scorecard
/series/indian-premier-league-2024-1410320/gujarat-titans-vs-mumbai-indians-5th-match-1422123/full-scorecard
/series/indian-premier-league-2024-1410320/royal-challengers-bengaluru-vs-punjab-kings-6th-match-1422124/full-scorecard
/series/indian-premier-league-2024-1410320/chennai-super-kings-vs-gujarat-titans-7th-match-1422125/full-scorecard
/series/indian-premier-league-2024-1410320/sunrisers-hyderabad-vs-mumbai-indians-8th-match-1422126/full-scorecard
/series/indian-premier-league-2024-1410320/rajasthan-royals-vs-delhi-ca

In [72]:
match = Match(matchList.iloc[-3]["Match ID"])

fp = match.dream11_points

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns = {'O':'Overs', 'M' : 'Maidens', 'R' : 'Runs', 'W' : 'Wickets', 'ECON' : 'Economy'} , inplace= True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[["Overs", "Economy",]] = df[["Overs","Economy"]].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[["Maidens", "Runs", "Wickets", "0s", "4s", "6s", "WD", "NB"]] = df[["Maide

In [73]:
fp

Unnamed: 0,Name,Fantasy Points
14,Shreyas Iyer,87
6,Mitchell Starc,75
20,Venkatesh Iyer,72
10,Rahul Tripathi,72
8,Pat Cummins,61
9,Rahmanullah Gurbaz,51
19,Varun Chakravarthy,50
15,Sunil Narine,50
2,Andre Russell,39
5,Heinrich Klaasen,37
