# Dream 11

Here I have used web scraping to get the scorecard of match from ESPN cricinfo.
And used Pandas to manipulate the data the way I required.
This is the program similar to Dream11. You can choose your Team XI and captain & vice captain of your team.
As you know the Captain and Vice-captain will get awarded by 2x and 1.5x points respectively.
Point awarding is much more similar to the Dream11 

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np

def extract_batting_data(series_id, match_id):

    URL = 'https://www.espncricinfo.com/series/'+ str(series_id) + '/scorecard/' + str(match_id)
    page = requests.get(URL)
    bs = BeautifulSoup(page.content, 'lxml')

    table_body=bs.find_all('tbody')
    batsmen_df = pd.DataFrame(columns=["Name","Desc","Runs", "Balls", "4s", "6s", "SR", "Team"])
    for i, table in enumerate(table_body[0:4:2]):
        rows = table.find_all('tr')
        for row in rows[::2]:
            cols=row.find_all('td')
            cols=[x.text.strip() for x in cols]
            if cols[0] == 'Extras':
                continue
            if len(cols) > 7:
                batsmen_df = batsmen_df.append(pd.Series(
                [re.sub(r"\W+", ' ', cols[0].split("(c)")[0]).strip(), cols[1], 
                cols[2], cols[3], cols[5], cols[6], cols[7], i+1], 
                index=batsmen_df.columns ), ignore_index=True)
            else:
                batsmen_df = batsmen_df.append(pd.Series(
                [re.sub(r"\W+", ' ', cols[0].split("(c)")[0]).strip(), cols[1], 
                0, 0, 0, 0, 0, i+1], index = batsmen_df.columns), ignore_index=True)
                
    for i in range(2):
        dnb_row = bs.find_all("tfoot")[i].find_all("div")
        for c in dnb_row:
            dnb_cols = c.find_all('span')
            dnb = [x.text.strip().split("(c)")[0] for x in dnb_cols]
            dnb = filter(lambda item: item, [re.sub(r"\W+", ' ', x).strip() for x in dnb])
            for dnb_batsman in dnb:
                batsmen_df = batsmen_df.append(pd.Series([dnb_batsman, "DNB", 0, 0, 0, 0, 0, i+1], index = batsmen_df.columns), ignore_index =True)
                
                
    batsmen_df['Name'] = batsmen_df['Name'].apply(lambda x : x.split(sep=" ")[0][0] +" " + x.split(sep=" ")[-1])

    return batsmen_df
    


In [2]:

def extract_bowling_data(series_id, match_id):

    URL = 'https://www.espncricinfo.com/series/'+ str(series_id) + '/scorecard/' + str(match_id)
    page = requests.get(URL)
    bs = BeautifulSoup(page.content, 'lxml')

    table_body=bs.find_all('tbody')
    bowler_df = pd.DataFrame(columns=['Name', 'Overs', 'Maidens', 'Runs', 'Wickets',
                                      'Econ', 'Dots', '4s', '6s', 'Wd', 'Nb','Team'])
    for i, table in enumerate(table_body[1:4:2]):
        rows = table.find_all('tr')
        for row in rows:
            cols=row.find_all('td')
            cols=[x.text.strip() for x in cols]
            bowler_df = bowler_df.append(pd.Series([cols[0], cols[1], cols[2], cols[3], cols[4], cols[5], 
                                                    cols[6], cols[7], cols[8], cols[9], cols[10], (i==0)+1], 
                                                   index=bowler_df.columns ), ignore_index=True)
        bowler_df['Name'] = bowler_df['Name'].apply(lambda x : x.split(sep=" ")[0][0] +" " + x.split(sep=" ")[-1])
    return bowler_df
    

In [3]:
def get_lineup(batsmen_df) :
    
    
    return batsmen_df['Name']

In [4]:
##Dataframe based on actual 22 playing players
def model_df(lineups , batsmen_df , bowler_df) :

    points = ['Name','Presence_in_Match','Runs_Scored',"4's" , "6's" , 'HalfCentury','Century','wickets','Economy_Rate']
    df = pd.DataFrame(columns=points)
    
    df.Name = lineups
    df['Presence_in_Match'] = np.repeat(2,22)
    print(df)
    
    runs = np.repeat(0 , 22)
    
    df['Runs_Scored'] = batsmen_df['Runs'].astype('int')
    df["6's"] = batsmen_df['6s'].astype('int')
    df["4's"] = batsmen_df['4s'].astype('int')
    
    df.loc[df['Runs_Scored']>=100,'Century'] = 16
    df.loc[(df['Runs_Scored']<100) & (df['Runs_Scored']>50),'HalfCentury'] = 8
    df.fillna(0 , inplace = True)
    
    df = df.merge(bowler_df , on = 'Name' , how = 'outer')
    df.drop('wickets' , axis = 1, inplace=True)
    df.drop(labels= [ 'Economy_Rate' , 'Team' ] , axis=1 , inplace=True)
    
    
    df['Wickets'].fillna(0,inplace=True)
    df['Wickets'] = df['Wickets'].astype(int)
    
    
    
    
    
    
    
    
    
    df['Total_Points'] = df['Presence_in_Match'] + df['Runs_Scored']  + df['HalfCentury'] + df['Century'] +(df['Wickets']*25) + (df["4's"]*2) + (df["6's"]*4)
    return df



In [5]:
##Creating Contestants team!
def automate(t1_df,t2_df):
    ##Concatenate 2 squad dataframes together
    cap = 0
    vc = 0
    i = 0
    t =[]
    print(f"USER {i} :Pick your Dream Team From below")
    new_df = pd.concat([t1_df, t2_df], axis=0)
    new_df.reset_index(inplace=True, drop=True)
    print(new_df)
    ##accept 11 players for contestant
    for i in range(11) :
        p = int(input("Enter the id of player you want to be in your team"))
        t.append(new_df.loc[p])
        print(t)
        

        
    print(pd.DataFrame(t))
    ##Choose Captain and Vice-Captain
    cap = int(input("Choose your Captain!"))
    print(f"{new_df.iloc[cap]} is the captain")
    vice = int(input("Choose your Vice - Captain!"))
    print(f"{new_df.iloc[vice]} is the Vice - captain")
    
    return t , new_df.loc[cap] , new_df.loc[vice]

In [6]:
##Create a batting statistics dataframe
batsmen_df = extract_batting_data(series_id = 8048, match_id = 1216508)
for i in range(len(batsmen_df)) :
    
    if " ov" in batsmen_df['Name'][i] :
        batsmen_df.drop(index = i , inplace = True)
batsmen_df = batsmen_df.reset_index()
batsmen_df

Unnamed: 0,index,Name,Desc,Runs,Balls,4s,6s,SR,Team
0,0,Q Kock,c Naik b Shivam Mavi,1,3,0,0,33.33,1
1,1,R Sharma,c Cummins b Shivam Mavi,80,54,3,6,148.15,1
2,2,S Yadav,run out (Narine/Morgan),47,28,6,1,167.86,1
3,3,S Tiwary,c Cummins b Narine,21,13,1,1,161.54,1
4,4,H Pandya,hit wicket b Russell,18,13,2,1,138.46,1
5,5,K Pollard,not out,13,7,1,0,185.71,1
6,6,K Pandya,not out,1,3,0,0,33.33,1
7,7,S Gill,c Pollard b Boult,7,11,1,0,63.64,2
8,8,S Narine,c †de Kock b Pattinson,9,10,0,1,90.0,2
9,9,D Karthik,lbw b Chahar,30,23,5,0,130.43,2


In [8]:
##Create a bowling performance dataframes
bowler_df = extract_bowling_data(series_id = 8048, match_id = 1216508)
bowler_df = bowler_df.reset_index()
bowler_df

Unnamed: 0,index,Name,Overs,Maidens,Runs,Wickets,Econ,Dots,4s,6s,Wd,Nb,Team
0,0,S Warrier,3,0,34,0,11.33,8,5,1,3,0,2
1,1,S Mavi,4,1,32,2,8.0,10,2,0,3,0,2
2,2,P Cummins,3,0,49,0,16.33,3,3,4,3,0,2
3,3,S Narine,4,0,22,1,5.5,8,2,0,0,0,2
4,4,A Russell,2,0,17,1,8.5,4,1,1,0,0,2
5,5,K Yadav,4,0,39,0,9.75,3,0,3,1,1,2
6,6,T Boult,4,1,30,2,7.5,13,6,0,0,0,1
7,7,J Pattinson,4,0,25,2,6.25,13,1,2,1,0,1
8,8,J Bumrah,4,0,32,2,8.0,14,0,4,1,0,1
9,9,R Chahar,4,0,26,2,6.5,11,2,1,0,0,1


In [9]:
##Create a lineup datagrame
lineups = get_lineup(batsmen_df)

In [10]:
##Initial dataframe for matchday squad
model = model_df(lineups, batsmen_df, bowler_df)

           Name  Presence_in_Match Runs_Scored  4's  6's HalfCentury Century  \
0        Q Kock                  2         NaN  NaN  NaN         NaN     NaN   
1      R Sharma                  2         NaN  NaN  NaN         NaN     NaN   
2       S Yadav                  2         NaN  NaN  NaN         NaN     NaN   
3      S Tiwary                  2         NaN  NaN  NaN         NaN     NaN   
4      H Pandya                  2         NaN  NaN  NaN         NaN     NaN   
5     K Pollard                  2         NaN  NaN  NaN         NaN     NaN   
6      K Pandya                  2         NaN  NaN  NaN         NaN     NaN   
7        S Gill                  2         NaN  NaN  NaN         NaN     NaN   
8      S Narine                  2         NaN  NaN  NaN         NaN     NaN   
9     D Karthik                  2         NaN  NaN  NaN         NaN     NaN   
10       N Rana                  2         NaN  NaN  NaN         NaN     NaN   
11     E Morgan                  2      

In [11]:
##25 Men squad for both the teams
t1_df = pd.read_csv('mi.csv')
t2_df = pd.read_csv('kkr.csv')

##Acecept the team amd captains
player , cap , vice = automate(t1_df,t2_df)


user_df = pd.DataFrame(player)
user_df['Name'] = user_df['players']

user_df.drop('players' , axis = 1 , inplace = True)


USER 0 :Pick your Dream Team From below
           players
0         R Sharma
1          A Singh
2          T Boult
3         J Bumrah
4         R Chahar
5   N Coulter-Nile
6           Q Kock
7       D Deshmukh
8         I Kishan
9       D Kulkarni
10          C Lynn
11   M McClenaghan
12          M Khan
13        H Pandya
14        K Pandya
15     J Pattinson
16       K Pollard
17           P Rai
18           A Roy
19    S Rutherford
20          A Tare
21        S Tiwary
22         J Yadav
23         S Yadav
24       D Karthik
25          A Khan
26        T Banton
27       P Cummins
28      L Ferguson
29         C Green
30         K Yadav
31           S Lad
32        E Morgan
33     K Nagarkoti
34          N Naik
35        S Narine
36       P Krishna
37          N Rana
38       A Russell
39       S Warrier
40          S Mavi
41          S Gill
42     M Siddharth
43         R Singh
44      R Tripathi
45  V Chakravarthy
Enter the id of player you want to be in your team0
[players    R S

In [12]:
captain = cap['players']

In [13]:
vice_captain = vice['players']

In [14]:
##Create final user dataframe
user_df = user_df.merge(model , on ='Name' , how='left') 
user_df
user_df['Total_Points'].sum()
cap_id = user_df[user_df['Name'] == captain].index.tolist()
vice_id = user_df[user_df['Name'] == vice_captain].index.tolist()


user_df.fillna(0 , inplace = True)
##Conditions for Captain  and Vice Captain
user_df.at[cap_id[0] , "Total_Points"] = user_df.at[cap_id[0] , "Total_Points"]*2
user_df.at[vice_id[0] , "Total_Points"] = user_df.at[vice_id[0] , "Total_Points"]*1.5
user_df

Unnamed: 0,Name,Presence_in_Match,Runs_Scored,4's,6's,HalfCentury,Century,index,Overs,Maidens,Runs,Wickets,Econ,Dots,4s,6s,Wd,Nb,Total_Points
0,R Sharma,2.0,80.0,3.0,6.0,8.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,240.0
1,T Boult,2.0,0.0,0.0,0.0,0.0,0.0,6.0,4,1,30,2.0,7.5,13,6,0,0,0,52.0
2,J Bumrah,2.0,0.0,0.0,0.0,0.0,0.0,8.0,4,0,32,2.0,8.0,14,0,4,1,0,78.0
3,R Chahar,2.0,0.0,0.0,0.0,0.0,0.0,9.0,4,0,26,2.0,6.5,11,2,1,0,0,52.0
4,N Coulter-Nile,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0.0
5,Q Kock,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,3.0
6,I Kishan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0.0
7,D Kulkarni,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0.0
8,C Lynn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0.0
9,M McClenaghan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0.0


This is your final score!!!

In [15]:
sum(user_df['Total_Points'])

425.0