# Problem 1: Top 10 Run Scorers

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings

warnings.filterwarnings("ignore")

class Player:
    """A cricketer to scrape: display name, national team and stats-page URL."""

    def __init__(self, player_name, country_name, url):
        # The three constructor arguments are stored unchanged as attributes.
        self.player_name, self.country_name, self.url = (
            player_name,
            country_name,
            url,
        )


# Function to scrape the per-opponent batting data of an individual player from "Howstat.com"
    
def get_player_stats(request_obj, timeout=30):
    """Scrape one player's per-opponent batting figures from a Howstat page.

    Parameters
    ----------
    request_obj : Player
        Supplies ``url`` (the page to download) plus ``player_name`` and
        ``country_name``, which are copied onto every output row.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout a stalled
        connection would block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        One row per data row of the first ``TableLined`` table, with the
        columns Player, Country, Opponent, Matches, Innings, Not_Out,
        Hundreds, Fifties, Runs and Average. Scraped values are kept as
        stripped text. ``None`` is returned (after printing a message) when
        the HTTP status is not 200 or the table cannot be found.

    Raises
    ------
    requests.exceptions.RequestException
        Network errors and timeouts from ``requests.get`` are not caught.
    """
    response = requests.get(request_obj.url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': 'TableLined'})
    if table is None:
        # The page loaded but has no statistics table (layout change or unknown player).
        print(f"No statistics table found for {request_obj.player_name}: {request_obj.url}")
        return None

    # Position of each output column among a row's <td> cells.
    cell_index = {
        'Opponent': 0,
        'Matches': 1,
        'Innings': 2,
        'Not_Out': 3,
        'Hundreds': 4,
        'Fifties': 5,
        'Runs': 8,
        'Average': 9,
    }
    cells_needed = max(cell_index.values()) + 1

    records = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = row.find_all('td')
        if len(cells) < cells_needed:
            # Spacer or malformed rows would otherwise raise IndexError.
            continue
        record = {
            'Player': request_obj.player_name,
            'Country': request_obj.country_name,
        }
        for column, position in cell_index.items():
            record[column] = cells[position].text.strip()
        records.append(record)

    # Passing the columns explicitly keeps the layout even when no rows were found.
    return pd.DataFrame(records, columns=['Player', 'Country', *cell_index])
    


Below is the list of all batsmen and the corresponding web page URLs from which their data is scraped.

In [4]:
# Per-opponent ODI statistics page on Howstat. Only PlayerID varies between
# players. The "#bat" fragment is presumably the anchor of the batting section;
# it is a client-side fragment and is not part of the HTTP request.
_BATTING_URL_TEMPLATE = (
    "http://www.howstat.com/cricket/Statistics/Players/"
    "PlayerOpponents_ODI.asp?PlayerID={player_id}#bat"
)

# (country, [(player name, Howstat PlayerID), ...]) for the batsmen and
# all-rounders of every squad, in the order they are scraped. The names are
# used verbatim downstream (label encoding and printed results).
_BATTING_SQUADS = [
    ("India", [
        ("Virat Kohli", 3600),
        ("Rohit Sharma", 3474),
        ("Shubman Gill", 4769),
        ("Shreyas Iyer", 4387),
        ("Suryakumar Yadav", 4073),
        ("Hardik Pandya", 4399),
        ("Ravindra Jadeja", 3644),
        ("Ravichandran Ashwin", 3788),
    ]),
    ("South Africa", [
        ("Temba Bavuma", 4370),
        ("Reeza Hendricks", 4340),
        ("Aiden Markram", 4701),
        ("David Miller", 3783),
        ("Rassie van der Dussen", 4817),
        ("Marco Jansen", 5974),
        ("Andile Phehlukwayo", 4588),
    ]),
    ("Australia", [
        ("Steven Smith", 3756),
        ("Travis Head", 4386),
        ("David Warner", 3637),
        ("Marnus Labuschagne", 4816),
        ("Cameroon Green", 5926),
        ("Mitchel Marsh", 3912),
        ("Glenn Maxwell", 3969),
        ("Marcus Stoinis", 4389),
        ("Sean Abbot", 4331),
    ]),
    ("New Zealand", [
        ("Kane Williamson", 3817),
        ("Mark Chapman", 4278),
        ("Glenn Phillips", 4647),
        ("Will Young", 5927),
        ("Daryl Mitchell", 4850),
        ("James Neesham", 3986),
        ("Rachin Ravindra", 6150),
        ("Mitchell Santner", 4429),
    ]),
    ("Pakistan", [
        ("Babar Azam", 4425),
        ("Abdullah Shafique", 5921),
        ("Fakhar Zaman", 4654),
        ("Imam-ul-Haq", 4706),
        ("Iftikhar Ahmed", 4488),
        ("Agha Salman", 6790),
        ("Saud Shakeel", 6039),
        ("Shadab Khan", 4653),
        ("Mohammad Nawaz", 4536),
    ]),
    ("Netherlands", [
        ("Vikramjit Singh", 5480),
        ("Sybrand Engelbrecht", 7565),
        ("Scott Edwards", 4792),
        ("Wesley Barresi", 3807),
        ("Saqib Zulfiqar", 4790),
        ("Max O'Dowd", 4443),
        ("Colin Ackermann", 5508),
        ("Bas De Leede", 4791),
        ("Teja Nidamanuru", 6620),
        ("Shariz Ahmad", 6623),
        ("Roelof Van Der Merwe", 3658),
    ]),
    ("Afghanistan", [
        ("Rahmanulla Gurbaz", 5474),
        ("Ibrahim Zadran", 5471),
        ("Ikram Ali khil", 4928),
        ("Najibullah Zadran", 3960),
        ("Hashmatullah Shahidi", 4239),
        ("Rahmat Shah", 4009),
        ("Mohammad Nabi", 3665),
        ("Rashid Khan", 4484),
        ("Azmatullah Omarzai", 5936),
    ]),
    ("England", [
        ("Jonny Bairstow", 3900),
        ("Dawid Malan", 4694),
        ("Joe Root", 3981),
        ("Harry Brook", 6470),
        ("Jos Buttler", 3888),
        ("Liam Livingstone", 4692),
        ("Moeen Ali", 4271),
        ("Ben Stokes", 3883),
        ("Sam Curran", 4787),
        ("Chris Woakes", 3846),
        ("David Willey", 4420),
    ]),
    ("Bangladesh", [
        ("Tanzid Hasan", 7451),
        ("Litton Das", 4430),
        ("Najmul Hossain Shanto", 4633),
        ("Mushfiqur Rahim", 3279),
        ("Tawid Hridoy", 7162),
        ("Shakib Al Hasan", 3426),
        ("Mahedi Hasan", 4737),
        ("Mahmudullah", 3488),
        ("Mehidy Hasan Miraz", 4598),
    ]),
    ("Sri Lanka", [
        ("Pathum Nissanka", 5951),
        ("Kusal Perera", 3994),
        ("Kusal Mendis", 4485),
        ("Sadeera Samarawickrama", 4704),
        ("Dasun Shanaka", 4476),
        ("Dushan Hemantha", 7299),
        ("Dhananjaya de Silva", 4472),
        ("Charith Asalanka", 6032),
    ]),
]

# One Player per (country, name, id) entry, preserving the table order.
List_Of_Batsmen = [
    Player(
        player_name=name,
        country_name=country,
        url=_BATTING_URL_TEMPLATE.format(player_id=player_id),
    )
    for country, squad in _BATTING_SQUADS
    for name, player_id in squad
]


In [5]:
# Scrape every batsman, then combine the per-player frames with a single
# concat. Growing a DataFrame inside the loop re-copies all earlier rows on
# every iteration.
player_frames = []
for pl in List_Of_Batsmen:
    result_df = get_player_stats(pl)
    if result_df is not None:  # None means the page could not be scraped
        player_frames.append(result_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
final_df = pd.concat(player_frames, ignore_index=True) if player_frames else pd.DataFrame()
print(final_df)
final_df.to_csv('./batting_scrapped.csv' , index=False)

                Player    Country              Opponent Matches Innings  \
0          Virat Kohli      India           Afghanistan       3       2   
1          Virat Kohli      India             Australia      48      46   
2          Virat Kohli      India            Bangladesh      16      16   
3          Virat Kohli      India               England      36      36   
4          Virat Kohli      India               Ireland       2       2   
...                ...        ...                   ...     ...     ...   
1119  Charith Asalanka  Sri Lanka          South Africa       4       4   
1120  Charith Asalanka  Sri Lanka  United Arab Emirates       1       1   
1121  Charith Asalanka  Sri Lanka           West Indies       1       0   
1122  Charith Asalanka  Sri Lanka              Zimbabwe       4       3   
1123  Charith Asalanka  Sri Lanka          Overall (15)      50      44   

     Not_Out Hundreds Fifties  Runs Average  
0          1        0       2   122  122.00  
1      

In [74]:
# Sanity check: list the distinct values of the 'Country' column.
print(final_df['Country'].unique())

['India' 'South Africa' 'Australia' 'New Zealand' 'Pakistan' 'Netherlands'
 'Afghanistan' 'England' 'Bangladesh' 'Sri Lanka']


In [75]:
# The scraper stores every value as text. Cast the listed columns to numbers;
# cells that cannot be parsed become NaN (errors='coerce'). 'Not_Out' is not
# in the list and therefore stays as text.
for numeric_column in ('Average', 'Matches', 'Innings', 'Hundreds', 'Fifties', 'Runs'):
    final_df[numeric_column] = pd.to_numeric(final_df[numeric_column], errors='coerce')

In [76]:
# Show the frame again to inspect it after the numeric conversion.
print(final_df)

                Player    Country              Opponent  Matches  Innings  \
0          Virat Kohli      India           Afghanistan        3        2   
1          Virat Kohli      India             Australia       48       46   
2          Virat Kohli      India            Bangladesh       16       16   
3          Virat Kohli      India               England       36       36   
4          Virat Kohli      India               Ireland        2        2   
...                ...        ...                   ...      ...      ...   
1119  Charith Asalanka  Sri Lanka          South Africa        4        4   
1120  Charith Asalanka  Sri Lanka  United Arab Emirates        1        1   
1121  Charith Asalanka  Sri Lanka           West Indies        1        0   
1122  Charith Asalanka  Sri Lanka              Zimbabwe        4        3   
1123  Charith Asalanka  Sri Lanka          Overall (15)       50       44   

     Not_Out  Hundreds  Fifties  Runs  Average  
0          1         0    

We will keep only the rows whose opponent is one of the teams participating in the World Cup 2023. This also drops each player's "Overall" summary row.

In [77]:
Participating_Teams = ['India' , 'Australia' , 'England' , 'New Zealand' , 'Pakistan' , 'Afghanistan' , 
                       'Sri Lanka' , 'Netherlands' , 'Bangladesh' , 'South Africa']

# Keep only rows played against a participating team. .copy() makes the result
# an independent frame, so the columns added to final_df in later cells are not
# written to a slice of the unfiltered data (SettingWithCopyWarning).
final_df = final_df[final_df['Opponent'].isin(Participating_Teams)].copy()
final_df['Opponent'].unique()



array(['Afghanistan', 'Australia', 'Bangladesh', 'England', 'Netherlands',
       'New Zealand', 'Pakistan', 'South Africa', 'Sri Lanka', 'India'],
      dtype=object)

Now we will encode the player names and team names in our dataframe using LabelEncoder.

In [78]:
from sklearn.preprocessing import LabelEncoder

# Learn one integer code per distinct player name, then apply that mapping.
label_encoder_players = LabelEncoder().fit(final_df['Player'])
final_df['Encoded_Player'] = label_encoder_players.transform(final_df['Player'])

In [79]:
# Fit the team encoder once on the full list of participating teams and reuse
# that single mapping for both columns. fit_transform() would refit the
# encoder on each column separately, so a team's code in 'Encoded_Country'
# would only match its code in 'Encoded_Opponent' when both columns happen to
# contain exactly the same set of teams.
label_encoder_teams = LabelEncoder()
label_encoder_teams.fit(Participating_Teams)
final_df['Encoded_Country'] = label_encoder_teams.transform(final_df['Country'])
final_df['Encoded_Opponent'] = label_encoder_teams.transform(final_df['Opponent'])

Now we will extract only those columns which are meaningful for the prediction of runs.

In [80]:
# Encoded identifiers and match count (model inputs) plus the 'Runs' target.
training_columns = ['Encoded_Player', 'Encoded_Country', 'Encoded_Opponent', 'Matches', 'Runs']
Train_df = final_df[training_columns]
print(Train_df)

      Encoded_Player  Encoded_Country  Encoded_Opponent  Matches  Runs
0                 86                4                 0        3   122
1                 86                4                 1       48  2313
2                 86                4                 2       16   910
3                 86                4                 3       36  1340
6                 86                4                 5        2    63
...              ...              ...               ...      ...   ...
1112               9                9                 4        9   189
1114               9                9                 5        3    82
1115               9                9                 6        3    26
1117               9                9                 7        2    50
1119               9                9                 8        4   275

[741 rows x 5 columns]


In [81]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

First we will try training and testing the dataset on RandomForest Regressor.

In [82]:
# Feature matrix and target. .copy() makes X and y independent frames, so the
# column assignments below do not write to a slice of Train_df.
feature_columns = ['Encoded_Player', 'Encoded_Country', 'Encoded_Opponent', 'Matches']
X = Train_df[feature_columns].copy()
y = Train_df[['Runs']].copy()

# Make sure both columns are numeric; unparseable values become NaN.
X['Matches'] = pd.to_numeric(X['Matches'], errors='coerce')
y['Runs'] = pd.to_numeric(y['Runs'], errors='coerce')


In [83]:
# Forest of 100 trees; random_state is fixed so repeated runs build the same forest.
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

In [84]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [85]:
# scikit-learn expects a 1-D target. y_train is a single-column frame, so it is
# flattened here instead of relying on the implicit conversion (which emits a
# DataConversionWarning). fit() returns the fitted estimator itself.
history = rf_regressor.fit(X_train, y_train.to_numpy().ravel())

y_pred_train = rf_regressor.predict(X_train)
y_pred_test = rf_regressor.predict(X_test)

# R-squared on the rows the model was fitted on versus the held-out rows.
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

print(f'R-squared on Training Set: {r2_train}')
print(f'R-squared on Test Set: {r2_test}')

R-squared on Training Set: 0.9702923844931751
R-squared on Test Set: 0.690022168245436


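We can see that the Random Forest Regressor reaches an R² of about 0.69 on the test set, against 0.97 on the training set, which suggests overfitting. Note that R² is the share of variance explained by the model, not a classification accuracy.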

In [86]:
# Seed NumPy and TensorFlow so that weight initialisation and batch shuffling,
# and therefore the reported score and everything derived from this model,
# can be reproduced on a re-run.
np.random.seed(42)
tf.random.set_seed(42)

# Fully connected regression network: 64 -> 128 -> 64 hidden units and a
# single linear output for the predicted runs.
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
# The validation loss is computed every epoch, but the return value of fit()
# is discarded, so it is not used to select or stop the model.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=0)
y_pred_dnn = model.predict(X_test)
r2_test = r2_score(y_test, y_pred_dnn)
print(f'R-squared on Test Set: {r2_test}')

R-squared on Test Set: 0.7102352045584988


- The DNN reaches an R² of about 0.71 on the test set (R² is the share of variance explained, not a classification accuracy). The modest score can be attributed to the complex and multifaceted nature of the sport. Cricket matches are influenced by a myriad of factors, including player form, team dynamics, historical performance, and strategic decisions during the game. However, the predictive model does not capture certain crucial elements that significantly affect how many runs a player scores. Factors such as the specific ground conditions, varying weather conditions, and the result of the toss introduce unpredictable variables that contribute to the inherent uncertainty in cricket matches. These external elements, which are beyond the scope of the current model, play a substantial role in determining the final result. To improve the predictions, it would be necessary to incorporate a more comprehensive set of features, including detailed information about the playing venue, weather forecasts, and toss results, to better account for the diverse and dynamic nature of cricket matches.

In [87]:
# One row per distinct (player, country) pair with their integer codes.
# Chaining drop_duplicates() returns a new frame, which avoids mutating a
# slice of final_df in place (inplace=True on a slice triggers
# SettingWithCopyWarning).
player_country = final_df[['Player' , 'Encoded_Player' , 'Country' , 'Encoded_Country']].drop_duplicates()


- We will extract information of individual players

In [88]:
# Show the de-duplicated player / country lookup table.
print(player_country)

                      Player  Encoded_Player    Country  Encoded_Country
0                Virat Kohli              86      India                4
15              Rohit Sharma              65      India                4
31              Shubman Gill              76      India                4
43              Shreyas Iyer              75      India                4
55          Suryakumar Yadav              78      India                4
...                      ...             ...        ...              ...
1055  Sadeera Samarawickrama              66  Sri Lanka                9
1071           Dasun Shanaka              13  Sri Lanka                9
1087         Dushan Hemantha              19  Sri Lanka                9
1092     Dhananjaya de Silva              18  Sri Lanka                9
1108        Charith Asalanka               9  Sri Lanka                9

[89 rows x 4 columns]


In [89]:
# A helper function to get the encoded_country

def get_encoded_country(input_country):
    """Look up the integer code of a country in ``player_country``.

    Returns the 'Encoded_Country' value of the first row whose 'Country'
    equals ``input_country``, or the string 'Error' when no row matches.
    """
    matching_codes = (
        record['Encoded_Country']
        for _, record in player_country.iterrows()
        if record['Country'] == input_country
    )
    # next() with a default stops at the first match, like the early return
    # of an explicit loop.
    return next(matching_codes, 'Error')

- Using our model, we predict the runs of every player against each of the other participating teams (round-robin format) and add them up to get the player's total.

In [90]:
# Every player starts the tournament on zero runs.
player_runs = {name: 0 for name in player_country['Player']}

# Collect one feature row per (player, opponent) fixture so the network is
# called once on the whole batch instead of once per fixture.
fixture_players = []
fixture_features = []
for opponent in Participating_Teams:
    # The opponent's code does not depend on the player, so look it up once.
    opponent_code = get_encoded_country(opponent)
    if isinstance(opponent_code, str):
        # get_encoded_country returns the string 'Error' for an unknown team.
        raise ValueError(f"No encoded value found for team {opponent!r}")

    for index, row in player_country.iterrows():
        if row['Country'] != opponent:  # a team does not play against itself
            fixture_players.append(row['Player'])
            # Same feature order as the training data; Matches = 1 for a single game.
            fixture_features.append(
                [row['Encoded_Player'], row['Encoded_Country'], opponent_code, 1]
            )

if fixture_features:
    # An explicit float array avoids ambiguity in how a nested Python list
    # is interpreted as model input.
    predicted_runs = model.predict(np.asarray(fixture_features, dtype='float32'), verbose=0)
    for name, runs in zip(fixture_players, predicted_runs[:, 0]):
        # Each fixture's prediction is truncated to a whole number of runs before summing.
        player_runs[name] += int(runs)



Now we will sort the output according to runs and get the top 10 run scorers.

In [91]:
from collections import OrderedDict

# Rank the players by predicted total, highest first.
ranked_players = sorted(player_runs.items(), key=lambda entry: entry[1], reverse=True)
sorted_list = OrderedDict(ranked_players)

# The ten best entries of the ranking.
top_10 = dict(ranked_players[:10])
print('Top Run Scorers Before Semifinals:')
for it, total_runs in top_10.items():
    print(f'Player Name : {it} , Runs : {total_runs}')

Top Run Scorers Before Semifinals:
Player Name : Travis Head , Runs : 467
Player Name : Will Young , Runs : 452
Player Name : Wesley Barresi , Runs : 448
Player Name : Virat Kohli , Runs : 444
Player Name : Vikramjit Singh , Runs : 439
Player Name : Tawid Hridoy , Runs : 435
Player Name : Temba Bavuma , Runs : 433
Player Name : Tanzid Hasan , Runs : 429
Player Name : Steven Smith , Runs : 428
Player Name : Teja Nidamanuru , Runs : 425


# Problem 2 : Top 10 Wicket Takers

In [6]:

# Function to scrape the per-opponent bowling data of an individual player from "Howstat.com"
    
def get_bowler_stats(request_obj):
    """Scrape one player's per-opponent bowling table into a DataFrame.

    Parameters
    ----------
    request_obj : object with ``url``, ``player_name`` and ``country_name``
        attributes (the notebook passes ``Player`` instances).

    Returns
    -------
    pandas.DataFrame or None
        One row per data row of the ``TableLined`` table inside the
        ``<div id="bowl">`` element, with the columns Player, Country,
        Opponent, Matches, Overs, Maidens, Runs and Wickets. All scraped
        values are kept as stripped strings. ``None`` is returned (after
        printing a message) when the HTTP status is not 200 or when the
        bowling table cannot be found on the page.
    """
    # A timeout keeps one unresponsive request from hanging the whole scrape.
    response = requests.get(request_obj.url, timeout=30)

    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find('div', {'id': 'bowl'})
    # find() returns None when nothing matches; guard both lookups so a page
    # without bowling figures is reported instead of raising AttributeError.
    table = div.find('table', {'class': 'TableLined'}) if div is not None else None
    if table is None:
        print(f"No bowling table found for {request_obj.player_name} ({request_obj.url})")
        return None

    column_names = ['Player', 'Country', 'Opponent', 'Matches',
                    'Overs', 'Maidens', 'Runs', 'Wickets']
    records = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if len(cells) < 6:
            # Not a full data row (e.g. a spacer); the six leading cells are required.
            continue
        # Leading cells, in order: opponent, matches, overs, maidens, runs, wickets.
        records.append([request_obj.player_name, request_obj.country_name, *cells[:6]])

    return pd.DataFrame(records, columns=column_names)
    


In [7]:
# pl = Player(player_name="Ravindra Jadeja", country_name="India", url="http://www.howstat.com/cricket/Statistics/Players/PlayerOpponents_ODI.asp?PlayerID=3644#bowl")

# get_bowler_stats(pl)

# Every bowler page shares this address; only the numeric PlayerID differs.
_BOWLER_PAGE_URL = "http://www.howstat.com/cricket/Statistics/Players/PlayerOpponents_ODI.asp?PlayerID={}#bowl"

# (country, ((player name, Howstat PlayerID), ...)) in the order the bowlers
# are scraped. Names are kept exactly as spelled, since they become labels.
_BOWLER_SQUADS = (
    ("Afghanistan", (
        ("Noor Ahmad", 6536),
        ("Abdul Rahman", 7367),
        ("Naveen-ul-Haq", 4590),
        ("Fazhalaq Farooqi", 5955),
        ("Mujeeb Ur Rahman", 4714),
    )),
    ("England", (
        ("Gus Atkinson", 7452),
        ("Reece Topley", 4479),
        ("Mark Wood", 4421),
        ("Adil Rashid", 3696),
    )),
    ("Bangladesh", (
        ("Nasum Ahmed", 5957),
        ("Tanzim Hasan Sakib", 7454),
        ("Hasan Mahmud", 5842),
        ("Mustafizur Rahman", 4414),
        ("Shoriful Islam", 5958),
        ("Taskin Ahmed", 4299),
    )),
    ("Sri Lanka", (
        ("Chamika Karunaratne", 4847),
        ("Matheesha Pathirana", 6613),
        ("Lahiru Kumara", 4599),
        ("Dunith Wellalage", 6655),
        ("Dilshan Madushanka", 6859),
        ("Dushmantha Chameera", 4380),
        ("Kasun Rajitha", 4531),
        ("Maheesh Theekshana", 6178),
    )),
    ("Netherlands", (
        ("Ryan Klein", 6468),
        ("Paul van Meekeren", 4021),
        ("Aryan Dutt", 5990),
        ("Logan van Beek", 4295),
    )),
    ("India", (
        ("Shardul Thakur", 4393),
        ("Jasprit Bumrah", 4062),
        ("Kuldeep Yadav", 4652),
        ("Mohammad Shami", 3993),
        ("Mohammad Siraj", 4675),
    )),
    ("South Africa", (
        ("Keshav Maharaj", 4601),
        ("Luigi Ngidi", 4636),
        ("Kagiso Rabada", 4339),
        ("Tabraiz Shamsi", 4562),
    )),
    ("Australia", (
        ("Pat Cummins", 3909),
        ("Adam Zampa", 4527),
        ("Mitchell Starc", 3830),
        ("Josh Hazlewood", 3799),
    )),
    ("New Zealand", (
        ("Trent Boult", 3924),
        ("Lockie Ferguson", 4609),
        ("Ish Sodhi", 4243),
        ("Tim Southee", 3542),
        ("Mat Henry", 4257),
    )),
    ("Pakistan", (
        ("Haris Rauf", 5771),
        ("Hasan Ali", 4579),
        ("Shaheen Afridi", 4798),
        ("Usama Mir", 7117),
        ("Md. Wasim Jr.", 6058),
    )),
)

# Flatten the squads into one Player per bowler, preserving the order above.
List_Of_Bowlers = [
    Player(player_name=bowler_name, country_name=squad_country,
           url=_BOWLER_PAGE_URL.format(howstat_id))
    for squad_country, squad in _BOWLER_SQUADS
    for bowler_name, howstat_id in squad
]

In [8]:
# Scrape every bowler and stack the per-player tables into a single frame.
# Frames are collected first and concatenated once, rather than growing
# bowler_df with a concat on every iteration.
scraped_frames = []
for pl in List_Of_Bowlers:
    result_df = get_bowler_stats(pl)
    # get_bowler_stats returns None when a page could not be retrieved.
    if result_df is not None:
        scraped_frames.append(result_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
bowler_df = pd.concat(scraped_frames, ignore_index=True) if scraped_frames else pd.DataFrame()
print(bowler_df)
# Cache the raw scrape on disk next to the notebook.
bowler_df.to_csv('./bowlers_scrapped.csv', index=False)

         Player      Country      Opponent Matches  Overs Maidens  Runs  \
0    Noor Ahmad  Afghanistan     Australia       1   10.0       1    53   
1    Noor Ahmad  Afghanistan   Netherlands       1    9.0       0    31   
2    Noor Ahmad  Afghanistan      Pakistan       1   10.0       0    49   
3    Noor Ahmad  Afghanistan  South Africa       1    9.0       0    49   
4    Noor Ahmad  Afghanistan     Sri Lanka       3   21.0       0   156   
..          ...          ...           ...     ...    ...     ...   ...   
522   Mat Henry  New Zealand  South Africa       4   31.3       5   145   
523   Mat Henry  New Zealand     Sri Lanka      12   89.4       9   412   
524   Mat Henry  New Zealand   West Indies       3   21.0       1   130   
525   Mat Henry  New Zealand      Zimbabwe       2   17.0       2   103   
526   Mat Henry  New Zealand  Overall (12)      82  712.5      54  3722   

    Wickets  
0         0  
1         2  
2         3  
3         0  
4         2  
..      ...  
5

In [95]:
# final_df['Average'] = pd.to_numeric(final_df['Average'], errors='coerce')
# The scraped figures are text; convert each one to a number, turning any
# value that cannot be parsed into NaN.
for numeric_column in ('Matches', 'Overs', 'Runs', 'Maidens', 'Wickets'):
    bowler_df[numeric_column] = pd.to_numeric(bowler_df[numeric_column], errors='coerce')

In [96]:
# Keep only the rows whose opponent takes part in the tournament. This also
# drops any row whose Opponent value is not a team name in Participating_Teams.
# .copy() makes the result an independent frame, so the columns added to it in
# later cells are not chained assignments on a slice of the scraped data.
bowler_df = bowler_df[bowler_df['Opponent'].isin(Participating_Teams)].copy()
bowler_df['Opponent'].unique()

array(['Australia', 'Netherlands', 'Pakistan', 'South Africa',
       'Sri Lanka', 'Bangladesh', 'England', 'India', 'New Zealand',
       'Afghanistan'], dtype=object)

In [97]:
# Bowlers get their own encoder. Fitting `label_encoder_players` here would
# overwrite the mapping learned for the batsmen and leave
# `label_encoder_bowlers` unused.
label_encoder_bowlers = LabelEncoder()
bowler_df['Encoded_Player'] = label_encoder_bowlers.fit_transform(bowler_df['Player'])
print(bowler_df)

         Player      Country      Opponent  Matches  Overs  Maidens  Runs  \
0    Noor Ahmad  Afghanistan     Australia        1   10.0        1    53   
1    Noor Ahmad  Afghanistan   Netherlands        1    9.0        0    31   
2    Noor Ahmad  Afghanistan      Pakistan        1   10.0        0    49   
3    Noor Ahmad  Afghanistan  South Africa        1    9.0        0    49   
4    Noor Ahmad  Afghanistan     Sri Lanka        3   21.0        0   156   
..          ...          ...           ...      ...    ...      ...   ...   
518   Mat Henry  New Zealand         India       10   90.2        8   399   
520   Mat Henry  New Zealand   Netherlands        3   24.0        2   107   
521   Mat Henry  New Zealand      Pakistan       14  130.3        7   700   
522   Mat Henry  New Zealand  South Africa        4   31.3        5   145   
523   Mat Henry  New Zealand     Sri Lanka       12   89.4        9   412   

     Wickets  Encoded_Player  
0          0              33  
1          2 

In [98]:
# Fit the team encoder once on every team name that appears in either column,
# then transform both columns with that single mapping. Fitting separately per
# column would give a team different codes in Country and Opponent whenever
# the two columns do not contain exactly the same set of teams.
label_encoder_teams.fit(pd.concat([bowler_df['Country'], bowler_df['Opponent']], ignore_index=True))
bowler_df['Encoded_Country'] = label_encoder_teams.transform(bowler_df['Country'])
bowler_df['Encoded_Opponent'] = label_encoder_teams.transform(bowler_df['Opponent'])

print(bowler_df)

         Player      Country      Opponent  Matches  Overs  Maidens  Runs  \
0    Noor Ahmad  Afghanistan     Australia        1   10.0        1    53   
1    Noor Ahmad  Afghanistan   Netherlands        1    9.0        0    31   
2    Noor Ahmad  Afghanistan      Pakistan        1   10.0        0    49   
3    Noor Ahmad  Afghanistan  South Africa        1    9.0        0    49   
4    Noor Ahmad  Afghanistan     Sri Lanka        3   21.0        0   156   
..          ...          ...           ...      ...    ...      ...   ...   
518   Mat Henry  New Zealand         India       10   90.2        8   399   
520   Mat Henry  New Zealand   Netherlands        3   24.0        2   107   
521   Mat Henry  New Zealand      Pakistan       14  130.3        7   700   
522   Mat Henry  New Zealand  South Africa        4   31.3        5   145   
523   Mat Henry  New Zealand     Sri Lanka       12   89.4        9   412   

     Wickets  Encoded_Player  Encoded_Country  Encoded_Opponent  
0        

In [99]:
# Training table for the wicket model: the three encoded identifiers, the
# number of matches, and the wickets column.
Bowler_Train_df = bowler_df.loc[
    :, ['Encoded_Player', 'Encoded_Country', 'Encoded_Opponent', 'Matches', 'Wickets']
]
print(Bowler_Train_df)

     Encoded_Player  Encoded_Country  Encoded_Opponent  Matches  Wickets
0                33                0                 1        1        0
1                33                0                 5        1        2
2                33                0                 7        1        3
3                33                0                 8        1        0
4                33                0                 9        3        2
..              ...              ...               ...      ...      ...
518              24                6                 4       10       16
520              24                6                 5        3        8
521              24                6                 7       14       28
522              24                6                 8        4        2
523              24                6                 9       12       24

[350 rows x 5 columns]


In [100]:
# Inputs are the encoded player/country/opponent plus the match count;
# the target is the wickets column (kept as a one-column frame).
X = Bowler_Train_df.loc[:, ['Encoded_Player', 'Encoded_Country', 'Encoded_Opponent', 'Matches']]
y = Bowler_Train_df.loc[:, ['Wickets']]
# Hold out 20% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [101]:
# Fully connected regression network: 64 -> 128 -> 64 ReLU units and a
# single linear output for the predicted wickets.
model2 = keras.Sequential()
model2.add(layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model2.add(layers.Dense(128, activation='relu'))
model2.add(layers.Dense(64, activation='relu'))
model2.add(layers.Dense(1))

model2.compile(loss='mean_squared_error', optimizer='adam')
# The held-out split is passed as validation data; training runs silently.
model2.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=0,
)

# Score the trained network on the held-out rows.
y_pred_dnn = model2.predict(X_test)
r2_test = r2_score(y_test, y_pred_dnn)
print(f'R-squared on Test Set: {r2_test}')

R-squared on Test Set: 0.8562396325570871


Here we can see that the model reaches an R-squared of about 0.86 on the test set (R-squared measures explained variance, not classification accuracy).

In [102]:
# One row per distinct (player, country) combination with its encoded ids.
# drop_duplicates() is chained so it returns a new frame, instead of mutating
# a slice of bowler_df in place.
bowler_country = bowler_df[['Player', 'Encoded_Player', 'Country', 'Encoded_Country']].drop_duplicates()
print(bowler_country)

                  Player  Encoded_Player       Country  Encoded_Country
0             Noor Ahmad              33   Afghanistan                0
6           Abdul Rahman               0   Afghanistan                0
9          Naveen-ul-Haq              32   Afghanistan                0
20      Fazhalaq Farooqi               8   Afghanistan                0
29      Mujeeb Ur Rahman              29   Afghanistan                0
45          Gus Atkinson               9       England                3
50          Reece Topley              36       England                3
60             Mark Wood              23       England                3
72           Adil Rashid               2       England                3
85           Nasum Ahmed              31    Bangladesh                2
93    Tanzim Hasan Sakib              41    Bangladesh                2
97          Hasan Mahmud              10    Bangladesh                2
107    Mustafizur Rahman              30    Bangladesh          

In [103]:
# A helper function to get the encoded_country

def get_encoded_bowler_country(input_country):
    """Look up the label-encoded value of a country in ``bowler_country``.

    Walks the module-level ``bowler_country`` frame row by row and returns the
    ``Encoded_Country`` of the first row whose ``Country`` equals
    ``input_country``. If no row matches, the string ``'Error'`` is returned.
    """
    matching_codes = (
        record['Encoded_Country']
        for _, record in bowler_country.iterrows()
        if record['Country'] == input_country
    )
    # First match wins; the sentinel string is the fallback.
    return next(matching_codes, 'Error')

In [104]:
# Every bowler starts the simulated round-robin stage with zero wickets.
player_wickets = {}
for index, row in bowler_country.iterrows():
    player_wickets[row['Player']] = 0

for opponent in Participating_Teams:
    # Use the bowler-side lookup so the opponent code comes from the same
    # frame the wicket model's features were built from, not from the
    # batsmen's table. It is the same for every bowler, so look it up once
    # per team.
    encoded_opponent = get_encoded_bowler_country(opponent)
    for index, row in bowler_country.iterrows():
        # A team does not play against itself.
        if row['Country'] != opponent:
            # Feature order matches the training columns:
            # Encoded_Player, Encoded_Country, Encoded_Opponent, Matches (= 1).
            wickets = model2.predict([[row['Encoded_Player'], row['Encoded_Country'], encoded_opponent, 1]])
            # int() truncates the predicted value before it is accumulated.
            player_wickets[row['Player']] += int(wickets[0][0])



In [105]:
# Rank every bowler by accumulated wickets, highest first.
sorted_bowlers = OrderedDict(
    sorted(player_wickets.items(), key=lambda entry: entry[1], reverse=True)
)
# Keep only the first ten entries of the ranking.
top_10_bowlers = dict(list(sorted_bowlers.items())[:10])
print('Top Wicket takers Before Semifinals:')
for bowler, total_wickets in top_10_bowlers.items():
    print(f'Player Name : {bowler} , Wickets : {total_wickets}')


Top Wicket takers Before Semifinals:
Player Name : Lahiru Kumara , Wickets : 18
Player Name : Kasun Rajitha , Wickets : 18
Player Name : Kagiso Rabada , Wickets : 17
Player Name : Dunith Wellalage , Wickets : 16
Player Name : Dushmantha Chameera , Wickets : 16
Player Name : Keshav Maharaj , Wickets : 15
Player Name : Dilshan Madushanka , Wickets : 14
Player Name : Ish Sodhi , Wickets : 13
Player Name : Adil Rashid , Wickets : 12
Player Name : Maheesh Theekshana , Wickets : 12
