## Recommending top 18

In [1]:
import pandas as pd
import numpy as np
import json
import re

import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow.keras as keras


In [2]:
# Country under evaluation: compared against player nationalities when
# filtering the international data, and lower-cased to build domestic paths.
team = 'Australia'

In [3]:
# Cut-off date, parsed later with "%B,%d,%Y"; only rows dated strictly after
# it are kept.
date_to_start = 'May,01,2018'  # keep this format

In [4]:
# Minimum number of rows a player needs to be rated by top_players(); the
# model input is then the player's last `sequence_length - 1` rows.
sequence_length = 10

In [5]:
# Multiplier applied to international ratings; domestic ratings are scaled by
# (1 - international_weight) before both pools are merged and ranked.
international_weight = 0.9

In [6]:
def top_players(X, dataframe, players_data, model_path, skip_bowler, only_bowler):
    """Predict a rating per player and return the role-filtered results.

    Every player in ``X`` with at least ``sequence_length`` rows (a
    module-level setting) contributes one input sequence made of their most
    recent ``sequence_length - 1`` rows ordered by ``Date``. The sequences are
    scored by the Keras model stored at ``model_path``.

    Args:
        X: Feature rows with 'Player ID' and 'Date' columns. The first two
            columns are dropped before prediction (presumably the id and the
            date -- confirm against the training pipeline).
        dataframe: Frame with 'Player ID' and 'Player Name'; players absent
            from it are discarded.
        players_data: Frame with 'Player ID', 'Playing Role' and
            'Bowling Style'.
        model_path: Path handed to ``keras.models.load_model``.
        skip_bowler: When true, drop players whose role is exactly "Bowler".
        only_bowler: When true, keep only "Bowler", "Bowling Allrounder" and
            "Allrounder" roles.

    Returns:
        Four parallel numpy arrays: player names, predicted ratings (rounded
        to 3 decimals), playing roles and bowling styles. All four are empty
        when no player has enough history.

    Raises:
        IndexError: If a retained player has no row in ``players_data``.
    """
    bowling_roles = ["Bowler", 'Bowling Allrounder', 'Allrounder']

    candidate_ids = []
    sequences = []
    for player_id, positions in X.groupby('Player ID').indices.items():
        if len(positions) < sequence_length:
            continue
        # GroupBy.indices holds positional offsets, so they are resolved with
        # iloc; loc would only be right while X has a default RangeIndex.
        history = X.iloc[positions].sort_values('Date').iloc[-sequence_length + 1:]
        # Remember the group key instead of reading the id back from column 0.
        candidate_ids.append(player_id)
        sequences.append(history.iloc[:, 2:].values)

    if not sequences:
        # Nobody has enough history: skip the model entirely and return the
        # same empty arrays the filtering loop below would produce.
        return np.array([]), np.array([]), np.array([]), np.array([])

    test_data = np.array(sequences)
    model = keras.models.load_model(model_path)
    print(test_data.shape)
    # One prediction per sequence is expected, in the same order.
    preds = model.predict(test_data).flatten().round(3)
    team_players = set(dataframe['Player ID'].unique())

    player_ratings = []
    player_names = []
    player_roles = []
    bowling_styles = []
    for player_id, rating in zip(candidate_ids, preds):
        if player_id not in team_players:
            continue
        # Look the player's metadata row up once and reuse it.
        profile = players_data[players_data['Player ID'] == player_id]
        player_role = profile['Playing Role'].values[0]
        if skip_bowler and player_role == "Bowler":
            continue
        if only_bowler and player_role not in bowling_roles:
            continue
        player_ratings.append(rating)
        player_names.append(
            dataframe[dataframe['Player ID'] == player_id]['Player Name'].values[0]
        )
        player_roles.append(player_role)
        bowling_styles.append(profile['Bowling Style'].values[0])

    return (
        np.array(player_names),
        np.array(player_ratings),
        np.array(player_roles),
        np.array(bowling_styles),
    )


### Loading international data and model

In [7]:
# Lookup keyed by player id (as a string) giving the player's nationality;
# used below to keep only rows belonging to `team`.
with open("results/international/player_nationalities.json", "r") as fp:
    player_nationalities = json.loads(fp.read())

In [8]:
# Player metadata from the international results folder; later cells read its
# 'Player ID', 'Playing Role' and 'Bowling Style' columns.
players_data_intl = pd.read_excel("results/international/players.xlsx")

In [9]:
# International batting / bowling sheets; top_players() uses them to map
# 'Player ID' to 'Player Name' and to restrict results to listed players.
batting_intl = pd.read_excel("results/international/batting.xlsx")
bowling_intl = pd.read_excel("results/international/bowling.xlsx")

In [10]:
# International batting features, narrowed in two passes.
X_bat = pd.read_csv("./results/international/traindata_batting.csv")
X_bat['Date'] = pd.to_datetime(X_bat['Date'])

# Pass 1: rows dated strictly after the cut-off.
is_recent = X_bat['Date'] > datetime.strptime(date_to_start, "%B,%d,%Y")
X_bat = X_bat[is_recent].reset_index(drop=True)

# Pass 2: players whose recorded nationality equals `team`.
is_team_player = X_bat['Player ID'].apply(
    lambda pid: player_nationalities[str(pid)] == team
)
X_bat = X_bat[is_team_player].reset_index(drop=True)

In [11]:
# International bowling features, narrowed in two passes.
X_bowl = pd.read_csv("./results/international/traindata_bowling.csv")
X_bowl['Date'] = pd.to_datetime(X_bowl['Date'])

# Pass 1: rows dated strictly after the cut-off.
is_recent = X_bowl['Date'] > datetime.strptime(date_to_start, "%B,%d,%Y")
X_bowl = X_bowl[is_recent].reset_index(drop=True)

# Pass 2: players whose recorded nationality equals `team`.
is_team_player = X_bowl['Player ID'].apply(
    lambda pid: player_nationalities[str(pid)] == team
)
X_bowl = X_bowl[is_team_player].reset_index(drop=True)

In [12]:
# Distinct bowling styles among players with a bowling-capable role.
intl_bowling_types = players_data_intl[
    players_data_intl['Playing Role'].apply(
        lambda role: role in ["Bowler", 'Bowling Allrounder', 'Allrounder']
    )
]['Bowling Style'].unique()

# A style counts as spin when its lower-cased name matches neither the
# "fast" nor the "medium" pattern.
intl_spin_types = [
    style
    for style in intl_bowling_types
    if not (re.match('.*fast.*', style.lower()) or re.match('.*medium.*', style.lower()))
]

In [13]:

# Rate international batting candidates. Pure bowlers are skipped and the
# fourth return value (bowling styles) is discarded.
intl_batsmen, intl_batsmen_ratings, intl_batsmen_roles, _ = top_players(
    X=X_bat,
    dataframe=batting_intl,
    players_data=players_data_intl,
    model_path="results/international/batting.h5",
    skip_bowler=True,
    only_bowler=False,
)

(13, 9, 27)


In [14]:
# Rate international bowling candidates; only bowling-capable roles are kept.
intl_bowlers, intl_bowler_ratings, intl_bowler_roles, intl_bowling_styles = top_players(
    X=X_bowl,
    dataframe=bowling_intl,
    players_data=players_data_intl,
    model_path="results/international/bowling.h5",
    skip_bowler=False,
    only_bowler=True,
)

(7, 9, 25)


### Loading domestic data and model

In [15]:
# Lookup keyed by player id (as a string) giving the player's team in the
# domestic data set; used below to drop players tied to excluded teams.
with open(f"results/domestic/{team.lower()}/player_teams.json", "r") as fp:
    player_teams = json.loads(fp.read())

In [16]:
# Player metadata from the domestic results folder of `team`; later cells read
# its 'Player ID', 'Playing Role', 'Bowling Style' and 'All teams' columns.
players_data_dmstc = pd.read_excel(f"results/domestic/{team.lower()}/players.xlsx")

In [17]:
# Re-declared ahead of the domestic section with the same value as the earlier
# cell, so both data sets share one cut-off date.
date_to_start = 'May,01,2018'  # keep this format

In [18]:
# Domestic batting / bowling sheets; top_players() uses them to map
# 'Player ID' to 'Player Name' and to restrict results to listed players.
batting_dmstc = pd.read_excel(f"results/domestic/{team.lower()}/batting.xlsx")
bowling_dmstc = pd.read_excel(f"results/domestic/{team.lower()}/bowling.xlsx")

In [19]:
# Teams whose players are excluded from the domestic candidate pool.
# Kept as a list because later cells extend and append to it; each team is
# listed once ('Ireland' used to appear twice).
non_eng_aus_teams = [
    'Pakistan', 'India', 'South Africa', 'New Zealand',
    'Afghanistan', 'Bangladesh', 'Zimbabwe', 'West Indies',
    'Sri Lanka', 'Ireland', 'Easterns (Zimbabwe)',
    'Scotland', 'Papua New Guinea',
    'United States of America',
    'Bangladesh Cricket Board Academy',
]

In [20]:
# Further clubs, regions and national sides to exclude (in-place extension).
non_eng_aus_teams += [
    'Hong Kong Cricket Club',
    'Gujranwala Cricket Association',
    'Italy',
    'Dhaka Dynamites',
    'Colombo Kings',
    'Colombo Cricket Club',
    'Abahani Limited',
    'Germany',
    'Dhaka Gladiators',
    'Australia Under-19s',
    'Multan Region',
    'Denmark',
    'Netherlands',
]

In [21]:
# Exclude the other of the two sides as well: England when `team` is
# Australia, otherwise Australia.
non_eng_aus_teams.append('England' if team.lower() == "australia" else 'Australia')

In [22]:
# Domestic batting features, narrowed in two passes.
X_bat = pd.read_csv(f"./results/domestic/{team.lower()}/traindata_batting.csv")
X_bat['Date'] = pd.to_datetime(X_bat['Date'])

# Pass 1: rows dated strictly after the cut-off.
is_recent = X_bat['Date'] > datetime.strptime(date_to_start, "%B,%d,%Y")
X_bat = X_bat[is_recent].reset_index(drop=True)

# Pass 2: drop players whose team is on the exclusion list.
is_allowed_team = X_bat['Player ID'].apply(
    lambda pid: player_teams[str(pid)] not in non_eng_aus_teams
)
X_bat = X_bat[is_allowed_team].reset_index(drop=True)


In [23]:
# Keep only batting rows of players whose 'All teams' entry mentions the
# selected team. The pattern is derived from `team` rather than a hard-coded
# country, so changing the setting changes this filter too.
team_pattern = re.compile(f".*{re.escape(team.lower())}.*")
in_team = X_bat['Player ID'].apply(
    lambda x: team_pattern.match(
        players_data_dmstc[players_data_dmstc['Player ID'] == x]['All teams'].values[0].lower()
    ) is not None
)
X_bat = X_bat[in_team].reset_index(drop=True)

In [24]:
# Domestic bowling features, narrowed in two passes.
X_bowl = pd.read_csv(f"./results/domestic/{team.lower()}/traindata_bowling.csv")
X_bowl['Date'] = pd.to_datetime(X_bowl['Date'])

# Pass 1: rows dated strictly after the cut-off.
is_recent = X_bowl['Date'] > datetime.strptime(date_to_start, "%B,%d,%Y")
X_bowl = X_bowl[is_recent].reset_index(drop=True)

# Pass 2: drop players whose team is on the exclusion list.
is_allowed_team = X_bowl['Player ID'].apply(
    lambda pid: player_teams[str(pid)] not in non_eng_aus_teams
)
X_bowl = X_bowl[is_allowed_team].reset_index(drop=True)

In [25]:
# Keep only bowling rows of players whose 'All teams' entry mentions the
# selected team. The pattern is derived from `team` rather than a hard-coded
# country, so changing the setting changes this filter too.
team_pattern = re.compile(f".*{re.escape(team.lower())}.*")
in_team = X_bowl['Player ID'].apply(
    lambda x: team_pattern.match(
        players_data_dmstc[players_data_dmstc['Player ID'] == x]['All teams'].values[0].lower()
    ) is not None
)
X_bowl = X_bowl[in_team].reset_index(drop=True)

In [26]:
# Distinct bowling styles among domestic players with a bowling-capable role.
dmstc_bowling_types = players_data_dmstc[
    players_data_dmstc['Playing Role'].apply(
        lambda role: role in ["Bowler", 'Bowling Allrounder', 'Allrounder']
    )
]['Bowling Style'].unique()

# A style counts as spin when its lower-cased name matches neither the
# "fast" nor the "medium" pattern.
dmstc_spin_types = [
    style
    for style in dmstc_bowling_types
    if not (re.match('.*fast.*', style.lower()) or re.match('.*medium.*', style.lower()))
]

In [27]:
# Rate domestic batting candidates. Pure bowlers are skipped and the fourth
# return value (bowling styles) is discarded.
dmstc_batsmen, dmstc_batsmen_ratings, dmstc_batsmen_roles, _ = top_players(
    X=X_bat,
    dataframe=batting_dmstc,
    players_data=players_data_dmstc,
    model_path=f"results/domestic/{team.lower()}/batting.h5",
    skip_bowler=True,
    only_bowler=False,
)

(62, 9, 23)


In [28]:
# Rate domestic bowling candidates; only bowling-capable roles are kept.
dmstc_bowlers, dmstc_bowler_ratings, dmstc_bowler_roles, dmstc_bowling_styles = top_players(
    X=X_bowl,
    dataframe=bowling_dmstc,
    players_data=players_data_dmstc,
    model_path=f"results/domestic/{team.lower()}/bowling.h5",
    skip_bowler=False,
    only_bowler=True,
)

(38, 9, 25)


### 2 Wicket-keepers

In [29]:
# top wicket keepers
wicket_keepers = intl_batsmen[intl_batsmen_roles == "Wicketkeeper Batter"]
wicket_keeper_ratings = intl_batsmen_ratings[intl_batsmen_roles == "Wicketkeeper Batter"] * international_weight

wicket_keepers_d = dmstc_batsmen[dmstc_batsmen_roles == "Wicketkeeper Batter"]
wicket_keepers_d = np.array(["[D] " + d for d in wicket_keepers_d])
wicket_keepers = np.append(wicket_keepers, wicket_keepers_d)
wicket_keeper_ratings = np.append(wicket_keeper_ratings, dmstc_batsmen_ratings[dmstc_batsmen_roles == "Wicketkeeper Batter"]  * (1-international_weight))


In [30]:
# Rank keepers from highest to lowest weighted rating and keep the first four.
# Four wicket-keepers because Matthew Wade is retired now and Tim Paine is on
# a break until further notice.
sorted_idx = np.flip(np.argsort(wicket_keeper_ratings))
top_wicketkeepers = wicket_keepers[sorted_idx[:4]]
top_wicketkeepers_ratings = wicket_keeper_ratings[sorted_idx[:4]]

In [31]:
# Show the selected wicket-keepers next to their weighted ratings.
top_wicketkeepers, top_wicketkeepers_ratings

(array(['Matthew Wade', 'Tim Paine', '[D] Alex Carey', '[D] Josh Inglis'],
       dtype='<U20'),
 array([0.0891, 0.0603, 0.0478, 0.0458], dtype=float32))

### 4 All-rounders

In [32]:
# Inspect the playing roles returned for the international batting candidates.
intl_batsmen_roles

array(['Batting Allrounder', 'Opening Batter', 'Opening Batter',
       'Batting Allrounder', 'Wicketkeeper Batter', 'Middle order Batter',
       'Wicketkeeper Batter', 'Middle order Batter', 'Top order Batter'],
      dtype='<U19')

In [33]:
# Preview international candidates whose role is "Allrounder" or
# "Batting Allrounder".
intl_batsmen[(intl_batsmen_roles == "Allrounder") | (intl_batsmen_roles == "Batting Allrounder")]

array(['Cameron Green', 'Marnus Labuschagne'], dtype='<U18')

In [34]:
# top wicket keepers
all_rounders = intl_batsmen[(intl_batsmen_roles == "Allrounder") | (intl_batsmen_roles == "Batting Allrounder")]
all_rounder_ratings = intl_batsmen_ratings[(intl_batsmen_roles == "Allrounder") | (intl_batsmen_roles == "Batting Allrounder")] * international_weight

all_rounders_d = dmstc_batsmen[(dmstc_batsmen_roles == "Allrounder") | (dmstc_batsmen_roles == "Batting Allrounder")]
all_rounders_d = np.array(["[D] " + d for d in all_rounders_d])
all_rounders = np.append(all_rounders, all_rounders_d)
all_rounder_ratings = np.append(all_rounder_ratings, dmstc_batsmen_ratings[(dmstc_batsmen_roles == "Allrounder") | (dmstc_batsmen_roles == "Batting Allrounder")]  * (1-international_weight))


In [51]:
# Rank all-rounders from highest to lowest weighted rating.
# The target is four all-rounders because four were picked in the previous
# Ashes. NOTE(review): the slice keeps five entries -- presumably because a
# player can appear in both the international and domestic pools; confirm.
sorted_idx = np.flip(np.argsort(all_rounder_ratings))
top_all_rounders = all_rounders[sorted_idx[:5]]
top_all_rounders_ratings = all_rounder_ratings[sorted_idx[:5]]

In [52]:
# Show the selected all-rounders next to their weighted ratings.
top_all_rounders, top_all_rounders_ratings

(array(['Marnus Labuschagne', 'Cameron Green', '[D] Sean Abbott',
        '[D] Cameron Green', '[D] Marcus Stoinis'], dtype='<U22'),
 array([0.243     , 0.08459999, 0.0485    , 0.0478    , 0.0425    ],
       dtype=float32))

### 3 Spinners

In [37]:
# Inspect roles and bowling styles of the international bowling candidates.
intl_bowler_roles, intl_bowling_styles

(array(['Bowler', 'Bowler', 'Bowler', 'Bowler'], dtype='<U6'),
 array(['Right arm Fast medium', 'Left arm Fast', 'Right arm Offbreak',
        'Right arm Fast'], dtype='<U21'))

In [38]:
# top wicket keepers
spinners = intl_bowlers[np.array([st in intl_spin_types for st in intl_bowling_styles])]
spinner_ratings = intl_bowler_ratings[np.array([st in intl_spin_types for st in intl_bowling_styles])] * international_weight

spinners_d = dmstc_bowlers[np.array([st in dmstc_spin_types for st in dmstc_bowling_styles])]
spinners_d = np.array(["[D] " + d for d in spinners_d])
spinners = np.append(spinners, spinners_d)
spinner_ratings = np.append(spinner_ratings, dmstc_bowler_ratings[np.array([st in dmstc_spin_types for st in dmstc_bowling_styles])]  * (1-international_weight))


In [39]:
#sorted_idx = np.argsort(spinner_ratings)[::-1]
#top_spinners = spinners[sorted_idx][:int(n_bowlers * spinner_ratio)]
#top_spinner_ratings = spinner_ratings[sorted_idx][:int(n_bowlers * spinner_ratio)]

In [53]:
# Rank spinners from highest to lowest weighted rating and keep three.
sorted_idx = np.flip(np.argsort(spinner_ratings))
top_spinners = spinners[sorted_idx[:3]]
top_spinner_ratings = spinner_ratings[sorted_idx[:3]]

In [54]:
# Show the selected spinners next to their weighted ratings.
top_spinners, top_spinner_ratings

(array(['Nathan Lyon', "[D] Steve O'Keefe", '[D] Mitchell Swepson'],
       dtype='<U21'),
 array([0.2502, 0.0563, 0.0416], dtype=float32))

### 4 Non spinners (i.e., fast, medium etc.)

In [42]:
# Preview the international bowlers whose style is not in the spin list.
intl_bowlers[np.array([st not in intl_spin_types for st in intl_bowling_styles])]

array(['Josh Hazlewood', 'Mitchell Starc', 'Pat Cummins'], dtype='<U14')

In [43]:
# top fasts
non_spinners = intl_bowlers[np.array([st not in intl_spin_types for st in intl_bowling_styles])]
non_spinner_ratings = intl_bowler_ratings[np.array([st not in intl_spin_types for st in intl_bowling_styles])] * international_weight

non_spinners_d = dmstc_bowlers[np.array([st not in dmstc_spin_types for st in dmstc_bowling_styles])]
non_spinners_d = np.array(["[D] " + d for d in non_spinners_d])
non_spinners = np.append(non_spinners, non_spinners_d)
non_spinner_ratings = np.append(non_spinner_ratings, dmstc_bowler_ratings[np.array([st not in dmstc_spin_types for st in dmstc_bowling_styles])]  * (1-international_weight))


In [44]:
# Inspect the domestic non-spinner ratings after the
# (1 - international_weight) scaling.
dmstc_bowler_ratings[np.array([st not in dmstc_spin_types for st in dmstc_bowling_styles])]  * (1-international_weight)

array([0.0202, 0.0365, 0.0365, 0.0286, 0.0263, 0.0234, 0.0256, 0.0126,
       0.0201, 0.022 , 0.0141, 0.0385, 0.0217, 0.0176, 0.0238, 0.0172,
       0.0475, 0.0352, 0.0306, 0.0324, 0.0326, 0.0199, 0.0232, 0.0223,
       0.0221], dtype=float32)

In [55]:
# Rank non-spinners from highest to lowest weighted rating.
# Five fast bowlers are kept due to the repetition of Starc, who appears in
# both the international and the domestic pool.
sorted_idx = np.flip(np.argsort(non_spinner_ratings))
top_non_spinners = non_spinners[sorted_idx[:5]]
top_non_spinner_ratings = non_spinner_ratings[sorted_idx[:5]]

In [56]:
# Show the selected non-spinners next to their weighted ratings.
top_non_spinners, top_non_spinner_ratings

(array(['Josh Hazlewood', 'Mitchell Starc', 'Pat Cummins',
        '[D] Mitchell Starc', '[D] Joe Mennie'], dtype='<U21'),
 array([0.3726, 0.1683, 0.162 , 0.0475, 0.0385], dtype=float32))

### 6 Batsmen

In [47]:
# Inspect the international batting roles again before picking specialists.
intl_batsmen_roles

array(['Batting Allrounder', 'Opening Batter', 'Opening Batter',
       'Batting Allrounder', 'Wicketkeeper Batter', 'Middle order Batter',
       'Wicketkeeper Batter', 'Middle order Batter', 'Top order Batter'],
      dtype='<U19')

In [48]:
# top wicket keepers
batsmen = intl_batsmen[(intl_batsmen_roles != "Allrounder") & (intl_batsmen_roles != "Batting Allrounder") & (intl_batsmen_roles != "Bowling Allrounder") & (intl_batsmen_roles != "Wicketkeeper Batter")]
batsmen_ratings = intl_batsmen_ratings[(intl_batsmen_roles != "Allrounder") & (intl_batsmen_roles != "Batting Allrounder") & (intl_batsmen_roles != "Bowling Allrounder") & (intl_batsmen_roles != "Wicketkeeper Batter")] * international_weight

batsmen_d = dmstc_batsmen[(dmstc_batsmen_roles != "Allrounder") & (dmstc_batsmen_roles != "Batting Allrounder") & (dmstc_batsmen_roles != "Bowling Allrounder") & (dmstc_batsmen_roles != "Wicketkeeper Batter")]
batsmen_d = np.array(["[D] " + d for d in batsmen_d])
batsmen = np.append(batsmen, batsmen_d)
batsmen_ratings = np.append(batsmen_ratings, dmstc_batsmen_ratings[(dmstc_batsmen_roles != "Allrounder") & (dmstc_batsmen_roles != "Batting Allrounder") & (dmstc_batsmen_roles != "Bowling Allrounder") & (dmstc_batsmen_roles != "Wicketkeeper Batter")]  * (1-international_weight))


In [49]:
# Rank specialist batters from highest to lowest weighted rating and keep six,
# because six specialist batsmen were picked in the previous Ashes.
sorted_idx = np.flip(np.argsort(batsmen_ratings))
top_batsmen = batsmen[sorted_idx[:6]]
top_batsmen_ratings = batsmen_ratings[sorted_idx[:6]]

In [50]:
# Show the selected specialist batters next to their weighted ratings.
top_batsmen, top_batsmen_ratings

(array(['Steven Smith', 'Usman Khawaja', 'Marcus Harris', 'David Warner',
        'Travis Head', '[D] Nic Maddinson'], dtype='<U20'),
 array([0.35189998, 0.33749998, 0.3294    , 0.2871    , 0.21689999,
        0.055     ], dtype=float32))