In [162]:
import pandas as pd
import numpy as np

In [163]:
raw_data = pd.read_excel("/content/drive/MyDrive/6671501a18c55_round2_input.xlsx").convert_dtypes()
raw_data['dismissed'] = raw_data['wicket kind'].notnull().astype(int)
print(raw_data.describe())
raw_data.info()
raw_data.head()

            match id      batsman_id  is_batsman_captain  is_batsman_keeper  \
count        24875.0         24875.0             24875.0            24875.0   
mean     9100156.528  4102710.123176            0.113889            0.11397   
min        8587795.0         31464.0                 0.0                0.0   
25%        8778818.0       2231928.0                 0.0                0.0   
50%        9085047.0       3706065.0                 0.0                0.0   
75%        9388448.0       6003584.0                 0.0                0.0   
max        9887856.0       9899140.0                 1.0                1.0   
std    313392.642151  2421528.438688            0.317683           0.317781   

         inning       runs  balls_faced  over_faced_first       bowler_id  \
count   24875.0    24875.0      24875.0           24875.0         18368.0   
mean   1.486312  18.300302    14.129447          9.155811  4332079.693598   
min         1.0        0.0          0.0               1.1

Unnamed: 0,match id,batsman,batsman_id,batsman_dob,batsman_details,is_batsman_captain,is_batsman_keeper,inning,runs,balls_faced,...,bowler_dob,bowler_details,is_bowler_keeper,is_bowler_captain,strike_rate,Fours,Sixes,match_dt,ingestion_timestamp,dismissed
0,8587795,AJ Fh,37351,1986-11-17,AUS:Right-hand bat:Slow left-arm orthodox:,1,0,2,14,15,...,1997-02-05,AUS:Right-hand bat:Right-arm fast:,0.0,0.0,93.33,1.0,,2021-01-08,2022-03-22 12:43:21,1
1,8587795,Md Ni,181404,1985-01-01,AFG:Right-hand bat:Right-arm offbreak:,0,0,2,71,41,...,,,,,173.17,9.0,2.0,2021-01-08,2022-03-22 12:43:21,0
2,8587795,JW Ws,1635773,1988-08-13,AUS:Right-hand bat:Right-arm medium:,0,0,1,16,13,...,,,,,123.08,1.0,,2021-01-08,2022-03-22 12:43:21,0
3,8587795,DR Bs,2170762,1991-04-30,ENG:Right-hand bat:Slow left-arm orthodox:,0,0,1,2,4,...,1991-02-12,AUS:Right-hand bat:Right-arm fast-medium:,0.0,0.0,50.0,,,2021-01-08,2022-03-22 12:43:21,1
4,8587795,AT Cy,2285051,1991-08-27,AUS:Left-hand bat:None:,1,1,1,42,25,...,1988-12-18,PAK:Left-hand bat:Slow left-arm orthodox:,0.0,0.0,168.0,3.0,3.0,2021-01-08,2022-03-22 12:43:21,1


# **Metrics For Consistency and Recency:**


# Recency:

Since sample sizes of most players are very small, score calculated over a window of last 3 games is used

# Consistency:

**Batsman:**

std_deviation(strike_rate)/sqrt(matches_played) + std_deviation(runs scored)/sqrt(matches_played)


**Bowlers:**

std_deviation(wickets)/sqrt(matches_played) + std_deviation(economy)/sqrt(matches_played)

# Combined score:

average of standardized recency score and -1 * standardized consistency score (lower consistency score is better)


In [164]:
#Grouping with batsman_id
grouped = raw_data.groupby('batsman_id')
total_runs = grouped['runs'].sum()
total_balls_faced = grouped['balls_faced'].sum()
times_out = grouped['dismissed'].sum()
average = total_runs / times_out
strike_rate = (total_runs / total_balls_faced) * 100
fifties = grouped.apply(lambda x: (x['runs'].between(50, 99)).sum())
hundreds = grouped.apply(lambda x: (x['runs'] >= 100).sum())
keeper_status = grouped['is_batsman_keeper'].sum() >= 2

#Recency metric
grouped_recent = grouped.head(3).groupby('batsman_id')
total_runs_recent = grouped_recent['runs'].sum()
total_balls_faced_recent = grouped_recent['balls_faced'].sum()
times_out_recent = grouped_recent['dismissed'].sum()
average_recent = total_runs_recent / times_out_recent
strike_rate_recent = (total_runs_recent / total_balls_faced_recent) * 100
fifties_recent = grouped_recent.apply(lambda x: (x['runs'].between(50, 99)).sum())
hundreds_recent = grouped_recent.apply(lambda x: (x['runs'] >= 100).sum())

#Consistency metric
runs_std = grouped['runs'].std().fillna(0)
strike_std = grouped['strike_rate'].std().fillna(0)
num_matches_played = grouped.size()
consistency_score = (runs_std / np.sqrt(num_matches_played)) + (strike_std / np.sqrt(num_matches_played))

bat_data = pd.DataFrame({
    'total_runs': total_runs,
    'total_balls_faced': total_balls_faced,
    'times_out': times_out,
    'average': average,
    'strike_rate': strike_rate,
    'fifties': fifties,
    'hundreds': hundreds,
    'is_keeper': keeper_status,
    'strike_rate_recent': strike_rate_recent,
    'average_recent': average_recent,
    'fifties_recent': fifties_recent,
    'hundreds_recent': hundreds_recent,
    'consistency_score': consistency_score
}).reset_index()

print(bat_data)

      batsman_id  total_runs  total_balls_faced  times_out    average  \
0          31464          10                 18          3   3.333333   
1          34061         729                509         34  21.441176   
2          36665         208                171          9  23.111111   
3          37351        1517               1259         60  25.283333   
4          41740         976                724         32       30.5   
...          ...         ...                ...        ...        ...   
2058     9822322           0                  1          1        0.0   
2059     9822392           0                  1          1        0.0   
2060     9874052          42                 20          1       42.0   
2061     9883250          20                 25          0        inf   
2062     9899140           4                 10          1        4.0   

      strike_rate  fifties  hundreds  is_keeper  strike_rate_recent  \
0       55.555556        0         0      False     

In [165]:
def score_tier(value, tiers, scores):
    for i, tier in enumerate(tiers):
        if value >= tier:
            return scores[i]
    return scores[-1]

# Define tiers and scores for each metric
strike_rate_tiers = [150, 100, 80]
strike_rate_scores = [50, 40, 30, 0]

average_tiers = [50, 40, 30]
average_scores = [30, 20, 10, 5]

hundreds_tiers = [3, 2, 1]
hundreds_scores = [30, 20, 10, 0]

fifties_tiers = [5, 3, 1]
fifties_scores = [20, 10, 5, 0]

# Apply the scoring function to each metric
bat_data['strike_rate_score'] = bat_data['strike_rate'].apply(score_tier, args=(strike_rate_tiers, strike_rate_scores))
bat_data['average_score'] = bat_data['average'].apply(score_tier, args=(average_tiers, average_scores))
bat_data['hundreds_score'] = bat_data['hundreds'].apply(score_tier, args=(hundreds_tiers, hundreds_scores))
bat_data['fifties_score'] = bat_data['fifties'].apply(score_tier, args=(fifties_tiers, fifties_scores))
bat_data['total_score'] = (bat_data['strike_rate_score'] + bat_data['average_score'] + bat_data['hundreds_score'] + bat_data['fifties_score'])

#Apply scoring for recent metrics
bat_data['striking_rate_score_recent'] = bat_data['strike_rate_recent'].apply(score_tier, args=(strike_rate_tiers, strike_rate_scores))
bat_data['average_score_recent'] = bat_data['average_recent'].apply(score_tier, args=(average_tiers, average_scores))
bat_data['hundreds_score_recent'] = bat_data['hundreds_recent'].apply(score_tier, args=(hundreds_tiers, hundreds_scores))
bat_data['fifties_score_recent'] = bat_data['fifties_recent'].apply(score_tier, args=(fifties_tiers, fifties_scores))
bat_data['total_score_recent'] = (bat_data['striking_rate_score_recent'] + bat_data['average_score_recent'] + bat_data['hundreds_score_recent'] + bat_data['fifties_score_recent'])

#Standardizing and combining recency and consistency metrics
bat_data['z_recent_score'] = (bat_data['total_score_recent'] - bat_data['total_score_recent'].mean()) / bat_data['total_score_recent'].std()
bat_data['z_consistency_score'] = -1 * (bat_data['consistency_score'] - bat_data['consistency_score'].min()) / bat_data['consistency_score'].std()
bat_data['z_consistency_score'] = bat_data['z_consistency_score'] - bat_data['z_consistency_score'].mean()
bat_data['combined_score'] = (bat_data['z_recent_score'] + bat_data['z_consistency_score']) / 2

print(bat_data.describe())

           batsman_id  total_runs  total_balls_faced    times_out  average  \
count          2063.0      2063.0             2063.0  2063.000000   2063.0   
mean   5210708.532719  220.659234         170.368396     9.410567     <NA>   
std    2699286.898444   424.83264          305.52227    14.353667     <NA>   
min           31464.0         0.0                1.0     0.000000     <NA>   
25%         2969500.5        12.0               14.0     1.000000     <NA>   
50%         4950294.0        48.0               45.0     3.000000     <NA>   
75%         7881810.0       213.0              176.5    11.000000     <NA>   
max         9899140.0      4031.0             3083.0   103.000000     <NA>   

       strike_rate      fifties     hundreds  strike_rate_recent  \
count       2063.0  2063.000000  2063.000000              2063.0   
mean    104.034845     1.058652     0.056229          103.685457   
std      44.521223     2.843126     0.327733           50.259803   
min            0.0     0.

In [166]:
keepers = bat_data[bat_data['is_keeper']].sort_values('total_score', ascending=False).head(5)[['batsman_id','total_score','total_score_recent','consistency_score','combined_score']]
print(keepers)

     batsman_id  total_score  total_score_recent  consistency_score  \
962     4635658          110                  65          11.095098   
304     2162782          110                  55           9.144029   
337     2263736          110                  75            7.61417   
550     3057312          100                  45            11.8635   
509     2954769          100                   5          12.742166   

     combined_score  
962         0.81309  
304        0.651041  
337        1.112703  
550        0.370712  
509       -0.497377  


batsman 2263736 has a better recent score and a better consistency score so he will be the keeper

In [167]:
bat_data[bat_data['batsman_id'] ==  2263736]

Unnamed: 0,batsman_id,total_runs,total_balls_faced,times_out,average,strike_rate,fifties,hundreds,is_keeper,strike_rate_recent,...,fifties_score,total_score,striking_rate_score_recent,average_score_recent,hundreds_score_recent,fifties_score_recent,total_score_recent,z_recent_score,z_consistency_score,combined_score
337,2263736,4031,3083,72,55.986111,130.74927,38,2,True,138.582677,...,20,110,40,30,0,5,75,1.523573,0.701834,1.112703


In [168]:
batters = bat_data[bat_data['total_runs'] > 100].sort_values('total_score', ascending=False).head(10)[['batsman_id','total_score','total_score_recent','consistency_score','combined_score']]
print(batters)

      batsman_id  total_score  total_score_recent  consistency_score  \
574      3125562          110                  65           10.35506   
341      2275195          110                  65          10.537978   
197      1749075          110                  75           9.404345   
337      2263736          110                  75            7.61417   
304      2162782          110                  55           9.144029   
962      4635658          110                  65          11.095098   
330      2231928          105                  55          13.002441   
84        313809          100                  45           9.986167   
1392     7491224          100                  55          10.946859   
1051     5061748          100                  55          15.900558   

      combined_score  
574         0.831832  
341         0.827199  
197         1.067368  
337         1.112703  
304         0.651041  
962          0.81309  
330         0.553328  
84          0.418255  


In [169]:
batters[batters['total_score'] == 110].sort_values('combined_score', ascending=False).head(10)

Unnamed: 0,batsman_id,total_score,total_score_recent,consistency_score,combined_score
337,2263736,110,75,7.61417,1.112703
197,1749075,110,75,9.404345,1.067368
574,3125562,110,65,10.35506,0.831832
341,2275195,110,65,10.537978,0.827199
962,4635658,110,65,11.095098,0.81309
304,2162782,110,55,9.144029,0.651041


In [170]:
bat_data[bat_data['batsman_id'] ==  1749075]

Unnamed: 0,batsman_id,total_runs,total_balls_faced,times_out,average,strike_rate,fifties,hundreds,is_keeper,strike_rate_recent,...,fifties_score,total_score,striking_rate_score_recent,average_score_recent,hundreds_score_recent,fifties_score_recent,total_score_recent,z_recent_score,z_consistency_score,combined_score
197,1749075,3203,2085,103,31.097087,153.621103,20,3,False,143.661972,...,20,110,40,30,0,5,75,1.523573,0.611162,1.067368


In [171]:
bat_data[bat_data['batsman_id'] ==  3125562]

Unnamed: 0,batsman_id,total_runs,total_balls_faced,times_out,average,strike_rate,fifties,hundreds,is_keeper,strike_rate_recent,...,fifties_score,total_score,striking_rate_score_recent,average_score_recent,hundreds_score_recent,fifties_score_recent,total_score_recent,z_recent_score,z_consistency_score,combined_score
574,3125562,2551,1612,72,35.430556,158.25062,19,4,False,168.421053,...,20,110,50,10,0,5,65,1.100654,0.563009,0.831832


In [172]:
bat_data[bat_data['batsman_id'] ==  2275195]

Unnamed: 0,batsman_id,total_runs,total_balls_faced,times_out,average,strike_rate,fifties,hundreds,is_keeper,strike_rate_recent,...,fifties_score,total_score,striking_rate_score_recent,average_score_recent,hundreds_score_recent,fifties_score_recent,total_score_recent,z_recent_score,z_consistency_score,combined_score
341,2275195,2480,1602,81,30.617284,154.806492,14,3,False,162.121212,...,20,110,50,10,0,5,65,1.100654,0.553744,0.827199


In [173]:
raw_bow_data = pd.read_excel("/content/drive/MyDrive/bowler_input.xlsx").convert_dtypes()
raw_bow_data.describe()
raw_bow_data['economy'] = (raw_bow_data['runs'] / raw_bow_data['balls_bowled']) * 6
raw_bow_data = raw_bow_data.sort_values('match_dt', ascending=False)

In [174]:
raw_bow_data.head()

Unnamed: 0,match id,bowler,bowler_id,bowler_dob,bowler_details,is_bowler_captain,is_bowler_keeper,inning,runs,wicket_count,balls_bowled,economy,maiden,dots,Fours,Sixes,wides,noballs,match_dt,ingestion_timestamp
18480,9717518,Sl Im,6930286,2001-06-03,BAN:Left-hand bat:Left-arm medium-fast:,0,0,2,17,2,22,4.636364,0,15,2,0,3,0,2023-12-31,2024-03-14 17:12:36.995
18435,9702804,WA Ar,6718844,1997-02-05,AUS:Right-hand bat:Right-arm fast:,0,0,2,37,1,18,12.333333,0,6,6,0,2,1,2023-12-31,2024-03-14 17:12:36.995
18425,9702804,Id Wm,1594319,1988-12-18,PAK:Left-hand bat:Slow left-arm orthodox:,0,0,1,40,0,24,10.0,0,6,4,1,1,0,2023-12-31,2024-03-14 17:12:36.995
18426,9702804,MP Ss,2275097,1989-08-16,AUS:Right-hand bat:Right-arm medium:,0,0,1,29,0,12,14.5,0,4,2,3,0,0,2023-12-31,2024-03-14 17:12:36.995
18427,9702804,GJ Ml,2275195,1988-10-14,AUS:Right-hand bat:Right-arm offbreak:,1,0,1,21,2,18,7.0,0,7,3,0,0,0,2023-12-31,2024-03-14 17:12:36.995


In [175]:
#Grouping data using bowler_id
grouped = raw_bow_data.groupby('bowler_id')
total_runs_conceded = grouped['runs'].sum()
total_balls_bowled = grouped['balls_bowled'].sum()
total_wickets_taken = grouped['wicket_count'].sum()
strike_rate = total_balls_bowled / total_wickets_taken
average = total_runs_conceded / total_wickets_taken
total_overs_bowled = total_balls_bowled / 6
economy = total_runs_conceded / total_overs_bowled
four_wicket_hauls = grouped.apply(lambda x: (x['wicket_count'] >= 4).sum())
wickets_in_matches = grouped.apply(lambda x: (x['wicket_count'] > 0).sum() >= 2)

#Recency metric
grouped_recent = grouped.head(3).groupby('bowler_id')
total_runs_conceded_r = grouped_recent['runs'].sum()
total_balls_bowled_r = grouped_recent['balls_bowled'].sum()
total_wickets_taken_r = grouped_recent['wicket_count'].sum()
strike_rate_r = total_balls_bowled_r / total_wickets_taken_r
average_r = total_runs_conceded_r / total_wickets_taken_r
total_overs_bowled_r = total_balls_bowled_r / 6
economy_r = total_runs_conceded_r / total_overs_bowled_r
four_wicket_hauls_r = grouped_recent.apply(lambda x: (x['wicket_count'] >= 4).sum())

#Consistency metric
wicket_count_std = grouped['wicket_count'].std().fillna(0)
bowlers_economy_std = grouped['economy'].std().fillna(0)
num_matches_played = grouped.size()
consistency_score = (wicket_count_std / np.sqrt(num_matches_played)) + (bowlers_economy_std / np.sqrt(num_matches_played))

bow_data = pd.DataFrame({
    'total_runs_conceded': total_runs_conceded,
    'total_balls_bowled': total_balls_bowled,
    'total_wickets_taken': total_wickets_taken,
    'strike_rate': strike_rate,
    'average': average,
    'economy': economy,
    'four_wicket_hauls': four_wicket_hauls,
    'wickets_in_2_or_more_matches': wickets_in_matches,
    'matches_played': num_matches_played,
    'consistency_score': consistency_score,
    'strike_rate_recent': strike_rate_r,
    'average_recent': average_r,
    'economy_recent': economy_r,
    'four_wicket_hauls_recent': four_wicket_hauls_r
}).reset_index()

print(bow_data.describe())

            bowler_id  total_runs_conceded  total_balls_bowled  \
count          1511.0               1511.0              1511.0   
mean   5408833.405692           312.871608          233.491727   
std    2696220.755774           447.636077          341.172163   
min           34061.0                  0.0                 1.0   
25%         3117403.5                 41.0                25.0   
50%         5425146.0                125.0                92.0   
75%         8056061.0                394.5               294.0   
max         9888584.0               3353.0              2977.0   

       total_wickets_taken  strike_rate    average   economy  \
count               1511.0       1511.0     1511.0    1511.0   
mean             12.385175          inf        inf  8.529593   
std              19.288844         <NA>       <NA>   2.50916   
min                    0.0          3.0        0.0       0.0   
25%                    1.0    16.123966  21.411826  7.222222   
50%                  

In [176]:
def score_tier_l(value, tiers, scores):
    for i, tier in enumerate(tiers):
        if value <= tier:
            return scores[i]
    return scores[-1]

def econ_score(value):
    if value <= 3:
        return 50
    if value <= 5:
        return 40
    if value < 7:
        return 30
    return 0

strike_rate_tiers = [15, 19, 24]
strike_rate_scores = [30, 20, 10, 0]

average_tiers = [20, 30, 40]
average_scores = [30, 20, 10, 0]

four_wicket_hauls_tiers = [4, 2, 1]
four_wicket_hauls_scores = [30, 20, 10, 0]

# Apply the scoring function to each metric
bow_data['strike_rate_score'] = bow_data['strike_rate'].apply(score_tier_l, args=(strike_rate_tiers, strike_rate_scores))
bow_data['average_score'] = bow_data['average'].apply(score_tier_l, args=(average_tiers, average_scores))
bow_data['economy_score'] = bow_data['economy'].apply(econ_score)
bow_data['four_wicket_hauls_score'] = bow_data['four_wicket_hauls'].apply(score_tier, args=(four_wicket_hauls_tiers, four_wicket_hauls_scores))
bow_data['total_score'] = (bow_data['strike_rate_score'] + bow_data['average_score'] + bow_data['economy_score'] + bow_data['four_wicket_hauls_score'])
bow_data['strike_rate_score_recent'] = bow_data['strike_rate_recent'].apply(score_tier_l, args=(strike_rate_tiers, strike_rate_scores))
bow_data['average_score_recent'] = bow_data['average_recent'].apply(score_tier_l, args=(average_tiers, average_scores))
bow_data['economy_score_recent'] = bow_data['economy_recent'].apply(econ_score)
bow_data['four_wicket_hauls_score_recent'] = bow_data['four_wicket_hauls_recent'].apply(score_tier, args=(four_wicket_hauls_tiers, four_wicket_hauls_scores))
bow_data['total_score_recent'] = (bow_data['strike_rate_score_recent'] + bow_data['average_score_recent'] + bow_data['economy_score_recent'] + bow_data['four_wicket_hauls_score_recent'])

#Standardizing recency and consistency and taking average
bow_data['z_recent_score'] = (bow_data['total_score_recent'] - bow_data['total_score_recent'].mean()) / bow_data['total_score_recent'].std()
bow_data['z_consistency_score'] = -1 * (bow_data['consistency_score'] - bow_data['consistency_score'].min()) / bow_data['consistency_score'].std()
bow_data['z_consistency_score'] = bow_data['z_consistency_score'] - bow_data['z_consistency_score'].mean()
bow_data['combined_score'] = (bow_data['z_recent_score'] + bow_data['z_consistency_score']) / 2

print(bow_data.describe())

            bowler_id  total_runs_conceded  total_balls_bowled  \
count          1511.0               1511.0              1511.0   
mean   5408833.405692           312.871608          233.491727   
std    2696220.755774           447.636077          341.172163   
min           34061.0                  0.0                 1.0   
25%         3117403.5                 41.0                25.0   
50%         5425146.0                125.0                92.0   
75%         8056061.0                394.5               294.0   
max         9888584.0               3353.0              2977.0   

       total_wickets_taken  strike_rate    average   economy  \
count               1511.0       1511.0     1511.0    1511.0   
mean             12.385175          inf        inf  8.529593   
std              19.288844         <NA>       <NA>   2.50916   
min                    0.0          3.0        0.0       0.0   
25%                    1.0    16.123966  21.411826  7.222222   
50%                  

In [177]:
#Filter bowlers
bowlers = bow_data[bow_data['total_wickets_taken'] >= 10].sort_values('total_score', ascending=False)[['bowler_id','total_score','consistency_score','total_score_recent','combined_score']].head(10)
print(bowlers)

      bowler_id  total_score  consistency_score  total_score_recent  \
1492    9455557          110            1.61773                 120   
778     5554254          110           0.321448                   0   
784     5595302          110            1.49615                  60   
937     7005550          100           2.593752                 110   
1246    8246440          100           0.918106                 100   
221     2288789          100           0.925495                  30   
1064    7759443          100           1.552449                  60   
311     2789604          100            1.66263                 110   
335     2943443          100           1.381313                  80   
1247    8246573          100           1.070534                  90   

      combined_score  
1492        1.167787  
778        -0.113347  
784         0.290042  
937         0.572743  
1246        1.172081  
221         0.080827  
1064        0.264684  
311         0.992145  
335        

3 Bowlers have a score of 110 so we select all of them

In [178]:
# Merge based on batsman_id / bowler_id
merged = pd.merge(bat_data, bow_data, left_on='batsman_id', right_on='bowler_id', suffixes=('_batsman', '_bowler'), how='outer')
merged['batsman_id'] = merged['batsman_id'].fillna(merged['bowler_id'])

#All-rounder conditions
allRound = merged[(merged['total_runs'] > 10) & merged['wickets_in_2_or_more_matches']]
allRound['total_score'] = allRound['total_score_batsman'] + allRound['total_score_bowler']
allRound = allRound.sort_values('total_score', ascending=False)
allRound['combined_score'] = (allRound['combined_score_batsman'] + allRound['combined_score_bowler']) / 2

allrounder = allRound[['batsman_id','total_score_batsman','total_score_bowler','total_score','combined_score']]
print(allrounder.head(10))


      batsman_id  total_score_batsman  total_score_bowler  total_score  \
350      2288789                 80.0               100.0        180.0   
65        251509                 70.0               100.0        170.0   
1021     4908532                 70.0                90.0        160.0   
1114     5554254                 50.0               110.0        160.0   
624      3298427                 65.0                90.0        155.0   
1558     7906968                 65.0                90.0        155.0   
277      2104339                 75.0                80.0        155.0   
1300     6718802                 65.0                90.0        155.0   
473      2789086                 50.0               100.0        150.0   
4          41740                 70.0                80.0        150.0   

      combined_score  
350         0.234818  
65          0.979125  
1021        0.651636  
1114        0.273425  
624         0.450346  
1558        0.641342  
277          0.39032  
1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  allRound['total_score'] = allRound['total_score_batsman'] + allRound['total_score_bowler']


In [179]:
#We need to select one all rounder with score 155 as one all rounder with score 160 has already been selected as a bowler so we get their combined consistency and recency scores
allrounder[allrounder['total_score'] == 155].sort_values(by=['combined_score'], ascending=False)

Unnamed: 0,batsman_id,total_score_batsman,total_score_bowler,total_score,combined_score
1558,7906968,65.0,90.0,155.0,0.641342
624,3298427,65.0,90.0,155.0,0.450346
1300,6718802,65.0,90.0,155.0,0.438551
277,2104339,75.0,80.0,155.0,0.39032


In [180]:
#Get stats of selected 11
merged[merged['batsman_id'].isin([9455557,
5554254,
5595302,
2263736,
1749075,
3125562,
2275195,
2288789,
251509,
4908532,
7906968])][['batsman_id','total_runs','strike_rate_batsman','average_batsman','hundreds','fifties','total_wickets_taken','strike_rate_bowler','economy','average_bowler','four_wicket_hauls']]

Unnamed: 0,batsman_id,total_runs,strike_rate_batsman,average_batsman,hundreds,fifties,total_wickets_taken,strike_rate_bowler,economy,average_bowler,four_wicket_hauls
65,251509,42.0,190.909091,42.0,0.0,0.0,5.0,12.0,3.2,6.4,0.0
197,1749075,3203.0,153.621103,31.097087,3.0,20.0,,,,,
337,2263736,4031.0,130.74927,55.986111,2.0,38.0,,,,,
341,2275195,2480.0,154.806492,30.617284,3.0,14.0,36.0,24.5,7.85034,32.055556,0.0
350,2288789,729.0,138.068182,36.45,1.0,5.0,41.0,13.439024,6.871143,15.390244,1.0
574,3125562,2551.0,158.25062,35.430556,4.0,19.0,,,,,
1021,4908532,119.0,107.207207,59.5,0.0,0.0,3.0,14.0,5.285714,12.333333,0.0
1114,5554254,795.0,148.876404,13.032787,0.0,1.0,175.0,17.011429,6.75781,19.16,6.0
1123,5595302,8.0,47.058824,2.0,0.0,0.0,21.0,11.428571,5.175,9.857143,2.0
1558,7906968,493.0,132.526882,44.818182,0.0,2.0,17.0,15.529412,6.704545,17.352941,1.0
