In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import random

In [110]:
def LinearRegressor(table, random):
    """Fit ordinary least squares on a 65/35 train/test split of ``table``.

    Every column except the last is used as a feature; the last column is the
    regression target.

    Parameters
    ----------
    table : pandas.DataFrame
        Table whose final column is the target.
    random : int
        Seed forwarded to ``train_test_split`` as ``random_state``. Inside this
        function the name shadows the ``random`` module.

    Returns
    -------
    tuple
        ``(model, model.coef_, r_sq)`` where ``r_sq`` is the R^2 score of the
        fitted model on the held-out 35% of the rows.
    """
    X = table.iloc[:, :-1]
    y = table.iloc[:, -1]

    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.35, random_state=random
    )

    model = LinearRegression()
    model.fit(x_train, y_train)
    # Score only on the held-out rows; no prediction on the full table is
    # needed because nothing downstream consumes it.
    r_sq = model.score(x_test, y_test)

    return model, model.coef_, r_sq

In [4]:
# Load the raw bowler table from the Excel workbook in the working directory.
Bowler_Table = pd.read_excel('2013-2018_Bowler_table.xlsx')

In [7]:
def clean_dataset(df):
    """Return a float-typed copy of ``df`` without NaN or infinite rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to clean. It is not modified; a new frame is returned.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that contain no NaN, ``inf`` or ``-inf`` value,
        with every column cast to ``float``.

    Raises
    ------
    AssertionError
        If ``df`` is not a ``pandas.DataFrame``.
    ValueError
        If a remaining column cannot be cast to ``float`` (raised by ``astype``).
    """
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    # dropna() without inplace keeps the caller's frame untouched.
    without_nan = df.dropna()
    # axis must be passed by keyword: pandas 2.x rejects the positional form.
    has_inf = without_nan.isin([np.inf, -np.inf]).any(axis=1)
    return without_nan[~has_inf].astype(float)

In [10]:
# Drop the first five columns in place.
# NOTE(review): presumably these are identifier/text columns that cannot be
# cast to float -- confirm against the workbook. Because of inplace=True this
# cell is not idempotent: running it twice removes five more columns.
Bowler_Table.drop(Bowler_Table.columns[:5], axis = 1, inplace = True)

In [11]:
# Remove rows containing NaN/inf values and cast the remaining data to float.
Bowler_Table = clean_dataset(Bowler_Table)

In [13]:
def findMaxRState(table, n_states=1000):
    """Return the split seed in ``range(n_states)`` with the highest test R^2.

    One model is fitted per seed via ``LinearRegressor``. When several seeds
    tie, the smallest one is returned.

    Note that the seed is chosen by looking at the test score, so the R^2
    reported for the returned seed is an optimistic estimate, not an unbiased
    measure of generalisation.

    Parameters
    ----------
    table : pandas.DataFrame
        Table passed unchanged to ``LinearRegressor`` (target in last column).
    n_states : int, optional
        Number of consecutive seeds, starting at 0, to try. Defaults to 1000.

    Returns
    -------
    int
        The best-scoring seed.

    Raises
    ------
    ValueError
        If ``n_states`` is less than 1, because there is nothing to pick from.
    """
    return max(range(n_states), key=lambda state: LinearRegressor(table, state)[2])

In [14]:
# Search split seeds for the one with the highest test R^2 on the full table.
findMaxRState(Bowler_Table)

931

In [16]:
# Refit with seed 931, hard-coded from the output of the seed search above,
# and display the model, its coefficients and the test R^2.
LinearRegressor(Bowler_Table, 931)

(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 array([ 1.36174639e-02, -3.03953460e-01,  1.73593157e-02,  6.11161484e-03,
         3.96281254e-02,  2.37957783e-01, -1.24493457e+00, -3.48869010e-01,
         2.63772938e+00, -5.15906762e-15, -1.36039963e-01,  1.79667965e-01,
         4.04368540e-01, -1.29978056e-01,  1.75611591e-01,  8.59046717e-01,
        -3.11765735e-02,  5.70062921e-02,  9.46031068e-01,  2.44249065e-15,
        -8.04272797e-02, -5.89526119e-01, -3.10862447e-15, -4.44089210e-16,
         6.69953399e-01,  4.66293670e-15,  6.71684930e-15, -7.93654504e-01,
         7.93654504e-01,  1.36174639e-02,  1.01672376e+00, -4.19927788e-01,
         1.30692069e-02,  9.50420140e-03,  1.24070698e-02, -4.06496462e-02,
        -1.27075404e-01,  2.00090187e+00, -2.25736611e+00, -8.92591862e-02,
         3.04083820e-01, -1.37342454e-01, -2.83235255e-01,  4.32500329e-01,
         2.62376539e-01, -5.69269211e-01,  9.73389415e-01,  2.27462817e+00,
      

In [17]:
def tryoutColumns(table, num):
    """Pick ``num`` distinct feature column names at random.

    The last column of ``table`` is never selected; it is treated as the
    target. The draw uses the global ``random`` module state, so results vary
    between runs unless ``random.seed`` has been called.

    Parameters
    ----------
    table : pandas.DataFrame
        Table whose columns are sampled.
    num : int
        Number of column names to return.

    Returns
    -------
    list
        ``num`` column labels in random order; empty when ``num <= 0``.

    Raises
    ------
    ValueError
        If ``num`` exceeds the number of feature columns. A rejection-sampling
        loop would never terminate in that case.
    """
    n_features = len(table.columns) - 1
    if num <= 0:
        return []
    if num > n_features:
        raise ValueError(
            "cannot pick %d distinct columns from %d feature columns"
            % (num, n_features)
        )
    # random.sample draws without replacement in a single pass.
    picked = random.sample(range(n_features), num)
    return [table.columns[position] for position in picked]

In [20]:
def returnMaxR2Score(table, num, target='IPL Pts per Match Year+2'):
    """Score one random subset of ``num`` feature columns.

    Parameters
    ----------
    table : pandas.DataFrame
        Source table containing the feature columns and ``target``.
    num : int
        Number of random feature columns to draw with ``tryoutColumns``.
    target : label, optional
        Name of the target column, appended as the last column of the subset.
        Defaults to ``'IPL Pts per Match Year+2'``.

    Returns
    -------
    tuple
        ``(bats, cols, r2_score)``: the sub-table, its column labels (target
        last) and the test R^2 obtained with the best split seed found by
        ``findMaxRState`` on that sub-table.
    """
    # NOTE(review): tryoutColumns only excludes the LAST column of `table`.
    # If `target` is not the last column it can also be drawn as a feature,
    # which would leak the target into X -- confirm the column order of the
    # tables passed in.
    cols = tryoutColumns(table, num)
    cols.append(target)
    bats = table[cols]
    r2_score = LinearRegressor(bats, findMaxRState(bats))[2]
    return bats, cols, r2_score
    

In [21]:
# Test R^2 for one random draw of 30 feature columns. No seed is set for the
# `random` module in the visible cells, so this value changes between runs.
returnMaxR2Score(Bowler_Table, 30)[2]

0.20240728196701707

In [22]:
def SquaredTable(table):
    """Return a copy of ``table`` with a squared column per numeric column.

    For every numeric column ``c`` a new column named ``str(c) + ' ** 2'`` is
    appended holding ``c * c``. Non-numeric columns are left alone. The input
    frame is not modified.

    Columns are selected by dtype rather than by inspecting the first value,
    so an empty table no longer raises ``IndexError``.

    Parameters
    ----------
    table : pandas.DataFrame
        Table to extend.

    Returns
    -------
    pandas.DataFrame
        New frame: the original columns followed by the squared columns.
    """
    squared = table.copy()
    # Iterate over the original columns only, so freshly added squares are
    # never squared again within the same call.
    for col in table.columns:
        if pd.api.types.is_numeric_dtype(table[col]):
            squared[str(col) + ' ** 2'] = table[col] * table[col]

    return squared

In [24]:
# Add the squared columns and write the widened table to disk. to_excel is
# called with its defaults, so the row index is written as an extra column.
SquaredTable(Bowler_Table).to_excel('Squared_Bowler.xlsx')

In [25]:
# Reload the squared table from disk, replacing the in-memory frame.
# NOTE(review): no index_col is given, so the saved index presumably comes
# back as an ordinary 'Unnamed: 0' column that can be drawn as a feature --
# confirm, and pass index_col=0 if that is unintended.
Bowler_Table = pd.read_excel('Squared_Bowler.xlsx')

In [76]:
# Same experiment as before, now on the table that includes squared columns:
# test R^2 for one unseeded random draw of 30 feature columns.
returnMaxR2Score(Bowler_Table, 30)[2]

0.22072709060256124

In [146]:
def OutputR2Score(table):
    """Sweep the number of random features and report the best trial.

    After cleaning ``table`` with ``clean_dataset``, one random feature subset
    is scored with ``returnMaxR2Score`` for every feature count from 10 up to
    ``len(table.columns) - 1``.

    The returned column list is the one that actually produced the returned
    best score. Drawing a fresh random subset for the winning feature count
    would return columns unrelated to that score.

    Parameters
    ----------
    table : pandas.DataFrame
        Table containing the feature columns and the target column.

    Returns
    -------
    tuple
        ``(best_r2, cols, best_count, dictionary)`` where ``cols`` are the
        column labels (target last) of the best trial, ``best_count`` is its
        feature count and ``dictionary`` maps every feature count tried to its
        test R^2.

    Raises
    ------
    ValueError
        If the cleaned table has no rows, or has too few columns for the sweep
        to run at least once.
    """
    table = clean_dataset(table)
    if len(table) == 0:
        # Fail here with a clear message; fitting on zero rows fails anyway.
        raise ValueError("TABLE HAS NO ROWS! ERROR")

    dictionary = dict()
    columns_by_count = dict()
    for i in range(10, len(table.columns)):
        _, cols, max_r2 = returnMaxR2Score(table, i)
        dictionary[i] = max_r2
        columns_by_count[i] = cols

    if not dictionary:
        raise ValueError("table needs at least 11 columns to sweep feature counts from 10")

    best_count = max(dictionary, key=dictionary.get)
    return dictionary[best_count], columns_by_count[best_count], best_count, dictionary

In [148]:
# Reload the squared table and run the feature-count sweep on it.
# The result is a tuple: (best R^2, column list, best feature count,
# {feature count: R^2}).
Bowler_Table = pd.read_excel('Squared_Bowler.xlsx')
array_of_results = OutputR2Score(Bowler_Table)
array_of_results

(0.4420127063607032,
 ['Year Econ Rate ** 2',
  'Year Bowling Average in Death Overs',
  'Year Bowling Average in Powerplay Overs',
  'Year Wickets Taken',
  'Year+1 Bowling SR in Death Overs',
  'Year Bowling SR T20I',
  'Year+1 Econ Rate ** 2',
  'Year Bowling Average',
  'Year Bowling Econ. Rate T20I ** 2',
  'Year Bowling SR in Powerplay Overs ** 2',
  'Year Runs Conceded',
  'Year 5w',
  'Year Bowling Strike Rate ** 2',
  'Year+1 Bowling Econ. Rate T20I',
  0,
  'Year+1 Bowling SR T20I ** 2',
  'Year+1 Spin',
  'Year+1 Bowling Econ. Rate in Powerplay Overs',
  'Year Bowling SR in Death Overs',
  'Year Age ** 2',
  'IPL Pts per Match Year+2'],
 20,
 {10: 0.1719422560424031,
  11: 0.17699601010921928,
  12: 0.2981201135843454,
  13: 0.11756847448481145,
  14: 0.22851971126340487,
  15: 0.2942092021512148,
  16: 0.08865820961251958,
  17: 0.2799142859392917,
  18: 0.30834339924597853,
  19: 0.12024081114794517,
  20: 0.4420127063607032,
  21: 0.34577825998890954,
  22: 0.401492040090

In [149]:
# Select the column subset reported by the sweep in the previous cell; the
# list is pasted from that cell's output, with the target column last.
# Note the bare integer 0: it is used as a column label, not as a position.
test_table = Bowler_Table[['Year Econ Rate ** 2',
  'Year Bowling Average in Death Overs',
  'Year Bowling Average in Powerplay Overs',
  'Year Wickets Taken',
  'Year+1 Bowling SR in Death Overs',
  'Year Bowling SR T20I',
  'Year+1 Econ Rate ** 2',
  'Year Bowling Average',
  'Year Bowling Econ. Rate T20I ** 2',
  'Year Bowling SR in Powerplay Overs ** 2',
  'Year Runs Conceded',
  'Year 5w',
  'Year Bowling Strike Rate ** 2',
  'Year+1 Bowling Econ. Rate T20I',
  0,
  'Year+1 Bowling SR T20I ** 2',
  'Year+1 Spin',
  'Year+1 Bowling Econ. Rate in Powerplay Overs',
  'Year Bowling SR in Death Overs',
  'Year Age ** 2',
  'IPL Pts per Match Year+2']]

In [150]:
# NOTE(review): the split seed is tuned on the full Bowler_Table but applied
# to test_table, a different column subset, so it is not the best seed for
# the model fitted here. The next cell tunes the seed on test_table itself.
LinearRegressor(test_table, findMaxRState(Bowler_Table))

(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 array([-2.61678351e-02, -7.17193688e-02,  2.84994746e-02,  3.13015765e-01,
        -5.18176076e-02,  1.89596173e-01,  2.82884327e-02,  9.62391308e-02,
         1.07599279e-02, -1.88709448e-04, -1.52525707e-02, -3.74176809e+00,
        -2.48702857e-03,  3.19729315e-01,  3.55271368e-15,  1.09651807e-03,
        -5.21141949e-01, -1.32344459e-01,  2.15165643e-01,  4.50201481e-04]),
 -0.04377186054597626)

In [151]:
# Test R^2 of test_table using the split seed tuned on test_table itself.
LinearRegressor(test_table, findMaxRState(test_table))[2]

0.2768862636504932

In [172]:
# Keep redrawing random 20-feature subsets until one reaches a test R^2 of at
# least 0.40.
# NOTE(review): the loop has no attempt limit, so it runs forever if no draw
# reaches the threshold. The accepted subset is selected on its test score,
# so that score is an optimistic estimate.
a = returnMaxR2Score(Bowler_Table, 20)
while a[2] < 0.40:
    a = returnMaxR2Score(Bowler_Table, 20)

In [192]:

# Refit on the accepted sub-table a[0] with its best split seed and display
# the model, coefficients and test R^2.
LinearRegressor(a[0], findMaxRState(a[0]))

(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 array([-6.78179743e-03,  3.67555906e-02, -1.99064838e-02,  1.28579516e-03,
        -8.59857497e-04,  6.94987732e-02,  7.90380132e-03, -1.26782540e-02,
         4.55297896e-01,  3.59556386e-02, -8.09083363e-02, -1.67417902e-01,
        -3.85326050e-05, -1.65241060e-02,  2.59929468e-03,  2.37765931e-01,
         1.51558176e-01,  2.57263108e-01,  1.70930127e-03,  2.90498822e-03]),
 0.45275061018589513)

In [182]:
# Persist the accepted sub-table; to_excel defaults also write the row index.
a[0].to_excel('BowlerFinal.xlsx')

In [185]:
# Display `read`.
# NOTE(review): in the visible cells `read` is first assigned in the cell
# below, so on a fresh top-to-bottom run this cell would raise NameError --
# move it after the load.
read

Unnamed: 0.1,Unnamed: 0,Year Econ Rate ** 2,Year+1 Bowling Econ. Rate T20I ** 2,Year Bowling Innings ** 2,Year Age ** 2,Year+1 Bowling Average in Death Overs ** 2,Year Bowling SR in Death Overs,Year Wickets Taken ** 2,Year+1 Bowling Average in Death Overs,Year+1 Econ Rate,...,Year+1 Fast,Year Bowling Average in Death Overs ** 2,Year+1 Bowling Innings ** 2,Year Bowling SR T20I ** 2,Year+1 Wickets Taken,Year+1 Bowling SR in Death Overs,Year Bowling Innings,Year+1 Bowling SR T20I ** 2,Year Bowling Econ. Rate in Powerplay Overs ** 2,IPL Pts per Match Year+2
0,0,41.2164,69.693710,4,576,769.320105,12.601727,9,27.736620,7.88,...,1,433.750285,16.0,422.194349,3.0,16.518863,2,435.369810,46.063872,11.500000
1,1,76.9129,69.693710,100,1089,90.250000,10.571429,121,9.500000,9.46,...,0,377.469388,16.0,422.194349,8.0,6.166667,10,435.369810,65.610000,15.906250
2,2,89.8704,69.693710,169,784,12996.000000,12.818182,256,114.000000,8.70,...,0,615.942149,81.0,422.194349,5.0,56.000000,13,435.369810,57.760000,3.250000
3,3,32.0356,60.840000,289,841,522.448980,8.812500,361,22.857143,7.69,...,1,103.785156,196.0,112.360000,11.0,15.000000,17,148.840000,25.000000,10.166667
4,4,65.2864,69.693710,36,576,7569.000000,13.000000,64,87.000000,8.18,...,0,225.000000,64.0,422.194349,6.0,49.000000,6,435.369810,60.840000,13.909091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,87,50.2681,40.960000,121,729,576.000000,23.500000,121,24.000000,6.98,...,0,1024.000000,225.0,7396.000000,10.0,24.666667,11,384.160000,54.760000,23.718750
88,88,58.5225,132.250000,121,729,441.000000,18.000000,81,21.000000,9.60,...,0,812.250000,9.0,384.595679,3.0,13.000000,11,625.000000,51.840000,12.500000
89,89,95.0625,70.560000,1,676,1089.000000,13.000000,1,33.000000,9.06,...,1,676.000000,36.0,338.560000,5.0,18.500000,1,179.560000,38.440000,14.535714
90,90,81.0000,63.872064,81,784,180.326531,6.444444,100,13.428571,8.45,...,1,149.382716,196.0,384.595679,17.0,7.142857,9,343.879936,60.840000,15.928571


In [186]:
# Reload the saved sub-table (first spreadsheet column as index), find the
# best split seed for it, print the seed and refit with it.
read = pd.read_excel('BowlerFinal.xlsx', index_col=0)
random2 = findMaxRState(read)
print(random2)
LinearRegressor(read, random2)

680


(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 array([-6.78179743e-03,  3.67555906e-02, -1.99064838e-02,  1.28579516e-03,
        -8.59857497e-04,  6.94987732e-02,  7.90380132e-03, -1.26782540e-02,
         4.55297896e-01,  3.59556386e-02, -8.09083363e-02, -1.67417902e-01,
        -3.85326050e-05, -1.65241060e-02,  2.59929468e-03,  2.37765931e-01,
         1.51558176e-01,  2.57263108e-01,  1.70930127e-03,  2.90498822e-03]),
 0.45275061018589513)

In [144]:
# Score a fixed, explicitly listed column subset (target column last) using
# the best split seed found for that subset.
test = Bowler_Table[['Year Bowling Econ. Rate in Death Overs ** 2',
  'Year Age ** 2',
  'Year Econ Rate',
  'Year Bowling SR T20I',
  'Year Wickets Taken',
  'Year+1 5w',
  'Year Bowling Econ. Rate in Powerplay Overs',
  'Year+1 Overs ** 2',
  'Year+1 Bowling Econ. Rate T20I ** 2',
  'Year+1 Bowling Average ** 2',
  'Year Bowling SR in Death Overs ** 2',
  'Year+1 Econ Rate',
  'Year+1 Age ** 2',
  'Year Bowling Average ** 2',
  'Year+1 Bowling Econ. Rate T20I',
  'Year+1 4w ** 2',
  'Year Bowling Innings',
  'IPL Pts per Match Year+2']]
LinearRegressor(test, findMaxRState(test))[2]

0.32618716173837076

In [140]:
# Load the feature set stored in Bowler7.xlsx, find its best split seed,
# print the seed and refit with it.
# NOTE(review): read without index_col, so a saved index column, if present,
# is treated as a feature -- confirm.
LogicFeatures = pd.read_excel('Bowler7.xlsx')
random2 = findMaxRState(LogicFeatures)
print(random2)
LinearRegressor(LogicFeatures, random2)

147


(LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 array([ 1.63146037e-03, -4.92041964e-01, -3.45622060e-01,  1.24951046e-01,
        -6.52256027e-16,  1.28307744e-02,  6.09516722e-04, -5.48755537e-03,
         6.56117236e-05,  2.01511469e-03, -8.40315307e-02,  4.76025257e-01,
        -1.49042394e-05,  3.78303472e-01,  2.04094847e-01, -1.48076851e-01]),
 0.1529773510085808)

In [139]:
# Reload Bowler7.xlsx and run the feature-count sweep on it.
# NOTE(review): the output shown below is a 3-tuple while OutputR2Score as
# defined in this notebook returns four items, so it presumably comes from an
# older version of the function -- re-run to refresh.
LogicFeatures = pd.read_excel('Bowler7.xlsx')

OutputR2Score(LogicFeatures)

(0.40444488883425844,
 ['Year Bowling Econ. Rate in Powerplay Overs',
  'Year+1 Bowling Econ. Rate T20I',
  'Year Bowling Average ** 2',
  'Year+1 Bowling Econ. Rate T20I ** 2',
  'Year Econ Rate',
  'Year Bowling Innings',
  'Year+1 Econ Rate',
  'Year+1 5w',
  'Year Bowling Econ. Rate in Death Overs ** 2',
  'Year Wickets Taken',
  'Year Age ** 2',
  'Year Bowling SR in Death Overs ** 2',
  'Year+1 Overs ** 2',
  'Year+1 Bowling Average ** 2',
  'Year+1 Age ** 2',
  'Year+1 4w ** 2',
  'IPL Pts per Match Year+2'],
 16)

In [87]:
# Number of columns in whichever LogicFeatures frame is currently loaded.
len(LogicFeatures.columns)

12

In [60]:
# Save a fixed column subset (target column last) of Bowler_Table to
# Bowler1.xlsx; to_excel defaults also write the row index.
bowl_array = ['Year+1 Bowling Average T20I ** 2','Year Bowling Innings','Year+1 Spin','Year Bowling SR in Powerplay Overs','Year Bowling Average in Death Overs','Year+1 Bowling SR in Death Overs ** 2','Year+1 Bowling Average T20I','Year 5w ** 2','Year+1 Bowling Strike Rate ** 2','Year Bowling SR in Death Overs','Year+1 Bowling Average','Year Runs Conceded','Year+1 Bowling Econ. Rate in Powerplay Overs ** 2','Year+1 Bowling Average in Powerplay Overs ** 2','Year+1 Bowling Econ. Rate T20I','Year Bowling Econ. Rate in Death Overs','IPL Pts per Match Year+2']
Bowler_Table[bowl_array].to_excel('Bowler1.xlsx')

In [83]:
# Display the currently loaded LogicFeatures frame.
LogicFeatures

Unnamed: 0,Year Econ Rate,Year Bowling Innings ** 2,Year+1 5w,Year+1 Bowling SR in Powerplay Overs ** 2,Year+1 Bowling Econ. Rate in Powerplay Overs,Year Overs,Year+1 Bowling Average T20I ** 2,Year 5w,Year 4w ** 2,Year Bowling Econ. Rate in Powerplay Overs,Year 5w ** 2,IPL Pts per Match Year+2
0,6.42,4,0.0,494.790255,8.653061,7.0,744.160773,0,0,6.787037,0,11.500000
1,8.77,100,0.0,600.250000,8.100000,36.0,744.160773,0,0,8.100000,0,15.906250
2,9.48,169,0.0,277.777778,8.800000,49.0,744.160773,0,0,7.600000,0,3.250000
3,5.66,289,0.0,1156.000000,6.300000,67.5,249.640000,0,0,5.000000,0,10.166667
4,8.08,36,0.0,529.000000,6.000000,18.4,744.160773,0,0,7.800000,0,13.909091
...,...,...,...,...,...,...,...,...,...,...,...,...
87,7.09,121,0.0,1600.000000,7.000000,42.5,432.640000,0,0,7.400000,0,23.718750
88,7.65,121,0.0,1600.000000,9.700000,43.0,2209.000000,0,0,7.200000,0,12.500000
89,9.75,1,0.0,4624.000000,7.400000,4.0,345.960000,0,0,6.200000,0,14.535714
90,9.00,81,0.0,548.897959,8.300000,26.0,629.809216,0,0,7.800000,0,15.928571


In [187]:
def pretty_print_coefs(intercept, coefs, columns):
    """Render a fitted linear model as a one-line text formula.

    The result is ``str(intercept)``, a space, and then one
    ``"<coef rounded to 3 decimals> * <column>"`` term per coefficient, with
    the terms joined by ``" + "``. Coefficients and columns are paired
    positionally; extra items in the longer sequence are ignored.
    """
    terms = []
    for weight, label in zip(coefs, columns):
        terms.append(str(round(weight, 3)) + " * " + str(label))
    formula = " + ".join(terms)
    return "%s %s" % (intercept, formula)

In [188]:
from sklearn.model_selection import KFold, cross_val_score

In [209]:
def crossValidation(model, X, y, split_num, verbose=True):
    """Run un-shuffled K-fold cross-validation and return the fold scores.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``cross_val_score``.
    X, y :
        Feature table and target passed to ``cross_val_score``.
    split_num : int
        Number of folds (``KFold(n_splits=split_num)``, default settings
        otherwise).
    verbose : bool, optional
        When true (the default) the train/test row indices of every fold are
        printed. Pass ``False`` to avoid flooding the notebook output when
        calling this in a loop.

    Returns
    -------
    numpy.ndarray
        One score per fold, as returned by ``cross_val_score``.
    """
    k_fold = KFold(n_splits=split_num)
    if verbose:
        for train_indices, test_indices in k_fold.split(X):
            print('Train: %s | test: %s' % (train_indices, test_indices))
    # n_jobs=-1 asks cross_val_score to use all available cores.
    return cross_val_score(model, X, y, cv=k_fold, n_jobs=-1)

In [193]:
# Split the reloaded table into features (all but the last column) and target
# (last column), and keep the model fitted with the best split seed.
X,y = read.iloc[:,:-1], read.iloc[:,-1]
mod = LinearRegressor(read, findMaxRState(read))[0]

In [210]:
# For 3 to 14 folds, record a summary of the cross-validation scores.
# NOTE(review): the dictionary is named `crossvalmean` but stores the MAXIMUM
# fold score (.max()), i.e. the single best fold -- confirm whether .mean()
# was intended.
crossvalmean= dict()
for i in range(3,15):
    crossvalmean[i] = crossValidation(mod, X,y,i).max()

Train: [31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
 79 80 81 82 83 84 85 86 87 88 89 90 91] | test: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30]
Train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
 79 80 81 82 83 84 85 86 87 88 89 90 91] | test: [31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 61]
Train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61] | test: [62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
 86 87 88 89 90 91]
Train: [23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
 47 48 49 50 51 52 53 54 5

 79 80 81 82 83 84 85 86 87 88 89 90 91] | test: [71 72 73 74 75 76 77]
Train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 85 86 87 88 89 90 91] | test: [78 79 80 81 82 83 84]
Train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84] | test: [85 86 87 88 89 90 91]
Train: [ 8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
 80 81 82 83 84 85 86 87 88 89 90 91] | test: [0 1 2 3 4 5 6 7]
Train: [ 0  1  2  3  4  5  6  7 15 16 17 1

In [211]:
# Show the individual fold scores for 14-fold cross-validation.
crossValidation(mod,X,y,14)

Train: [ 7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
 79 80 81 82 83 84 85 86 87 88 89 90 91] | test: [0 1 2 3 4 5 6]
Train: [ 0  1  2  3  4  5  6 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
 79 80 81 82 83 84 85 86 87 88 89 90 91] | test: [ 7  8  9 10 11 12 13]
Train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 21 22 23 24 25 26 27 28 29 30
 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
 79 80 81 82 83 84 85 86 87 88 89 90 91] | test: [14 15 16 17 18 19 20]
Train: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 28 29 30
 31 32 33 34 35 36 37 38 39 40 41

array([-0.15021762,  0.40748085, -1.13567723, -0.35822077, -1.57131957,
       -0.03483589, -0.17260441, -0.5215631 , -0.33736883,  0.09296132,
       -0.26308546, -0.40232824, -3.42793657,  0.4708784 ])

In [212]:
# Manual positional split: rows 0-85 for training, rows 86-91 for testing.
# NOTE(review): this looks like the last fold of the 14-fold run above -- the
# score obtained with it matches that run's last fold score -- so the split
# appears to be chosen for its result; confirm.
train_indices = pd.Series(np.arange(0,86))
test_indices = pd.Series(np.arange(86,92))

In [213]:
# Refit `mod` on the manual training rows (this replaces its earlier fit) and
# report R^2 on the six held-out rows.
mod.fit(X.iloc[train_indices], y.iloc[train_indices])
mod.score(X.iloc[test_indices], y.iloc[test_indices])

0.4708783977561438

In [214]:
# Render the refitted model as a text formula (intercept followed by terms).
pretty_print_coefs(mod.intercept_, mod.coef_, X.columns)

'0.7522489545267028 -0.001 * Year Econ Rate ** 2 + 0.036 * Year+1 Bowling Econ. Rate T20I ** 2 + -0.015 * Year Bowling Innings ** 2 + 0.001 * Year Age ** 2 + -0.001 * Year+1 Bowling Average in Death Overs ** 2 + 0.111 * Year Bowling SR in Death Overs + 0.008 * Year Wickets Taken ** 2 + -0.018 * Year+1 Bowling Average in Death Overs + 0.287 * Year+1 Econ Rate + 0.245 * Year 4w ** 2 + -0.049 * Year+1 Bowling Average T20I + -0.536 * Year+1 Fast + -0.0 * Year Bowling Average in Death Overs ** 2 + -0.011 * Year+1 Bowling Innings ** 2 + 0.002 * Year Bowling SR T20I ** 2 + 0.121 * Year+1 Wickets Taken + 0.153 * Year+1 Bowling SR in Death Overs + 0.249 * Year Bowling Innings + 0.001 * Year+1 Bowling SR T20I ** 2 + 0.008 * Year Bowling Econ. Rate in Powerplay Overs ** 2'

In [263]:
# Predict for every row (training rows included) and start the report table.
# The frame is created empty; assigning `y` gives it y's index. The name,
# season and salary columns are filled in by later cells.
y_pred = mod.predict(X)
InputOutputBowler = pd.DataFrame(columns = ['Player Name','Season Year+2','IPL Pts per Match Year+2', 'Predicted Points', 'Actual Salary Year+2'])
InputOutputBowler['IPL Pts per Match Year+2'] = y
InputOutputBowler['Predicted Points'] = y_pred

In [265]:
# Reload the raw workbook; the cells below read 'Player Name' and
# 'Year+1 Season' from it to label the rows of the report table.
Bowler_Table2 = pd.read_excel('2013-2018_Bowler_table.xlsx')

In [266]:
# Label every report row with the player name and season of the raw-table row
# whose target value and two feature values are all equal to it. When several
# raw rows match, the last one wins.
# NOTE(review): matching relies on exact float equality and on both frames
# having labels 0..n-1 -- confirm.
for i in range(len(InputOutputBowler)):
    for count in range(len(Bowler_Table2)):
        same_row = (
            InputOutputBowler.at[i, 'IPL Pts per Match Year+2'] == Bowler_Table2.at[count, 'IPL Pts per Match Year+2']
            and X.at[i, 'Year+1 Bowling Average T20I'] == Bowler_Table2.at[count, 'Year+1 Bowling Average T20I']
            and X.at[i, 'Year Bowling SR in Death Overs'] == Bowler_Table2.at[count, 'Year Bowling SR in Death Overs']
        )
        if same_row:
            # .at writes straight into the frame; chained indexing
            # (df[col][i] = ...) may write to a temporary copy instead.
            InputOutputBowler.at[i, 'Player Name'] = Bowler_Table2.at[count, 'Player Name']
            InputOutputBowler.at[i, 'Season Year+2'] = Bowler_Table2.at[count, 'Year+1 Season'] + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [267]:
def ActualSalary(Season, player):
    """Look up a player's salary in a season's master sheet.

    Reads ``'<Season>FinalMasterSheet.xlsx'`` from the working directory,
    taking the column names from the second spreadsheet row (``header=1``).

    Returns the ``'Salary(Rupees Crore)'`` value of the first row whose
    ``'Player Name'`` equals ``str(player)``, or ``None`` when no row matches.
    """
    sheet = pd.read_excel(str(Season)+'FinalMasterSheet.xlsx', header=1)
    is_player = sheet['Player Name'] == str(player)
    salaries = sheet.loc[is_player, 'Salary(Rupees Crore)']
    if len(salaries) > 0:
        return salaries.iloc[0]
    return None

In [268]:
# Fill in the salary for every report row from that season's master sheet.
# Season 2020 is skipped and marked with the string 'None'.
# NOTE(review): presumably there is no 2020 master sheet -- confirm. Rows
# whose season was never filled in are still passed to ActualSalary.
for count in range(len(InputOutputBowler)):
    if InputOutputBowler.at[count, 'Season Year+2'] != 2020:
        # .at writes straight into the frame; chained indexing
        # (df[col][count] = ...) may write to a temporary copy instead.
        InputOutputBowler.at[count, 'Actual Salary Year+2'] = ActualSalary(
            InputOutputBowler.at[count, 'Season Year+2'],
            InputOutputBowler.at[count, 'Player Name'],
        )
    else:
        InputOutputBowler.at[count, 'Actual Salary Year+2'] = 'None'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [269]:
# Display the finished report table (actual vs. predicted points and salary).
InputOutputBowler

Unnamed: 0,Player Name,Season Year+2,IPL Pts per Match Year+2,Predicted Points,Actual Salary Year+2
0,Abu Nechim,2015,11.500000,9.560071,0.3
1,A Nehra,2015,15.906250,12.419961,2
2,AB Dinda,2015,3.250000,6.654448,1.5
3,DW Steyn,2015,10.166667,9.941150,9.5
4,DS Kulkarni,2015,13.909091,9.341937,1.1
...,...,...,...,...,...
87,SP Narine,2018,23.718750,25.960626,12.5
88,TG Southee,2018,12.500000,13.839139,1
89,TA Boult,2018,14.535714,9.522695,2.2
90,UT Yadav,2018,15.928571,9.490404,4.2


In [270]:
# Write the report table to disk; to_excel defaults also write the row index.
InputOutputBowler.to_excel('InputOutputBowler.xlsx')