In [24]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn import preprocessing


In [25]:
data = pd.read_csv('augmented_train.csv')

In [26]:
data.head(5)

Unnamed: 0,PLAYER,class,form,2018_Runs,2019_Runs
0,Aaron Finch,1.0,53.7,134,160
1,AB de Villiers,3.0,70.0,480,424
2,Abhishek Sharma,0.0,20.0,63,63
3,Ajinkya Rahane,2.0,50.3,370,396
4,Alex Hales,0.0,20.0,148,165


In [27]:
x = data.drop(['2019_Runs', 'PLAYER'], axis = 1)
y = (data['2019_Runs'])

In [28]:
x.shape, y.shape

((100, 3), (100,))

In [29]:
X = ((x.to_numpy()).astype(float))
Y = (y.to_numpy()).astype(int)

In [7]:
X[:,1] = (X[:,1] - X[:,1].min())/(X[:,1].max() - X[:,1].min())

In [8]:
X[:,2] = (X[:,2] - X[:,2].min())/(X[:,2].max() - X[:,2].min())

In [9]:
Y = (Y - Y.min())/(Y.max() - Y.min())

In [10]:
X[:4]

array([[1.        , 0.53070866, 0.16759003],
       [3.        , 0.78740157, 0.6468144 ],
       [0.        , 0.        , 0.06925208],
       [2.        , 0.47716535, 0.49445983]])

In [11]:
Y[:4]

array([0.22108346, 0.60761347, 0.07906296, 0.56661786])

In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

In [31]:
from sklearn.model_selection import train_test_split

In [32]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

In [33]:
x_train = torch.autograd.Variable(torch.tensor(X_train).float()) 
x_test = (torch.tensor(X_test).long())
y_train = torch.autograd.Variable(torch.tensor(Y_train).float())
y_test = torch.tensor(Y_test).long()

In [34]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [35]:
# Hyper-parameters 
input_size = X.shape[1]
hidden_size = 5
num_classes = 1
num_epochs = 10
batch_size = 10
learning_rate = 0.001

## Neural Network with Torch

In [41]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 5) 
        self.fc2 = nn.Linear(5,1)
        self.fc3 = nn.Linear(2,1)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
#         out = self.fc3(out)
#         out = self.relu(out)
        return out
    
model = NeuralNet(input_size, num_classes).to(device)   
# Loss and optimizer
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [42]:

# Training in batches
for epoch in range(num_epochs):
    for i in tqdm(range(0, len(x_train), batch_size)): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
        #print(f"{i}:{i+BATCH_SIZE}")
        correct = 0
        total = 0
        batch_X = x_train[i:i+batch_size].view(-1,X.shape[1])
        batch_y = y_train[i:i+batch_size].flatten()

        model.zero_grad()

        outputs = model(batch_X)
                
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update

    print(f"Epoch: {epoch}. Loss: {loss} ")

100%|██████████| 8/8 [00:00<00:00, 759.18it/s]
100%|██████████| 8/8 [00:00<00:00, 755.93it/s]
100%|██████████| 8/8 [00:00<00:00, 757.85it/s]
100%|██████████| 8/8 [00:00<00:00, 507.83it/s]
100%|██████████| 8/8 [00:00<00:00, 728.19it/s]
100%|██████████| 8/8 [00:00<00:00, 419.73it/s]
100%|██████████| 8/8 [00:00<00:00, 519.19it/s]
100%|██████████| 8/8 [00:00<00:00, 395.99it/s]
100%|██████████| 8/8 [00:00<00:00, 452.44it/s]
100%|██████████| 8/8 [00:00<00:00, 441.01it/s]

Epoch: 0. Loss: 95753.296875 
Epoch: 1. Loss: 95753.296875 
Epoch: 2. Loss: 95753.296875 
Epoch: 3. Loss: 95753.296875 
Epoch: 4. Loss: 95753.296875 
Epoch: 5. Loss: 95753.296875 
Epoch: 6. Loss: 95753.296875 
Epoch: 7. Loss: 95753.296875 
Epoch: 8. Loss: 95753.296875 
Epoch: 9. Loss: 95753.296875 





In [43]:
for i in range(len(outputs)):    
    #print(batch_y[i],'  ', outputs[i])
    print((batch_y[i]),'  ', (outputs[i].data))

tensor(464.)    tensor([0.])
tensor(373.)    tensor([0.])
tensor(402.)    tensor([0.])
tensor(445.)    tensor([0.])
tensor(11.)    tensor([0.])
tensor(15.)    tensor([0.])
tensor(86.)    tensor([0.])
tensor(393.)    tensor([0.])
tensor(42.)    tensor([0.])
tensor(282.)    tensor([0.])


## Simple linear regression with sckit-learn

In [44]:
from sklearn.linear_model import LinearRegression

In [45]:
reg = LinearRegression().fit(X_train, Y_train)

In [46]:
reg.score(X_train, Y_train)

0.981749756400226

In [50]:
reg.score(X_test, Y_test)

0.9782157806732109

In [59]:
for i in range(20):
    pred = round(reg.predict(X_test)[i])
    print(pred,' ',Y_test[i])


16   9
44   26
132   98
114   156
25   15
18   9
99   101
525   510
196   184
154   160
112   143
75   81
633   593
266   311
424   405
126   165
93   98
417   405
109   141
194   183


In [55]:
(reg.predict((X_train)))[2]

41.36915775081276

In [57]:
(X_train)[2]

array([  3. ,  58.6, 498. ])

## Testing for 2019 given data

In [60]:
data = pd.read_csv('augmented_test.csv')

In [61]:
data.head(5)

Unnamed: 0,PLAYER,class,form,2019_Runs
0,David Warner,3.0,51.2,692
1,KL Rahul,3.0,58.3,593
2,Quinton de Kock,3.0,62.4,529
3,Shikhar Dhawan,3.0,61.8,521
4,Andre Russell,3.0,85.5,510


In [70]:
x = data.drop(['PLAYER'], axis = 1)
players = []
for player in data['PLAYER'].values:
    player = player.replace(u'\xa0', u' ')
    players.append(str(player))
    

In [71]:
X = ((x.to_numpy()).astype(float))

In [72]:
X.shape

(100, 3)

In [76]:
X[0]

array([  3. ,  51.2, 692. ])

In [88]:
results = pd.read_csv('results.csv')

In [111]:
for i,row in data.iterrows():
    player = row['PLAYER']
    test = X[i].reshape(1,3)
    score = int(np.round((reg.predict(test))))
    results['2020_Runs'][results['PLAYER']==player] = score
#     results.iloc[i][1] = str(score)
    print(players[i], ' ', score)
    


David Warner   619
KL Rahul   545
Quinton de Kock   497
Shikhar Dhawan   490
Andre Russell   505
Chris Gayle   481
Rishabh Pant   465
Virat Kohli   439
Shreyas Iyer   435
Jonny Bairstow   434
AB de Villiers   428
Suryakumar Yadav   406
MS Dhoni   398
Chris Lynn   407
Rohit Sharma   400
Hardik Pandya   403
Shane Watson   394
Faf du Plessis   379
Ajinkya Rahane   377
Suresh Raina   371
Parthiv Patel   369
Prithvi Shaw   351
Manish Pandey   328
Nitish Rana   346
Sanju Samson   332
Mayank Agarwal   325
Steve Smith   303
Jos Buttler   258
Shubman Gill   288
Ambati Rayudu   271
Robin Uthappa   289
Kieron Pollard   292
Dinesh Karthik   272
Vijay Shankar   245
Moeen Ali   253
David Miller   227
Marcus Stoinis   225
Colin Ingram   201
Krunal Pandya   196
Sarfaraz Khan   152
Nicholas Pooran   142
Mandeep Singh   165
Kedar Jadhav   181
Riyan Parag   135
Kane Williamson   169
Sunil Narine   193
Rahul Tripathi   120
Ben Stokes   137
Mohammad Nabi   99
Axar Patel   134
Ravindra Jadeja   127
Ishan Ki

In [112]:
results

Unnamed: 0,PLAYER,2020_Runs
0,David Warner,619
1,KL Rahul,545
2,Quinton de Kock,497
3,Shikhar Dhawan,490
4,Andre Russell,505
...,...,...
95,Carlos Brathwaite,15
96,Ishant Sharma,14
97,Shakib Al Hasan,13
98,Pawan Negi,13


In [113]:
results.to_csv('results.csv',index = False)

In [90]:
int(score)

12

In [108]:
results.iloc[0][1]

'619'