# **Importing libraries**

In [None]:
import csv
import random
import math
import numpy as np
import pandas as pd

# **Loading file**


In [None]:
# Read the CSV into a pandas DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
dataset=pd.read_csv("IPL.csv")

# **Separating features (x) and target (y)**

In [None]:
# Target: the last column of the frame, as a NumPy array.
y = dataset.iloc[:, -1].values
# Features: columns from position 7 up to (but excluding) the last one.
# NOTE(review): columns 0-6 are skipped - presumably non-feature fields;
# confirm against the layout of IPL.csv.
x = dataset.iloc[:, 7:-1].values

# **Adding bias term to x**

In [None]:
# Prepend a column of ones to x so the first fitted coefficient is the
# bias (intercept) term.
one = np.ones((len(x), 1))
x = np.concatenate((one, x), axis=1)

# **Reshaping y to column matrix**

In [None]:
# Turn the target into an explicit (n_rows, 1) column matrix so it lines up
# with the matrix products used later, then show the resulting shape.
y = np.array(y).reshape(len(y), 1)
print(y.shape)

(76014, 1)


# **Splitting the dataset randomly**

In [None]:
def train_test_split(x, y, split, seed=None):
    """Randomly partition the rows of ``x`` and ``y`` into train and test sets.

    Parameters
    ----------
    x : np.ndarray
        Feature array; rows (axis 0) are the samples.
    y : np.ndarray
        Targets with the same number of rows as ``x``. May be 1-D or 2-D.
    split : float
        Fraction of rows, in ``[0, 1]``, assigned to the training set. The
        training size is ``round(split * len(x))``.
    seed : int, optional
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        split is reproducible. When ``None`` (default) the module-level
        ``random`` generator is used, exactly as before.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)``; rows of ``x`` and ``y`` stay
        aligned because both are indexed with the same shuffled indices.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different lengths or ``split`` is outside
        ``[0, 1]``.
    """
    n_rows = len(x)
    if len(y) != n_rows:
        raise ValueError(
            "x and y must have the same number of rows "
            "(got %d and %d)" % (n_rows, len(y))
        )
    if not 0 <= split <= 1:
        raise ValueError("split must be between 0 and 1, got %r" % (split,))

    train_size = round(split * n_rows)

    # Shuffle a plain Python list: random.shuffle is specified for mutable
    # sequences, and swapping inside a NumPy array relies on element copies.
    # The permutation produced for a given RNG state is unchanged.
    shuffled = list(range(n_rows))
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)
    indices = np.array(shuffled, dtype=int)

    train_indices = indices[:train_size]
    test_indices = indices[train_size:]

    # Index along axis 0 only, so 1-D targets work as well as column matrices.
    x_train = x[train_indices]
    x_test = x[test_indices]
    y_train = y[train_indices]
    y_test = y[test_indices]

    return x_train, y_train, x_test, y_test

In [None]:
# Use 70% of the rows for training and the remaining 30% for testing.
split = 0.7
X_train, Y_train, X_test, Y_test = train_test_split(x, y, split)

# Show the four resulting arrays, one after another.
print(X_train, Y_train, X_test, Y_test, sep="\n")

[[  1. 103.   5. ...   2.  37.   0.]
 [  1.  38.   1. ...   1.  23.  13.]
 [  1.  23.   2. ...   2.  19.   2.]
 ...
 [  1. 100.   1. ...   1.  22.  14.]
 [  1.  94.   4. ...   2.   8.   6.]
 [  1. 135.   4. ...   2.  13.   1.]]
[[132]
 [115]
 [ 95]
 ...
 [192]
 [166]
 [170]]
[[  1.  95.   3. ...   1.  28.  12.]
 [  1. 140.   6. ...   1.  36.   0.]
 [  1. 131.   3. ...   1.  54.  12.]
 ...
 [  1. 155.   5. ...   2.  27.   2.]
 [  1.  45.   0. ...   0.  23.  21.]
 [  1.  32.   0. ...   0.  14.  11.]]
[[142]
 [143]
 [153]
 ...
 [176]
 [142]
 [196]]


# **Beta value and prediction**

In [None]:
def normal_equation(x, y):
    """Return the ordinary-least-squares coefficients for ``x @ beta ~= y``.

    Parameters
    ----------
    x : array_like, shape (n_samples, n_features)
        Design matrix (include a column of ones if an intercept is wanted).
    y : array_like, shape (n_samples,) or (n_samples, k)
        Target values.

    Returns
    -------
    np.ndarray
        Coefficients ``beta`` with shape ``(n_features,)`` or
        ``(n_features, k)``, matching the dimensionality of ``y``.

    Notes
    -----
    For a full-rank ``x`` this equals ``inv(x.T @ x) @ x.T @ y``. The
    solution is obtained with ``np.linalg.lstsq`` rather than by explicitly
    inverting ``x.T @ x``: forming and inverting that product amplifies
    rounding error and raises ``LinAlgError`` when columns are collinear,
    whereas ``lstsq`` returns the minimum-norm least-squares solution.
    """
    beta, _residuals, _rank, _singular_values = np.linalg.lstsq(x, y, rcond=None)

    return beta
def predict(X_test, beta):
    """Return the model output for each row of ``X_test``.

    The result is the dot product of ``X_test`` with the coefficient
    array ``beta``.
    """
    fitted_values = np.dot(X_test, beta)
    return fitted_values

In [None]:
# Fit the coefficients on the training split and display them.
beta = normal_equation(X_train, Y_train)
print(beta)

# Predict for every held-out row, then for one hand-written feature row.
# The row has eight entries; the leading 1 lines up with the bias column
# that was prepended to x.
predictions = predict(X_test, beta)
prediction = predict(
    [[1, 201, 3, 19, 71, 2, 82, 17]],
    beta,
)

print(prediction)

[[ 1.61371585e+02]
 [ 9.31902072e-01]
 [-4.87744113e+00]
 [-5.95794674e+00]
 [ 7.74237075e-02]
 [-1.82810141e+00]
 [-7.53314428e-03]
 [-5.71215822e-02]]
[[221.10268579]]


In [None]:
# Print the held-out target values (handy for eyeballing against the
# predictions computed from X_test).
print(Y_test)

[[142]
 [143]
 [153]
 ...
 [176]
 [142]
 [196]]


# **Evaluation metrics (MAE, RMSE, R²)**

In [None]:
def metrics(predictions, Y_test):
    """Compute regression error metrics for ``predictions`` vs ``Y_test``.

    Both arguments are expected to be arrays of the same shape (NumPy
    broadcasting applies otherwise).

    Returns
    -------
    tuple
        ``(MAE, RMSE, r_square)``: mean absolute error, root mean square
        error, and the coefficient of determination ``1 - rss / sst``.
    """
    # Mean absolute error.
    abs_errors = np.abs(predictions - Y_test)
    MAE = np.mean(abs_errors)

    # Root mean square error: square root of the mean squared residual.
    squared_errors = np.square(np.subtract(Y_test, predictions))
    RMSE = math.sqrt(squared_errors.mean())

    # R^2: one minus residual sum of squares over total sum of squares
    # (the latter measured around the mean of the true targets).
    rss = np.sum(np.square(Y_test - predictions))
    target_mean = np.mean(Y_test)
    sst = np.sum(np.square(Y_test - target_mean))
    r_square = 1 - (rss / sst)

    return MAE, RMSE, r_square

In [None]:
# Evaluate the test-set predictions and print each metric on its own line.
mae, rmse, r_square = metrics(predictions, Y_test)
for _label, _value in (
    ("Mean Absolute Error: ", mae),
    ("Root Mean Square Error: ", rmse),
    ("R square: ", r_square),
):
    print(_label, _value)

Mean Absolute Error:  15.18195126198544
Root Mean Square Error:  20.529157683637067
R square:  0.5069252832983899
