In [1]:
import warnings
from pandas import Series
import pandas as pd
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
# Load the game log, one-hot encode the match result and drop the
# `teamRslt_Loss` indicator column.
data = pd.read_csv('Database.csv')
data = pd.get_dummies(data=data, columns=['teamRslt']).drop(['teamRslt_Loss'], axis=1)

# Keep only the rows whose team abbreviation is 'BRK'. `reset_index()` is
# called without `drop=True`, so the previous row labels stay in an `index`
# column of the result.
filtered_data = (
    data[data["teamAbbr"] == 'BRK']
    .reset_index()
    .drop(['teamAbbr'], axis=1)
)

# One single-column frame per feature; the two attempt counts are cast to float32.
efg = filtered_data[['teamEFG%']]
to = filtered_data[['teamTO%']]
oreb = filtered_data[['teamOREB%']]
fta = filtered_data[['teamFTA']].astype('float32')
fga = filtered_data[['teamFGA']].astype('float32')

# Squeeze each single-column frame down to a Series.
efg_series, to_series, oreb_series, fta_series, fga_series = (
    frame.squeeze() for frame in (efg, to, oreb, fta, fga)
)
print(efg_series)

0     0.510
1     0.516
2     0.495
3     0.527
4     0.517
      ...  
74    0.548
75    0.675
76    0.404
77    0.494
78    0.500
Name: teamEFG%, Length: 79, dtype: float64


In [20]:
def evaluate_arima_model(X, arima_order):
    """Walk-forward validate one ARIMA order and return its out-of-sample MSE.

    The first 90% of ``X`` seeds the history. For every remaining
    observation a fresh ``ARIMA`` model is fitted on the history, its
    one-step forecast is recorded, and the true observation is then appended
    to the history before the next step.

    Parameters
    ----------
    X : 1-D sequence of numbers
        Observations in time order (NumPy array, list or pandas Series).
    arima_order : tuple
        ``(p, d, q)`` order forwarded to ``ARIMA``.

    Returns
    -------
    The value of ``mean_squared_error`` computed between the held-out
    observations and their one-step forecasts.
    """
    # Materialise the input once so that both the split and the walk below
    # are purely positional. Indexing a pandas Series with `test[t]` is
    # label-based and breaks when the labels do not start at 0.
    observations = list(X)
    train_size = int(len(observations) * 0.9)
    history = observations[:train_size]
    test = observations[train_size:]

    predictions = []
    for actual in test:
        # NOTE(review): `fit(disp=0)` and indexing `forecast()` with [0] follow
        # the legacy `statsmodels.tsa.arima_model` API this file imports.
        model_fit = ARIMA(history, order=arima_order).fit(disp=0)
        predictions.append(model_fit.forecast()[0])
        # Reveal the true value only after it has been predicted.
        history.append(actual)

    # Out-of-sample error over the hold-out window.
    return mean_squared_error(test, predictions)

# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values, df, feature):
    """Grid-search ARIMA orders and print a one-step forecast for a feature.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``
    on ``dataset``. The order with the lowest MSE is then refitted on ``df``
    and its forecast for the next observation is printed.

    Parameters
    ----------
    dataset : array-like with ``astype``
        Observations used for the grid search; cast to float32 first.
    p_values, d_values, q_values : iterable of int
        Candidate values for each component of the ARIMA order.
    df : array-like
        Data the winning order is refitted on to produce the forecast.
    feature : str
        Label used in the printed forecast message.

    Returns
    -------
    None. All results are reported through ``print``.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    mse = evaluate_arima_model(dataset, order)
                except Exception as exc:
                    # Some orders cannot be fitted; report and move on.
                    # `Exception` (not a bare except) lets KeyboardInterrupt
                    # and SystemExit still stop a long-running search.
                    print('ARIMA%s skipped: %s' % (order, exc))
                    continue
                if mse < best_score:
                    best_score, best_cfg = mse, order
                print('ARIMA%s MSE=%.3f' % (order, mse))

    if best_cfg is None:
        # Nothing could be scored, so there is no order to refit with.
        print('No ARIMA order could be fitted for ' + feature + '; no forecast made.')
        return

    print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))
    # disp=0 matches the fit call used while scoring the candidates.
    results_ARIMA = ARIMA(df, order=best_cfg).fit(disp=0)
    print("Predicted " + feature + " for the next match:\n" + str(results_ARIMA.forecast(1)[0]))


# Candidate ARIMA orders: p, d and q each take the values 0 and 1.
p_values = range(2)
d_values = range(2)
q_values = range(2)

# Suppress every warning from here on.
warnings.filterwarnings("ignore")

# Run the same search and forecast for each feature, in a fixed order:
# (values for the grid search, frame for the final fit, label to print).
for _values, _frame, _label in (
    (efg_series.values, efg, 'EFG%'),
    (to_series.values, to, 'TO%'),
    (oreb_series.values, oreb, 'OREB%'),
    (fta_series.values, fta, 'FTA'),
    (fga_series.values, fga, 'FGA'),
):
    evaluate_models(_values, p_values, d_values, q_values, _frame, _label)

ARIMA(0, 0, 0) MSE=0.011
ARIMA(0, 0, 1) MSE=0.012
ARIMA(0, 1, 0) MSE=0.027
ARIMA(0, 1, 1) MSE=0.012
ARIMA(1, 0, 0) MSE=0.013
ARIMA(1, 1, 0) MSE=0.016
ARIMA(1, 1, 1) MSE=0.013
Best ARIMA(0, 0, 0) MSE=0.011
Predicted EFG% for the next match:
[0.53987342]
ARIMA(0, 0, 0) MSE=17.116
ARIMA(0, 0, 1) MSE=17.709
ARIMA(0, 1, 0) MSE=31.243
ARIMA(0, 1, 1) MSE=18.021
ARIMA(1, 0, 0) MSE=17.543
ARIMA(1, 0, 1) MSE=16.990
ARIMA(1, 1, 0) MSE=25.527
ARIMA(1, 1, 1) MSE=18.454
Best ARIMA(1, 0, 1) MSE=16.990
Predicted TO% for the next match:
[11.47525076]
ARIMA(0, 0, 0) MSE=72.258
ARIMA(0, 0, 1) MSE=72.155
ARIMA(0, 1, 0) MSE=132.209
ARIMA(0, 1, 1) MSE=72.340
ARIMA(1, 0, 0) MSE=72.031
ARIMA(1, 0, 1) MSE=74.217
ARIMA(1, 1, 0) MSE=94.901
ARIMA(1, 1, 1) MSE=72.231
Best ARIMA(1, 0, 0) MSE=72.031
Predicted OREB% for the next match:
[21.61845635]
ARIMA(0, 0, 0) MSE=76.923
ARIMA(0, 0, 1) MSE=80.607
ARIMA(0, 1, 0) MSE=144.568
ARIMA(0, 1, 1) MSE=78.659
ARIMA(1, 0, 0) MSE=80.203
ARIMA(1, 0, 1) MSE=80.812
ARIMA(1, 1, 0