In [7]:
import pandas as pd
import numpy as np
from random import randint
import os.path


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [9]:
torch.arange(25468).chunk(11, dim=0)

(tensor([   0,    1,    2,  ..., 2313, 2314, 2315]),
 tensor([2316, 2317, 2318,  ..., 4629, 4630, 4631]),
 tensor([4632, 4633, 4634,  ..., 6945, 6946, 6947]),
 tensor([6948, 6949, 6950,  ..., 9261, 9262, 9263]),
 tensor([ 9264,  9265,  9266,  ..., 11577, 11578, 11579]),
 tensor([11580, 11581, 11582,  ..., 13893, 13894, 13895]),
 tensor([13896, 13897, 13898,  ..., 16209, 16210, 16211]),
 tensor([16212, 16213, 16214,  ..., 18525, 18526, 18527]),
 tensor([18528, 18529, 18530,  ..., 20841, 20842, 20843]),
 tensor([20844, 20845, 20846,  ..., 23157, 23158, 23159]),
 tensor([23160, 23161, 23162,  ..., 25465, 25466, 25467]))

In [10]:
from sklearn import svm
from imblearn.over_sampling import BorderlineSMOTE

In [11]:
# from google.colab import drive
# drive.mount('/content/drive')

In [12]:
df_data = pd.read_csv('https://raw.githubusercontent.com/JamesBond0014/NBA_ALLSTAR_Prediction/main/ASG_data.csv')

names_and_teams = df_data[['PLAYER', 'TEAM']]
for df in [df_data]:

    # the % of team's games the player played in
    # sometimes because of scheduling/trades, a player's indiviual GP may exceed their current team's, so we impose a ceiling of 1
    df['Play Pct.'] = (df['GP'] / df['Team GP']).map(lambda pct : min(pct, 1))

#     # nomalized via league average pace for that year
    for col in ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', '3PM']:
        df['Adjusted ' + col] = df[col] / df['Avg. Pace']

train_data, test_data, test_years = [],[], [2020]
# for i in range(1):
#     test_years.append(randint(1996, 2020))

for index, row in df_data.iterrows():
    if (row['Year'] in test_years):
        test_data.append(row)
    else:
        train_data.append(row)

In [13]:
features_full = [
    'Adjusted PTS',
    'Adjusted REB',
    'Adjusted AST',
    'Adjusted STL',
    'Adjusted BLK',
    'Adjusted TOV',
    'Adjusted 3PM',
    'DEFWS',
    'TS%',
    'USG%',
    'PIE',
    'Play Pct.',
    'Team Conference Rank',
    'Prior ASG Appearances',
    'AS Last Year?'
]

In [14]:
train_df = pd.DataFrame(train_data)


In [15]:
train_df = pd.DataFrame(train_data)
train_targets = np.array(pd.DataFrame(train_df['Selected?'])).reshape(1,-1)[0]

train_df = train_df.drop(columns=['Selected?'])
train_df_filtered = train_df[features_full]

train_df_filtered, train_targets = BorderlineSMOTE(random_state=0).fit_sample(train_df_filtered, train_targets)

test_df = pd.DataFrame(test_data)
test_targets = np.array(pd.DataFrame(test_df['Selected?'])).reshape(1,-1)[0]
test_df = test_df.drop(columns=['Selected?'])
test_df_filtered = test_df[features_full]



In [16]:
classifier = svm.SVC()
classifier.fit(train_df_filtered, train_targets)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [17]:
predictions = classifier.predict(test_df_filtered)

In [18]:
predictions

array([2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 2, 1, 1, 1, 1, 0, 1, 0, 1,
       2, 0, 0, 2, 0, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [20]:
class_pred = predictions
class_target = test_targets

num_cor = len([True for i in range(0, len(class_target)) if class_pred[i] == class_target[i]])
tp = len([True for i in range(0, len(class_target)) if class_pred[i] != 0 and class_pred[i] == class_target[i]])
fn = len([True for i in range(0, len(class_target)) if class_pred[i] == 0 and class_pred[i] != class_target[i]])
p = len([True for i in range(0, len(class_target)) if class_pred[i] != 0])
# n = len(class_pred) - p

acc = num_cor/len(class_target)
prec = tp / p
rec = tp / (fn+p)
f1_score = 2*prec*rec / (prec+rec)
# min_loss = min(losses)




print("Accuracy: {}".format(acc))
print("Precision: {}".format(prec))
print("Recall: {}".format(rec))
print("F1_score: {}".format(f1_score))
# print("Min loss: {}".format(min_loss))

Accuracy: 0.8803418803418803
Precision: 0.2631578947368421
Recall: 0.2631578947368421
F1_score: 0.2631578947368421


In [None]:
count = 0
acc = 0
mask = test_targets != 0
for i in range(len(predictions)):
    loss = abs(predictions[i]-test_targets[i])
    if (mask[i] == True):
        if loss == 0:
            acc+=1
        if loss == 1 and predictions[i]+test_targets[i] ==3:
            acc+=0.5
        count +=1



acc/count