In [179]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
import pandas as pd
import numpy as np

nba = pd.read_csv('nba2021.csv')

# Columns used as predictors; the target is the `Pos` column.
X = nba[["STL", "MP", "FG%", "3PA", "2P%", "3P", "eFG%", "BLK", "ORB", "DRB", "AST"]]
y = nba["Pos"]

# Fixed seed so the train/test split -- and therefore the fitted model and
# every score derived from it -- is identical on each Restart & Run All.
SEED = 42

# Stratifying keeps each class's share the same in both splits. It requires
# at least two rows per class, so fall back to a plain random split if any
# class in `Pos` is rarer than that instead of raising.
stratify_labels = y if y.value_counts().min() >= 2 else None

# Split the dataset into train and test sets (75% train, 25% test)
train_feature, test_feature, train_class, test_class = train_test_split(
    X, y, test_size=0.25, random_state=SEED, stratify=stratify_labels)

# NOTE(review): the selected columns look like a mix of percentages and raw
# counts, and they are passed to the SVM unscaled. SVMs are sensitive to
# feature scale, so standardizing may help -- TODO confirm against the data.
svm = SVC(kernel='linear')
svm.fit(train_feature, train_class)

# Predicted class for every row of the held-out test set.
prediction = svm.predict(test_feature)

In [180]:
# Fraction of test rows whose predicted class equals the true class,
# reported as a percentage.
test_accuracy = accuracy_score(test_class, prediction)
print("Test set accuracy: {:.3f}%".format(test_accuracy * 100))

Test set accuracy: 50.400%


In [181]:
# Cross-tabulate true classes (rows) against predicted classes (columns);
# margins=True appends the "All" totals row and column.
confusion_table = pd.crosstab(
    test_class,
    prediction,
    rownames=['True'],
    colnames=['Predicted'],
    margins=True,
)
print(confusion_table)

Predicted   C  PF  PG  SF  SG  All
True                              
C          18   3   0   0   0   21
PF          9  10   1   2   3   25
PG          0   3  18   0   9   30
SF          0   8   1   1   8   18
SG          1   2   8   4  16   31
All        28  26  28   7  36  125


In [182]:
# Score the classifier with 10-fold cross-validation over the full dataset.
# An integer `cv` combined with a classifier gives stratified folds.
cv_scores = cross_val_score(svm, X, y, cv=10)

# Report each fold's accuracy as a percentage, numbering folds from 1.
print("Accuracy for each fold in Cross-validation:")
for fold_number, fold_accuracy in enumerate(cv_scores, start=1):
    print("Fold {}: {:.2f}%".format(fold_number, fold_accuracy * 100))

# Mean accuracy over the ten folds.
avg_accuracy = cv_scores.mean()
print("Average Accuracy across all folds: {:.3f}%".format(avg_accuracy * 100))

Accuracy for each fold in Cross-validation:
Fold 1: 58.00%
Fold 2: 46.00%
Fold 3: 62.00%
Fold 4: 48.00%
Fold 5: 56.00%
Fold 6: 48.00%
Fold 7: 42.00%
Fold 8: 65.31%
Fold 9: 59.18%
Fold 10: 48.98%
Average Accuracy across all folds: 53.347%
