In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\alvin\anaconda3\lib\site-packages (0.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [4]:
df = pd.read_csv("../resources/shot_logs_clean.csv")
df.head()

Unnamed: 0,outcome,period,game_clock,shot_clock,dribbles,touch_time,shot_dist,pts_type,close_def_dist
0,1,1,1:09,10.8,2,1.9,7.7,2,1.3
1,0,1,0:14,3.4,0,0.8,28.2,3,6.1
2,0,2,11:47,10.3,2,1.9,17.2,2,3.4
3,0,2,10:34,10.9,2,2.7,3.7,2,1.1
4,0,2,8:15,9.1,2,4.4,18.4,2,2.6


# Select your features (columns)

In [6]:
# Set features. This will also be used as your x values.
selected_features = df.drop(["outcome", "game_clock"], axis=1)

In [7]:
X = selected_features
y = df["outcome"]
print(X.shape, y.shape)

(122491, 7) (122491,)



# Create a Train Test Split


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [9]:
X_train.head()

Unnamed: 0,period,shot_clock,dribbles,touch_time,shot_dist,pts_type,close_def_dist
115644,2,3.0,0,0.8,19.8,2,5.4
9799,3,4.7,1,3.5,31.9,3,4.6
114236,3,1.3,0,0.6,3.9,2,0.3
43456,1,11.5,3,4.8,3.0,2,1.5
38850,3,13.0,1,1.4,16.9,2,6.1


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [10]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler,StandardScaler
from tensorflow.keras.utils import to_categorical

# scale the data
X_scaler =  StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Train the Model



In [11]:
# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)
predictions = model.predict(X_test_scaled)
model

SVC(kernel='linear')

In [12]:
print(f"Training Data Score: {model.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {model.score(X_test_scaled, y_test)}")

Training Data Score: 0.5924805155222711
Testing Data Score: 0.5968716324331385


# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [13]:
# Create the GridSearchCV model
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [14]:
# Train the model with GridSearch
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=1, gamma=0.0001, score=0.588, total= 1.7min
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.7min remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.591, total= 1.7min
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  3.3min remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.591, total= 1.6min
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.599, total= 1.5min
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.592, total= 1.7min
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.588, total= 1.8min
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.591, total= 1.6min
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.591, total= 1.5min
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.599, total= 1.5min
[CV] C=1, gamma=0.0005 ...............................................
[CV] .

[CV] .................. C=50, gamma=0.0001, score=0.588, total=13.3min
[CV] C=50, gamma=0.0001 ..............................................
[CV] .................. C=50, gamma=0.0001, score=0.591, total=13.0min
[CV] C=50, gamma=0.0001 ..............................................
[CV] .................. C=50, gamma=0.0001, score=0.591, total=12.9min
[CV] C=50, gamma=0.0001 ..............................................
[CV] .................. C=50, gamma=0.0001, score=0.599, total=12.8min
[CV] C=50, gamma=0.0001 ..............................................
[CV] .................. C=50, gamma=0.0001, score=0.592, total=13.1min
[CV] C=50, gamma=0.0005 ..............................................
[CV] .................. C=50, gamma=0.0005, score=0.588, total=13.3min
[CV] C=50, gamma=0.0005 ..............................................
[CV] .................. C=50, gamma=0.0005, score=0.591, total=13.0min
[CV] C=50, gamma=0.0005 ..............................................
[CV] .

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed: 439.1min finished


GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [1, 5, 10, 50],
                         'gamma': [0.0001, 0.0005, 0.001, 0.005]},
             verbose=3)

In [15]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 5, 'gamma': 0.0001}
0.5923064237515745


In [18]:
 # Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.62      0.67      0.64     16590
           1       0.57      0.51      0.54     14033

    accuracy                           0.60     30623
   macro avg       0.59      0.59      0.59     30623
weighted avg       0.59      0.60      0.59     30623

