In [69]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

from sklearn.metrics import confusion_matrix, classification_report, f1_score, roc_auc_score, accuracy_score, recall_score,precision_score
from sklearn.preprocessing import StandardScaler, normalize

from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline,Pipeline

import datetime
import time

In [2]:
# mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0","/gpu:1"])
mirrored_strategy = tf.distribute.MirroredStrategy(devices= ["/gpu:0","/gpu:1"],
                                                   cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


In [3]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
mirrored_strategy = tf.distribute.MirroredStrategy()

Num GPUs Available:  1
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [6]:
# Pour jupyter-notebook
games_data = pd.read_csv('nba_games_data.csv', sep=';')
players_data = pd.read_csv('data_save/nba_player_stats_data.csv', sep=';')

In [7]:
# Pour Google colab
# from google.colab import drive
# drive.mount('/content/gdrive')
# games_data = pd.read_csv('/content/gdrive/MyDrive/NBA/nba_games_data.csv', sep=';', encoding='latin-1')

### Players Data

In [8]:
players_data.shape
players_data.head()

Unnamed: 0,game_id,player_name,minutes_played,field_goals,field_goal_attempts,field_goal_percentage,three_points_field_goals,three_points_field_goal_attempts,three_points_field_goal_percentage,free_throws,...,offensive_rebounds,defensive_rebounds,total_rebounds,assists,steals,blocks,turn_over,personnal_fouls,points,plus_minus
0,e6a75731-6868-4a2a-835d-4e1be4c49815,Andrew Wiggins,31:14,4.0,16.0,0.25,2.0,6.0,0.333,3.0,...,0.0,2.0,2.0,1.0,0.0,1.0,4.0,4.0,13.0,-28.0
1,e6a75731-6868-4a2a-835d-4e1be4c49815,Stephen Curry,30:19,7.0,21.0,0.333,2.0,10.0,0.2,4.0,...,3.0,1.0,4.0,10.0,2.0,0.0,3.0,1.0,20.0,-23.0
2,e6a75731-6868-4a2a-835d-4e1be4c49815,Kelly Oubre,25:39,3.0,14.0,0.214,0.0,6.0,0.0,0.0,...,4.0,3.0,7.0,2.0,1.0,2.0,3.0,1.0,6.0,-28.0
3,e6a75731-6868-4a2a-835d-4e1be4c49815,James Wiseman,24:17,7.0,13.0,0.538,1.0,1.0,1.0,4.0,...,1.0,5.0,6.0,0.0,2.0,0.0,1.0,2.0,19.0,-10.0
4,e6a75731-6868-4a2a-835d-4e1be4c49815,Eric Paschall,21:33,2.0,6.0,0.333,1.0,1.0,1.0,1.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,6.0,-28.0


In [9]:
players_data.columns

Index(['game_id', 'player_name', 'minutes_played', 'field_goals',
       'field_goal_attempts', 'field_goal_percentage',
       'three_points_field_goals', 'three_points_field_goal_attempts',
       'three_points_field_goal_percentage', 'free_throws',
       'free_throw_attempts', 'free_throw_percentage', 'offensive_rebounds',
       'defensive_rebounds', 'total_rebounds', 'assists', 'steals', 'blocks',
       'turn_over', 'personnal_fouls', 'points', 'plus_minus'],
      dtype='object')

In [11]:
players_data.groupby(['player_name']).mean()

Unnamed: 0_level_0,field_goals,field_goal_attempts,field_goal_percentage,three_points_field_goals,three_points_field_goal_attempts,three_points_field_goal_percentage,free_throws,free_throw_attempts,free_throw_percentage,offensive_rebounds,defensive_rebounds,total_rebounds,assists,steals,blocks,turn_over,personnal_fouls,points,plus_minus
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
A.C. Green,1.919708,4.313869,0.423016,0.007299,0.072993,0.100000,0.927007,1.299270,0.705700,1.627737,3.036496,4.664234,0.635036,0.510949,0.138686,0.547445,1.532847,4.773723,2.474453
A.J. Bramlett,0.500000,2.625000,0.312500,0.000000,0.000000,,0.000000,0.000000,,1.500000,1.250000,2.750000,0.000000,0.125000,0.000000,0.375000,1.625000,1.000000,-5.250000
A.J. Guyton,2.075000,5.500000,0.314390,0.912500,2.412500,0.303635,0.462500,0.562500,0.783350,0.275000,0.725000,1.000000,1.837500,0.250000,0.150000,0.775000,0.725000,5.525000,-2.375000
A.J. Hammons,0.772727,1.909091,0.386867,0.227273,0.454545,0.547571,0.409091,0.909091,0.416667,0.363636,1.272727,1.636364,0.181818,0.045455,0.590909,0.454545,0.954545,2.181818,-0.227273
A.J. Price,2.085185,5.485185,0.361116,0.833333,2.614815,0.288846,0.800000,1.070370,0.744064,0.262963,1.166667,1.429630,2.125926,0.474074,0.033333,0.874074,0.896296,5.803704,-0.148148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ömer Aşık,1.968504,3.748031,0.501497,0.000000,0.001969,0.000000,1.242126,2.255906,0.542857,2.181102,4.838583,7.019685,0.594488,0.356299,0.730315,1.151575,2.015748,5.179134,0.818898
Šarūnas Jasikevičius,2.060811,5.209459,0.369382,0.891892,2.533784,0.337703,1.412162,1.567568,0.888127,0.236486,1.304054,1.540541,2.750000,0.445946,0.040541,1.425676,1.290541,6.425676,-0.500000
Žan Tabak,1.580645,3.096774,0.444321,0.000000,0.000000,,0.354839,0.822581,0.450000,1.129032,2.387097,3.516129,0.532258,0.177419,0.483871,0.967742,2.080645,3.516129,0.048387
Žarko Čabarkapa,1.560000,3.653333,0.413015,0.160000,0.586667,0.210564,0.986667,1.346667,0.721635,0.653333,1.426667,2.080000,0.566667,0.220000,0.166667,0.793333,1.473333,4.266667,-1.400000


In [17]:
players_data = players_data.drop(columns=['game_id'])

In [18]:
players_data = players_data.drop(columns=['field_goal_percentage', 'three_points_field_goal_percentage', 
                                        'free_throw_percentage', 'personnal_fouls', 'plus_minus', 'three_points_field_goals', 'three_points_field_goal_attempts'])

In [19]:
players_data

Unnamed: 0,player_name,minutes_played,field_goals,field_goal_attempts,three_points_field_goals,three_points_field_goal_attempts,free_throws,free_throw_attempts,offensive_rebounds,defensive_rebounds,total_rebounds,assists,steals,blocks,turn_over,points
0,Andrew Wiggins,31:14,4.0,16.0,2.0,6.0,3.0,4.0,0.0,2.0,2.0,1.0,0.0,1.0,4.0,13.0
1,Stephen Curry,30:19,7.0,21.0,2.0,10.0,4.0,4.0,3.0,1.0,4.0,10.0,2.0,0.0,3.0,20.0
2,Kelly Oubre,25:39,3.0,14.0,0.0,6.0,0.0,0.0,4.0,3.0,7.0,2.0,1.0,2.0,3.0,6.0
3,James Wiseman,24:17,7.0,13.0,1.0,1.0,4.0,8.0,1.0,5.0,6.0,0.0,2.0,0.0,1.0,19.0
4,Eric Paschall,21:33,2.0,6.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673997,Kurt Thomas,9:39,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
673998,John Wallace,,,,,,,,,,,,,,,
673999,Andrew Lang,,,,,,,,,,,,,,,
674000,Chris Dudley,,,,,,,,,,,,,,,


###  Valeurs manquantes

In [29]:
print("Nombre de valeurs nulles : " + str(players_data.isnull().sum().sum()))

Nombre de valeurs nulles : 1659075


In [30]:
# players_data.isna().sum()

player_name                              0
minutes_played                      110605
field_goals                         110605
field_goal_attempts                 110605
three_points_field_goals            110605
three_points_field_goal_attempts    110605
free_throws                         110605
free_throw_attempts                 110605
offensive_rebounds                  110605
defensive_rebounds                  110605
total_rebounds                      110605
assists                             110605
steals                              110605
blocks                              110605
turn_over                           110605
points                              110605
dtype: int64

In [58]:
players_data = players_data[players_data['minutes_played'].notna()]

In [59]:
players_data

Unnamed: 0,player_name,minutes_played,field_goals,field_goal_attempts,three_points_field_goals,three_points_field_goal_attempts,free_throws,free_throw_attempts,offensive_rebounds,defensive_rebounds,total_rebounds,assists,steals,blocks,turn_over,points
0,Andrew Wiggins,31:14,4.0,16.0,2.0,6.0,3.0,4.0,0.0,2.0,2.0,1.0,0.0,1.0,4.0,13.0
1,Stephen Curry,30:19,7.0,21.0,2.0,10.0,4.0,4.0,3.0,1.0,4.0,10.0,2.0,0.0,3.0,20.0
2,Kelly Oubre,25:39,3.0,14.0,0.0,6.0,0.0,0.0,4.0,3.0,7.0,2.0,1.0,2.0,3.0,6.0
3,James Wiseman,24:17,7.0,13.0,1.0,1.0,4.0,8.0,1.0,5.0,6.0,0.0,2.0,0.0,1.0,19.0
4,Eric Paschall,21:33,2.0,6.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673993,Patrick Ewing,37:37,6.0,15.0,0.0,0.0,6.0,9.0,3.0,9.0,12.0,1.0,2.0,3.0,4.0,18.0
673994,Charlie Ward,26:58,2.0,3.0,0.0,1.0,2.0,2.0,1.0,3.0,4.0,6.0,2.0,0.0,0.0,6.0
673995,Marcus Camby,20:11,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0
673996,Chris Childs,18:01,2.0,2.0,0.0,0.0,2.0,2.0,0.0,2.0,2.0,5.0,0.0,0.0,1.0,6.0


In [60]:
print("Nombre de valeurs nulles : " + str(players_data.isnull().sum().sum()))

Nombre de valeurs nulles : 0


### Calcul score efficacité

In [75]:
player_efficiency = []

In [77]:
efficiency = 0

for index, row in players_data.iterrows():
#     x = time.strptime('00:'+row['minutes_played']+',000'.split(',')[0],'%H:%M:%S')
#     seconds = datetime.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds()
    
    efficiency = row['points'] + row['total_rebounds'] + row['assists'] + row['steals'] + row['blocks'] - (row['field_goal_attempts'] -row['field_goals']) - (row['free_throw_attempts'] -row['free_throws'])
    player_efficiency.append(efficiency)

# player_efficiency

[4.0,
 22.0,
 7.0,
 17.0,
 3.0,
 4.0,
 4.0,
 12.0,
 12.0,
 11.0,
 6.0,
 3.0,
 9.0,
 28.0,
 25.0,
 16.0,
 14.0,
 15.0,
 25.0,
 4.0,
 18.0,
 8.0,
 2.0,
 3.0,
 2.0,
 3.0,
 4.0,
 1.0,
 38.0,
 17.0,
 14.0,
 14.0,
 18.0,
 18.0,
 9.0,
 7.0,
 5.0,
 -2.0,
 -1.0,
 0.0,
 0.0,
 20.0,
 20.0,
 22.0,
 13.0,
 2.0,
 28.0,
 11.0,
 7.0,
 2.0,
 -1.0,
 8.0,
 0.0,
 40.0,
 14.0,
 33.0,
 5.0,
 9.0,
 12.0,
 7.0,
 13.0,
 1.0,
 3.0,
 24.0,
 35.0,
 18.0,
 29.0,
 22.0,
 22.0,
 12.0,
 12.0,
 1.0,
 1.0,
 -2.0,
 26.0,
 34.0,
 9.0,
 9.0,
 10.0,
 24.0,
 6.0,
 6.0,
 6.0,
 7.0,
 -1.0,
 0.0,
 40.0,
 30.0,
 22.0,
 25.0,
 1.0,
 14.0,
 6.0,
 13.0,
 7.0,
 28.0,
 35.0,
 15.0,
 16.0,
 3.0,
 27.0,
 7.0,
 4.0,
 8.0,
 1.0,
 17.0,
 27.0,
 17.0,
 25.0,
 1.0,
 16.0,
 8.0,
 14.0,
 6.0,
 7.0,
 32.0,
 28.0,
 12.0,
 7.0,
 10.0,
 10.0,
 5.0,
 16.0,
 5.0,
 1.0,
 7.0,
 38.0,
 28.0,
 15.0,
 9.0,
 1.0,
 19.0,
 10.0,
 4.0,
 17.0,
 7.0,
 0.0,
 38.0,
 20.0,
 23.0,
 25.0,
 19.0,
 21.0,
 9.0,
 1.0,
 1.0,
 3.0,
 22.0,
 22.0,
 17.0,
 8.0,
 16.0,
 5.

In [78]:
players_data['efficiency'] = player_efficiency

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_data['efficiency'] = player_efficiency


In [79]:
players_data

Unnamed: 0,player_name,minutes_played,field_goals,field_goal_attempts,three_points_field_goals,three_points_field_goal_attempts,free_throws,free_throw_attempts,offensive_rebounds,defensive_rebounds,total_rebounds,assists,steals,blocks,turn_over,points,efficiency
0,Andrew Wiggins,31:14,4.0,16.0,2.0,6.0,3.0,4.0,0.0,2.0,2.0,1.0,0.0,1.0,4.0,13.0,4.0
1,Stephen Curry,30:19,7.0,21.0,2.0,10.0,4.0,4.0,3.0,1.0,4.0,10.0,2.0,0.0,3.0,20.0,22.0
2,Kelly Oubre,25:39,3.0,14.0,0.0,6.0,0.0,0.0,4.0,3.0,7.0,2.0,1.0,2.0,3.0,6.0,7.0
3,James Wiseman,24:17,7.0,13.0,1.0,1.0,4.0,8.0,1.0,5.0,6.0,0.0,2.0,0.0,1.0,19.0,17.0
4,Eric Paschall,21:33,2.0,6.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,6.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673993,Patrick Ewing,37:37,6.0,15.0,0.0,0.0,6.0,9.0,3.0,9.0,12.0,1.0,2.0,3.0,4.0,18.0,24.0
673994,Charlie Ward,26:58,2.0,3.0,0.0,1.0,2.0,2.0,1.0,3.0,4.0,6.0,2.0,0.0,0.0,6.0,17.0
673995,Marcus Camby,20:11,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0,4.0
673996,Chris Childs,18:01,2.0,2.0,0.0,0.0,2.0,2.0,0.0,2.0,2.0,5.0,0.0,0.0,1.0,6.0,13.0


In [83]:
players_data_grouped = players_data.groupby('player_name').mean()
players_data_grouped

Unnamed: 0_level_0,field_goals,field_goal_attempts,three_points_field_goals,three_points_field_goal_attempts,free_throws,free_throw_attempts,offensive_rebounds,defensive_rebounds,total_rebounds,assists,steals,blocks,turn_over,points,efficiency
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
A.C. Green,1.919708,4.313869,0.007299,0.072993,0.927007,1.299270,1.627737,3.036496,4.664234,0.635036,0.510949,0.138686,0.547445,4.773723,7.956204
A.J. Bramlett,0.500000,2.625000,0.000000,0.000000,0.000000,0.000000,1.500000,1.250000,2.750000,0.000000,0.125000,0.000000,0.375000,1.000000,1.750000
A.J. Guyton,2.075000,5.500000,0.912500,2.412500,0.462500,0.562500,0.275000,0.725000,1.000000,1.837500,0.250000,0.150000,0.775000,5.525000,5.237500
A.J. Hammons,0.772727,1.909091,0.227273,0.454545,0.409091,0.909091,0.363636,1.272727,1.636364,0.181818,0.045455,0.590909,0.454545,2.181818,3.000000
A.J. Price,2.085185,5.485185,0.833333,2.614815,0.800000,1.070370,0.262963,1.166667,1.429630,2.125926,0.474074,0.033333,0.874074,5.803704,6.196296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ömer Aşık,1.968504,3.748031,0.000000,0.001969,1.242126,2.255906,2.181102,4.838583,7.019685,0.594488,0.356299,0.730315,1.151575,5.179134,11.086614
Šarūnas Jasikevičius,2.060811,5.209459,0.891892,2.533784,1.412162,1.567568,0.236486,1.304054,1.540541,2.750000,0.445946,0.040541,1.425676,6.425676,7.898649
Žan Tabak,1.580645,3.096774,0.000000,0.000000,0.354839,0.822581,1.129032,2.387097,3.516129,0.532258,0.177419,0.483871,0.967742,3.516129,6.241935
Žarko Čabarkapa,1.560000,3.653333,0.160000,0.586667,0.986667,1.346667,0.653333,1.426667,2.080000,0.566667,0.220000,0.166667,0.793333,4.266667,4.846667
