In [1]:
import pandas as pd

In [10]:
from sklearn.linear_model import LinearRegression
from numpy.random import randn

In [2]:
# Import the CSV file and create a DataFrame.
# The path is relative, so the file is looked up in the current working directory.
player_df_final = pd.read_csv('player_data_final.csv')

# The player_df_final pandas DataFrame is described as containing data from 40 players:
# the first 26 rows represent human players and the last 17 rows represent Tune Squad.
# NOTE(review): these counts do not add up (26 + 17 = 43, not 40) — confirm against the CSV.

In [7]:
# Create a DataFrame of only Tune Squad players.
# The Tune Squad rows start at position 26 of player_df_final; everything before
# that position is a human player.
FIRST_TUNE_SQUAD_ROW = 26
ts_stats_df = player_df_final.iloc[FIRST_TUNE_SQUAD_ROW:, :]

# Import Tune Squad player names (the file is tab-separated).
ts_name_df = pd.read_csv('tune_squad.csv', sep='\t')

# Merge the two DataFrames on ID.
# - how='left' keeps every stats row even when no name is found for its ID.
# - Columns that exist under the same name in both frames receive the
#   '_type' / '_name' suffixes (the result shows player_type and player_name).
# - validate='many_to_one' raises a MergeError if an ID appears more than once
#   in the names file, instead of silently duplicating player rows.
ts_df = pd.merge(
    ts_stats_df,
    ts_name_df,
    on='ID',
    how='left',
    suffixes=('_type', '_name'),
    validate='many_to_one',
)
ts_df.head()

Unnamed: 0,ID,player_type,points,possessions,team_pace,GP,MPG,TS%,AST,TO,USG,ORR,DRR,REBR,PER,player_name
0,31,tune_squad1,2049.0,1434.0,110.0,64.0,38.8,0.619,31.5,14.9,35.5,8.3,17.6,12.8,28.44,Sylvester
1,32,tune_squad2,1795.0,1481.8,112.1,62.0,35.4,0.608,31.9,14.5,32.0,6.5,22.5,12.9,23.34,Marvin the Martian
2,33,tune_squad3,1805.0,1509.9,108.6,64.0,35.4,0.622,27.9,13.9,36.0,5.9,27.7,12.2,22.41,Road Runner
3,34,tune_squad4,1743.0,1422.4,112.9,64.0,36.3,0.619,30.9,15.6,34.5,5.9,18.9,14.8,29.853138,Foghorn Leghorn
4,35,tune_squad5,1963.0,1539.1,117.4,59.771429,35.208333,0.633,32.3,16.2,34.0,5.9,19.8,13.1,27.16,Bugs Bunny


In [8]:
# Rearrange the columns to put the ID and player_name columns next to each other,
# dropping the player_type column in the process.
#
# The columns are picked by name rather than by position so that the cell is
# idempotent: running it a second time yields the same frame instead of popping
# a different trailing column and overwriting player_name with it.
player_name = 'player_name'

# Every column except the two player label columns, in their current order.
column_list = [col for col in ts_df.columns if col not in ('player_type', player_name)]

# Put player_name in the second position of the column list, right after ID.
column_list.insert(1, player_name)

ts_df = ts_df[column_list]  # Set our DataFrame to the new arrangement of columns.
ts_df.head()

Unnamed: 0,ID,player_name,points,possessions,team_pace,GP,MPG,TS%,AST,TO,USG,ORR,DRR,REBR,PER
0,31,Sylvester,2049.0,1434.0,110.0,64.0,38.8,0.619,31.5,14.9,35.5,8.3,17.6,12.8,28.44
1,32,Marvin the Martian,1795.0,1481.8,112.1,62.0,35.4,0.608,31.9,14.5,32.0,6.5,22.5,12.9,23.34
2,33,Road Runner,1805.0,1509.9,108.6,64.0,35.4,0.622,27.9,13.9,36.0,5.9,27.7,12.2,22.41
3,34,Foghorn Leghorn,1743.0,1422.4,112.9,64.0,36.3,0.619,30.9,15.6,34.5,5.9,18.9,14.8,29.853138
4,35,Bugs Bunny,1963.0,1539.1,117.4,59.771429,35.208333,0.633,32.3,16.2,34.0,5.9,19.8,13.1,27.16


In [9]:
# Names of the stat columns we're interested in: everything from position 7
# up to (but not including) the last column of ts_df.
game_stat_cols = ts_df.columns[7:-1].tolist()

# Standard deviation of each stat across the Tune Squad players, in column order.
game_stat_stdevs = [ts_df[col_name].std() for col_name in game_stat_cols]

# The same standard deviations as a Series indexed by stat name.
stdev_s = pd.Series(data=game_stat_stdevs, index=game_stat_cols)
stdev_s

TS%     0.008262
AST     2.140494
TO      0.797197
USG     1.892718
ORR     1.139465
DRR     3.017962
REBR    1.802564
dtype: float64

In [11]:
# X: The input data we use to predict y
# y: The output value that you want the machine learning model to predict -> PER

# Get the dependent and independent variables for modeling the PER.
# X is kept as a DataFrame (not converted to a NumPy array) so the model is
# fitted with the stat column names. The later cells call lin_reg.predict()
# with DataFrames that carry those column names; fitting on a bare array would
# make scikit-learn warn about feature names on every predict call.
X = player_df_final.iloc[:, 7:-1]  # stat columns: position 7 up to, not including, the last
y = player_df_final.iloc[:, -1]    # the last column is the target

# Define and fit the model.
lin_reg = LinearRegression()
lin_reg.fit(X, y)
# This code gives us a machine learning model (lin_reg) that we can use to predict PER
# based on a set of the seven input stats that we used to train the model (TS%, AST, TO, USG, ORR, DRR, and REBR).

In [12]:
# Header for the report of the highest- and lowest-PER player in each iteration.
print('Iteration # \thigh PER \tlow PER')

# Run the simulation 10 times.
for run in range(10):

    # Randomize every stat for every player: the player's own value plus
    # standard-normal noise scaled by that stat's standard deviation.
    simulated_stats = {
        stat: list(ts_df[stat] + randn(len(ts_df)) * stdev_s[stat])
        for stat in game_stat_cols
    }

    # Temporary DataFrame for this iteration: one column per stat, indexed by player name.
    game_df = pd.DataFrame(simulated_stats, index=list(ts_df['player_name']))

    # Use the fitted model to predict players' PERs based on the randomized data.
    game_df['PER'] = lin_reg.predict(game_df)

    # Print the players with the highest and lowest PER for this iteration.
    best_player = game_df['PER'].idxmax()
    worst_player = game_df['PER'].idxmin()
    print(' \t'.join(['Iteration {}'.format(run + 1), best_player, worst_player]))


#  If we see a high-PER player's PER start to drop, we might consider giving that player a water break.
#  The player's lower PER probably means that they're getting tired.

Iteration # 	high PER 	low PER
Iteration 1 	Lola Bunny 	Penelope
Iteration 2 	Lola Bunny 	Tasmanian Devil
Iteration 3 	Foghorn Leghorn 	Tweety
Iteration 4 	Sylvester 	Tasmanian Devil
Iteration 5 	Lola Bunny 	Tasmanian Devil
Iteration 6 	Elmer Fudd 	Penelope
Iteration 7 	Lola Bunny 	Tasmanian Devil
Iteration 8 	Gossamer 	Penelope
Iteration 9 	Lola Bunny 	Penelope
Iteration 10 	Wile E. Coyote 	Tasmanian Devil




In [13]:
# Create a CSV file that contains randomized player data for four points in the game:
# 0 minutes (the start of the game), 12 minutes, 24 minutes, and 36 minutes.
number_of_iterations = 4
MINUTES_PER_PERIOD = 12
period_starts = [period * MINUTES_PER_PERIOD for period in range(number_of_iterations)]
player_names = list(ts_df['player_name'])

# For each period, generate randomized player data and predict the PER
# with the model fitted earlier.
df_list = []
for _ in period_starts:
    # Each stat is the player's own value plus standard-normal noise scaled by
    # that stat's standard deviation.
    period_df = pd.DataFrame(
        {
            stat: list(ts_df[stat] + randn(len(ts_df)) * stdev_s[stat])
            for stat in game_stat_cols
        },
        index=player_names,
    )
    # Predict before adding the PER column so the model only sees the stat columns.
    period_df['PER'] = lin_reg.predict(period_df)
    df_list.append(period_df)

# Concatenate the periods into one DataFrame indexed by (minutes, player_name).
# `keys` labels each period's rows with its starting minute as the outer index
# level; `names` names the two index levels. This replaces adding a minutes
# column by hand and then calling set_index/swaplevel with inplace mutation.
game_df = pd.concat(df_list, keys=period_starts, names=['minutes', 'player_name'])

# Export the finished DataFrame to CSV.
game_df.to_csv('game_stats.csv')

