In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt 

In [8]:
codes_df = pd.read_csv('Event_Codes.txt', sep='\t')
game_lineup_df = pd.read_csv('Game_Lineup.txt', sep='\t')
playxplay_df = pd.read_csv('Play_by_Play.txt', sep='\t')

### EDA
*Descriptions taken from prompt.*
Codes_df: This dataset provides look up values for the event message types and action types found in the play by play dataset. Each code is converted to an English language description of the event

In [12]:
display(codes_df.head(10))
print(codes_df.dtypes)

Unnamed: 0,Event_Msg_Type,Action_Type,Event_Msg_Type_Description,Action_Type_Description
0,1,0,Made Shot,No Shot
1,1,1,Made Shot,Jump Shot
2,1,2,Made Shot,Running Jump Shot
3,1,3,Made Shot,Hook Shot
4,1,4,Made Shot,Tip Shot
5,1,5,Made Shot,Layup Shot
6,1,6,Made Shot,Driving Layup Shot
7,1,7,Made Shot,Dunk Shot
8,1,8,Made Shot,Slam Dunk Shot
9,1,9,Made Shot,Driving Dunk Shot


Event_Msg_Type                 int64
Action_Type                    int64
Event_Msg_Type_Description    object
Action_Type_Description       object
dtype: object


Game_lineup_df provides the start of period player availability.
* Game_id– a unique game code for each game
* Period(Quarter) – the associated period of the line up (overtime periods are indicated by values greater than 
* Person_id– a unique identifier for each player
* Team_id– a unique identifier for each team 
* Status– a variable indicating whether the player is active (A) or inactive (I)

In [13]:
display(game_lineup_df.head(10))
print(game_lineup_df.dtypes)

Unnamed: 0,Game_id,Period,Person_id,Team_id,status
0,006728e4c10e957011e1f24878e6054a,0,ae53f8ba6761b64a174051da817785bc,01be0ad4af7aeb1f6d2cc2b6b6d6d811,A
1,006728e4c10e957011e1f24878e6054a,0,44230324724c84f122ac62a5f0918314,45ba8fc87f55b1191c50c400dc7ed11c,A
2,006728e4c10e957011e1f24878e6054a,0,8d2127290c94bd41b82a2938734bc750,01be0ad4af7aeb1f6d2cc2b6b6d6d811,A
3,006728e4c10e957011e1f24878e6054a,0,48ec4e6c52f418d5ca4ef510ba473ea0,45ba8fc87f55b1191c50c400dc7ed11c,A
4,006728e4c10e957011e1f24878e6054a,0,e816ff284dc3f965b8f3d605a3b91bae,01be0ad4af7aeb1f6d2cc2b6b6d6d811,A
5,006728e4c10e957011e1f24878e6054a,0,ed95dff5440fadf3042b5acacea81eed,01be0ad4af7aeb1f6d2cc2b6b6d6d811,A
6,006728e4c10e957011e1f24878e6054a,0,616281dee946056b071699476fdee9ec,45ba8fc87f55b1191c50c400dc7ed11c,A
7,006728e4c10e957011e1f24878e6054a,0,e814950408915f43de2b079dce7c21c5,45ba8fc87f55b1191c50c400dc7ed11c,A
8,006728e4c10e957011e1f24878e6054a,0,2ad626904c8b28cceb8e12c624a84240,01be0ad4af7aeb1f6d2cc2b6b6d6d811,A
9,006728e4c10e957011e1f24878e6054a,0,bd45fe7dba52aa2cd00ba80ff107d05b,01be0ad4af7aeb1f6d2cc2b6b6d6d811,I


Game_id      object
Period        int64
Person_id    object
Team_id      object
status       object
dtype: object


This dataset provides play by play information on the event level for each game.To properly sort the events in a game, use the following sequence of sorted columns: Period(ascending), PC_Time(descending), WC_Time(ascending), Event_Num(ascending)
* Event_Num– an ordered counter for each event in a game. Note, this number may not be perfectly sequential so please use the sorting methodology outlined above
* Event_Msg_Type, Action_Type– coded descriptions of what happened during the event (see the Event_Codes.txtdataset to see the codes)
* WC_Time– the in-arena time of the event in Unix format. It is coded as tenths of a second
* PC_Time– the time on the game clock in tenths of a second (e.g. 7200 corresponds to 720 seconds/12 minutes remaining in the quarter)
* Option1– on a shot attempt, this column will tell you the point value of the shot•On free throw attempts, if the value in this column is 1, it means it was a made free throw, otherwise, it was missed 
* Person1, Person2– the Person_id’s of the players who are directly associated with the event (e.g. if the event is an assisted made basket, Person1is the shot maker and Person2 is the player who assisted)• In the case of a substitution, the Event_Msg_Type will be 8, Person1will be the Person_id for the player leaving the game, and Person2will be the Person_idfor the player entering the game
* Team_id– in most scenarios, this is the Team_idassociated with the Person1 column. However, there are instances when this is not the case. To accurately and consistently identify a player’s team, we suggest merging with the Game_Lineup dataset on the Person1 and Person2 columns.

In [18]:
playxplay_df.sort_values(['Period', 'PC_Time', 'WC_Time', 'Event_Num'], 
                         ascending = [True, False, True, True],
                         inplace=True) #sorted based on above instructions
display(playxplay_df.head(10))
print(playxplay_df.dtypes)

Unnamed: 0,Game_id,Event_Num,Event_Msg_Type,Period,WC_Time,PC_Time,Action_Type,Option1,Option2,Option3,Team_id,Person1,Person2,Person3,Team_id_type,Person1_type,Person2_type,Person3_type
20607,a466c76e072fd634f6d4a8938fb63caa,2,12,1,330350,7200,0,0,0,0,1473d70e5646a26de3c52aa1abd85b1f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0,0,0,0
20608,a466c76e072fd634f6d4a8938fb63caa,4,10,1,330350,7200,0,0,0,0,0a6fc39f34702cff144525dfd265f8fa,b265ffd84dbeef427136d140811e7d5f,90115ca1e9f545b291b18a7db8d2a700,95c8abc40fb4ecf4fa5627975450dc95,2,4,5,4
30761,d36de5306a05d0cdbd51a7294ac91359,2,12,1,330550,7200,0,0,0,0,1473d70e5646a26de3c52aa1abd85b1f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0,0,0,0
31634,d522ea3836b8e1977ab10b3bfeb5dfd1,2,12,1,330550,7200,0,0,0,0,1473d70e5646a26de3c52aa1abd85b1f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0,0,0,0
31635,d522ea3836b8e1977ab10b3bfeb5dfd1,4,10,1,330550,7200,0,0,0,0,8362dfecb7412f12a12713852d2ae566,ef8b068ab7ac9d387b256404acd24cd5,64e4b07f8d90902bcab1aa9e6b5d2ecf,95920e4bf5b6c15ba8dffbf959b38ba5,2,4,5,4
30762,d36de5306a05d0cdbd51a7294ac91359,4,10,1,330560,7200,0,0,0,0,0a6fc39f34702cff144525dfd265f8fa,95c17413c3df2cb6709f0b23d27b5340,b265ffd84dbeef427136d140811e7d5f,95c8abc40fb4ecf4fa5627975450dc95,3,4,5,5
11815,68f62f1c5cb84257932676bef8cc7a1a,2,12,1,379620,7200,0,0,0,0,1473d70e5646a26de3c52aa1abd85b1f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0,0,0,0
11816,68f62f1c5cb84257932676bef8cc7a1a,4,10,1,379630,7200,0,0,0,0,012059d397c0b7e5a30a5bb89c0b075e,bca65b4d1889c9c4c97d5dfa6efb117b,8c646f2710d4f64955903039fdc6374e,b43d8e250bbf5bf03bcbbd2f48141acd,3,4,5,5
14617,7b4b6c18c831bc54239f663d0e604497,2,12,1,402920,7200,0,0,0,0,1473d70e5646a26de3c52aa1abd85b1f,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0370a0d090da0d0edc6319f120187e0e,0,0,0,0
14618,7b4b6c18c831bc54239f663d0e604497,4,10,1,402920,7200,0,0,0,0,cd45058739ed0ac8229849c6249aad48,bfef77a3e57907855444410d490e7bfd,0b703add83e9319d7b3accc14e153436,31598ba01a3fff03ed0a87d7dea11dfe,2,4,5,4


Game_id           object
Event_Num          int64
Event_Msg_Type     int64
Period             int64
WC_Time            int64
PC_Time            int64
Action_Type        int64
Option1            int64
Option2            int64
Option3            int64
Team_id           object
Person1           object
Person2           object
Person3           object
Team_id_type       int64
Person1_type       int64
Person2_type       int64
Person3_type       int64
dtype: object
