In [7]:
%pwd

'/workspaces/Coordinated-Multi-Agent-Imitation-Learning/code'

In [8]:
from datetime import datetime
import numpy as np
import pandas as pd
import glob, os, sys, math, warnings, copy, time
import matplotlib.pyplot as  plt

# customized ftns 
from preprocessing import *
from utilities import *
from model import *
from train import train_all_single_policies
# ---------------------------------------------------------
%matplotlib inline
%load_ext autoreload
%autoreload 2
warnings.filterwarnings('ignore')
# warnings.filterwarnings(action='once')
# ---------------------------------------------------------
# directories
main_dir = '../'
game_dir = main_dir+'data/'
Data = LoadData(main_dir, game_dir)
models_path = './models/'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Load raw data

In [9]:
# %%time
game_id = '0021500463'
game_data = Data.load_game(game_id)
events_df = pd.DataFrame(game_data['events'])
print('raw events shape:', events_df.shape)
home_id = events_df.loc[0].home['teamid']
events_df.head(3)

raw events shape: (231, 8)


Unnamed: 0,playbyplay,moments,visitor,orig_events,start_time_left,home,quarter,end_time_left
0,GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,"[[1, 1451351428029, 708.28, 12.78, None, [[-1,...","{'abbreviation': 'TOR', 'players': [{'playerid...",[0],708.28,"{'abbreviation': 'CHI', 'players': [{'playerid...",1,702.31
1,GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,"[[1, 1451351428029, 708.28, 12.78, None, [[-1,...","{'abbreviation': 'TOR', 'players': [{'playerid...",[1],708.28,"{'abbreviation': 'CHI', 'players': [{'playerid...",1,686.28
2,GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,"[[1, 1451351444029, 692.25, 12.21, None, [[-1,...","{'abbreviation': 'TOR', 'players': [{'playerid...","[2, 3]",692.25,"{'abbreviation': 'CHI', 'players': [{'playerid...",1,668.42


In [10]:
events_df.home[0]['teamid']

1610612741

In [11]:
events_df.visitor[0]['teamid']

1610612761

#### Get some suplementary data

In [12]:
# # play id to play roles/positions
# id_role = id_position(events_df)
# check_game_roles_duplicates(id_role)

# # its possible that F has similar role as G-f or F-G, we create empty slots to ensure meta order
# # ddentify defending and offending runs (this is included in process_moments)
# court_index = Data.load_csv('./meta_data/court_index.csv')
# court_index = dict(zip(court_index.game_id, court_index.court_position))

# # home and visitor ids
# homeid = events_df.loc[0].home['teamid']
# awayid = events_df.loc[0].visitor['teamid']

### FILTER EVENTS

In [13]:
# events_df.loc[3].playbyplay.to_dict('list')

In [14]:
# events_df.moments[3]

In [15]:
# n_event = 233
# P = PlotGame('0021500196', main_dir, game_dir)
# for i in range(len(events_df.moments[n_event])):
#     P.load_moment2img(game_data, n_event, i)

### as we saw that the playbyplay description of events is not accurate, so for now at least we will not try to filter by events

In [16]:
def remove_non_eleven(events_df, event_length_th=25, verbose=False):
    df = events_df.copy()
    home_id = df.loc[0]['home']['teamid']
    away_id = df.loc[0]['visitor']['teamid']
    def remove_non_eleven_(moments, event_length_th=25, verbose=False):
        ''' Go through each moment, when encounters balls not present on court,
            or less than 10 players, discard these moments and then chunk the following moments 
            to as another event.

            Motivations: balls out of bound or throwing the ball at side line will
                probably create a lot noise for the defend trajectory learning model.
                We could add the case where players are less than 10 (it could happen),
                but this is not allowed in the model and it requres certain input dimension.

            moments: A list of moments
            event_length_th: The minimum length of an event

            segments: A list of events (or, list of moments) e.g. [ms1, ms2] where msi = [m1, m2]
        '''

        segments = []
        segment = []
        # looping through each moment
        for i in range(len(moments)):
            # get moment dimension
            moment_dim = len(moments[i][5])
            # 1 bball + 10 players
            if moment_dim == 11:
                segment.append(moments[i])
            # less than ten players or basketball is not on the court
            else:
    #             print('less than 11')
                # only grab these satisfy the length threshold
                if len(segment) >= event_length_th:
                    segments.append(segment)
                # reset the segment to empty list
                segment = []
        # grab the last one
        if len(segment) >= event_length_th:
            segments.append(segment)
        if len(segments) == 0:
            if verbose: print('Warning: Zero length event returned')
        return segments
    # process for each event (row)
    df['chunked_moments'] = df.moments.apply(lambda m: remove_non_eleven_(m, event_length_th, verbose))
    # in case there's zero length event
    df = df[df['chunked_moments'].apply(lambda e: len(e)) != 0]
    df['chunked_moments'] = df['chunked_moments'].apply(lambda e: e[0])
    return df['chunked_moments'].values, {'home_id': home_id, 'away_id': away_id}

In [17]:
r, team_ids = remove_non_eleven(events_df)

In [18]:
events_df1 = pd.DataFrame({'moments': r})

In [19]:
def chunk_shotclock(events_df, event_length_th=25, verbose=False):
    df = events_df.copy()
    def chunk_shotclock_(moments, event_length_th, verbose):
        ''' When encounters ~24secs or game stops, chunk the moment to another event.
            shot clock test:
            1) c = [20.1, 20, 19, None,18, 12, 9, 7, 23.59, 23.59, 24, 12, 10, None, None, 10]
              result = [[20.1, 20, 19], [18, 12, 9, 7], [23.59], [23.59], [24, 12, 10]]
            2) c = [20.1, 20, 19, None, None,18, 12, 9, 7, 7, 7, 23.59, 23.59, 24, 12, 10, None, None, 10]
              result = [[20.1, 20, 19], [18, 12, 9, 7], [7], [7], [23.59], [23.59], [24, 12, 10]]

            Motivations: game flow would make sharp change when there's 24s or 
            something happened on the court s.t. the shot clock is stopped, thus discard
            these special moments and remake the following valid moments to be next event.

            moments: A list of moments
            event_length_th: The minimum length of an event
            verbose: print out exceptions or not

            segments: A list of events (or, list of moments) e.g. [ms1, ms2] where msi = [m1, m2] 
        '''

        segments = []
        segment = []
        # naturally we won't get the last moment, but it should be okay
        for i in range(len(moments)-1):
            current_shot_clock_i = moments[i][3]
            next_shot_clock_i = moments[i+1][3]
            # sometimes the shot clock value is None, thus cannot compare
            try:
                # if the game is still going i.e. sc is decreasing
                if next_shot_clock_i < current_shot_clock_i:
                    segment.append(moments[i])
                # for any reason the game is sstopped or reset
                else:
                    # not forget the last moment before game reset or stopped
                    if current_shot_clock_i < 24.:
                        segment.append(moments[i])
                    # add length condition
                    if len(segment) >= event_length_th:
                        segments.append(segment)
                    # reset the segment to empty list
                    segment = []
            # None value
            except Exception as e:
                if verbose: print(e)
                # not forget the last valid moment before None value
                if current_shot_clock_i != None:
                    segment.append(moments[i])    
                if len(segment) >= event_length_th:
                    segments.append(segment)
                # reset the segment to empty list
                segment = []

        # grab the last one
        if len(segment) >= event_length_th:
            segments.append(segment)            
        if len(segments) == 0:
            if verbose: print('Warning: Zero length event returned')
        return segments
    
    # process for each event (row)
    df['chunked_moments'] = df.moments.apply(lambda m: chunk_shotclock_(m, event_length_th, verbose))
    # in case there's zero length event
    df = df[df['chunked_moments'].apply(lambda e: len(e)) != 0]
    df['chunked_moments'] = df['chunked_moments'].apply(lambda e: e[0])
    return df['chunked_moments'].values

In [20]:
r1 = chunk_shotclock(events_df1)
events_df2 = pd.DataFrame({'moments': r1})

In [21]:
def chunk_halfcourt(events_df, event_length_th=25, verbose=False):
    df = events_df.copy()
    def chunk_halfcourt_(moments, event_length_th, verbose):
        ''' Discard any plays that are not single sided. When the play switches 
            court withhin one event, we chunk it to be as another event
        '''

        # NBA court size 94 by 50 feet
        half_court = 94/2. # feet
        cleaned = []

        # remove any moments where two teams are not playing at either side of the court
        for i in moments:
            # the x coordinates is on the 3rd or 2 ind of the matrix,
            # the first and second is team_id and player_id
            team1x = np.array(i[5])[1:6, :][:, 2]    # player data starts from 1, 0 ind is bball
            team2x = np.array(i[5])[6:11, :][:, 2]
            # if both team are on the left court:
            if sum(team1x <= half_court)==5 and sum(team2x <= half_court)==5:
                cleaned.append(i)
            elif sum(team1x >= half_court)==5 and sum(team2x >= half_court)==5:
                cleaned.append(i)

        # if teamns playing court changed during same list of moments,
        # chunk it to another event
        segments = []
        segment = []
        for i in range(len(cleaned)-1):
            current_mean = np.mean(np.array(cleaned[i][5])[:, 2], axis=0)
            current_pos = 'R' if current_mean >= half_court else 'L'
            next_mean = np.mean(np.array(cleaned[i+1][5])[:, 2], axis=0)
            next_pos = 'R' if next_mean >= half_court else 'L'

            # the next moment both team are still on same side as current
            if next_pos == current_pos:
                segment.append(cleaned[i])
            else:
                if len(segment) >= event_length_th:
                    segments.append(segment)
                segment = []
        # grab the last one
        if len(segment) >= event_length_th:
            segments.append(segment)            
        if len(segments) == 0:
            if verbose: print('Warning: Zero length event returned')
        return segments
    
    # process for each event (row)
    df['chunked_moments'] = df.moments.apply(lambda m: chunk_halfcourt_(m, event_length_th, verbose))
    # in case there's zero length event
    df = df[df['chunked_moments'].apply(lambda e: len(e)) != 0]
    df['chunked_moments'] = df['chunked_moments'].apply(lambda e: e[0])
    return df['chunked_moments'].values

In [22]:
r2 = chunk_halfcourt(events_df2)
events_df3 = pd.DataFrame({'moments': r2})

In [23]:
# court_index

In [24]:
court_index = pd.read_csv('./meta_data/court_index.csv')
court_index = dict(zip(court_index.game_id, court_index.court_position))
court_index[int('0021500196')]

1

In [25]:
def reorder_teams(events_df, game_id):
    df = events_df.copy()
    def reorder_teams_(input_moments, game_id):
        ''' 1) the matrix always lays as home top and away bot VERIFIED
            2) the court index indicate which side the top team (home team) defends VERIFIED

            Reorder the team position s.t. the defending team is always the first 

            input_moments: A list moments
            game_id: str of the game id
        '''
        # now we want to reorder the team position based on meta data
        court_index = pd.read_csv('./meta_data/court_index.csv')
        court_index = dict(zip(court_index.game_id, court_index.court_position))

        full_court = 94.
        half_court = full_court/2. # feet
        home_defense = court_index[int(game_id)]
        moments = copy.deepcopy(input_moments)
        for i in range(len(moments)):
            home_moment_x = np.array(moments[i][5])[1:6,2]
            away_moment_x = np.array(moments[i][5])[6:11,2]
            quarter = moments[i][0]
            # if the home team's basket is on the left
            if home_defense == 0:
                # first half game
                if quarter <= 2:
                    # if the home team is over half court, this means they are doing offense
                    # and the away team is defending, so switch the away team to top
                    if sum(home_moment_x>=half_court)==5 and sum(away_moment_x>=half_court)==5:
                        moments[i][5][1:6], moments[i][5][6:11] = moments[i][5][6:11], moments[i][5][1:6]
                        for l in moments[i][5][1:6]:
                            l[2] = full_court - l[2]
                        for l in moments[i][5][6:11]:
                            l[2] = full_court - l[2]
                # second half game      
                elif quarter > 2: # second half game, 3,4 quarter
                    # now the home actually gets switch to the other court
                    if sum(home_moment_x<=half_court)==5 and sum(away_moment_x<=half_court)==5:
                        moments[i][5][1:6], moments[i][5][6:11] = moments[i][5][6:11], moments[i][5][1:6]
                    elif sum(home_moment_x>=half_court)==5 and sum(away_moment_x>=half_court)==5:
                        for l in moments[i][5][1:6]:
                            l[2] = full_court - l[2]
                        for l in moments[i][5][6:11]:
                            l[2] = full_court - l[2]
                else:
                    print('Should not be here, check quarter value')
            # if the home team's basket is on the right
            elif home_defense == 1:
                # first half game
                if quarter <= 2:
                    # if the home team is over half court, this means they are doing offense
                    # and the away team is defending, so switch the away team to top
                    if sum(home_moment_x<=half_court)==5 and sum(away_moment_x<=half_court)==5:
                        moments[i][5][1:6], moments[i][5][6:11] = moments[i][5][6:11], moments[i][5][1:6]
                    elif sum(home_moment_x>=half_court)==5 and sum(away_moment_x>=half_court)==5:
                        for l in moments[i][5][1:6]:
                            l[2] = full_court - l[2]
                        for l in moments[i][5][6:11]:
                            l[2] = full_court - l[2]
                # second half game      
                elif quarter > 2: # second half game, 3,4 quarter
                    # now the home actually gets switch to the other court
                    if sum(home_moment_x>=half_court)==5 and sum(away_moment_x>=half_court)==5:
                        moments[i][5][1:6], moments[i][5][6:11] = moments[i][5][6:11], moments[i][5][1:6]
                        for l in moments[i][5][1:6]:
                            l[2] = full_court - l[2]
                        for l in moments[i][5][6:11]:
                            l[2] = full_court - l[2]
                else:
                    print('Should not be here, check quarter value')
        return moments
    return [reorder_teams_(m, game_id) for m in df.moments.values]

In [26]:
r3 = reorder_teams(events_df3, game_id)
events_df4 = pd.DataFrame({'moments': r3})

In [27]:
def flatten_moments(events_df):
    df = events_df.copy()
    def flatten_moment(moment):
        m = np.array(moment[5])
        features = np.concatenate((m[1:11, 2:4].reshape(-1),    # x,y of all 10 players 
                                   m[0][2:5],                   # basketball x,y,z 
                                   np.array([moment[0]]),       # quarter number 
                                   np.array([moment[2]]),       # time in seconds left to the end of the period
                                   np.array([moment[3]])))      # shot clock 
        return features
    
    def get_team_ids(moment):
        m = np.array(moment[5])
        team_id1 = set(m[1:6, 0])
        team_id2 = set(m[6:11, 0])
        assert len(team_id1) == len(team_id2) == 1
        assert team_id1 != team_id2
        return [list(team_id1)[0], list(team_id2)[0]]
        
        
    df['flattened'] = df.moments.apply(lambda ms: [flatten_moment(m) for m in ms])
    df['team_ids'] = df.moments.apply(lambda ms: get_team_ids(ms[0])) # just use the first one to determine        
    
    return df['flattened'].values, df['team_ids'].values

r4, team_ids = flatten_moments(events_df4)
events_df5 = pd.DataFrame({'moments': r4})   


def create_static_features(events_df):
    df = events_df.copy()
    def create_static_features_(moment):
        ''' moment: flatten moment i.e. (25=10*2+3+2,)'''
        # distance of each players to the ball
        player_xy = moment[:10*2]
        b_xy = moment[10*2:10*2+2]
        hoop_xy = np.array([3.917, 25])

        def disp_(pxy, target):
            # dispacement to bball
            disp = pxy.reshape(-1, 2) - np.tile(target, (10, 1))
            r = np.sqrt(disp[:,0]**2 + disp[:, 1]**2)               # r 
            cos_theta = disp[:, 0]/r                                # costheta
            sin_theta = disp[:, 1]/r                                # sintheta
            theta = np.arccos(cos_theta)                            # theta
            return np.concatenate((r, cos_theta, sin_theta, theta))
        return np.concatenate((moment, disp_(player_xy, b_xy), disp_(player_xy, hoop_xy)))
    df['enriched'] = df.moments.apply(lambda ms: np.vstack([create_static_features_(m) for m in ms]))
    return df['enriched'].values
    
r5 = create_static_features(events_df5)
events_df6 = pd.DataFrame({'moments': r5})

In [28]:
def create_dynamic_features(events_df, fs):
    df = events_df.copy()
    def create_dynamic_features_(moments, fs):
        ''' moments: (moments length, n existing features)'''
        pxy = moments[:, :23] # get the players x,y and basketball x,y,z coordinates
        next_pxy = np.roll(pxy, -1, axis=0) # get next frame value
        vel = ((next_pxy - pxy)/fs)[:-1, :] # the last velocity is not meaningful
        # when we combine this back to the original features, we shift one done,
        # i.e. [p1, p2, ..., pT] combine [_, p2-p1, ...., pT-pT_1]
        # the reason why we shift is that we don't want to leak next position info
        return np.column_stack([moments[1:, :], vel])
    df['enriched'] = df.moments.apply(lambda ms: create_dynamic_features_(ms, fs))
    return df['enriched'].values


In [29]:
r6 = create_dynamic_features(events_df6, 1/25.)
events_df7 = pd.DataFrame({'moments': r6})

In [30]:
len(r6)

177

In [31]:
sample = np.arange(6)
batchsize1 = 5
for i in iterate_minibatches(sample, sample+1, batchsize1, False):
    print(i)

(array([0, 1, 2, 3, 4]), array([1, 2, 3, 4, 5]))
(array([1, 2, 3, 4, 5]), array([2, 3, 4, 5, 6]))


##### role alignment

In [32]:
# 10*2 (10 players with x,y) + 3(bball x,y,z) + 1(qtr number) + 1(time left in qtr) + 1(sc) + 
# 10*(4(r,cos,sin,theta)_bball + 4(r, cos, sin, theta)_hoop) + 10*2 (10 players vx, vy) + 3(bball vx,vy,vz)
n_fts = 10*2 + 3 + 1 + 1 + 1 + 10*(4+4) + 10*2 + 3
n_fts

129

In [33]:
from hidden_role_learning import *

In [34]:
# HSL.defend_players

In [35]:
HSL = HiddenStructureLearning(events_df7, libmode='hmmlearn', defend_iter=100, offend_iter=100)
t1 = time.time()
role_assignments, result1 = HSL.assign_roles(player_inds=HSL.defend_players, n_iter=HSL.defend_iter, mode='euclidean')
print('took: {0:.2f}'.format((time.time()-t1)))

2026-01-22 01:52:07,332 | INFO : Assigning roles by euclidean method.
2026-01-22 01:52:07,332 | INFO : Training for player_inds: [0, 1, 2, 3, 4] with iterations: 100


         1 -3561157.03863647             +nan
         2 -3026908.92024088 +534248.11839559
         3 -2803643.55828825 +223265.36195263
         4 -2763033.08916626  +40610.46912199
         5 -2719624.49104469  +43408.59812157
         6 -2675464.01634971  +44160.47469498
         7 -2652683.54941642  +22780.46693329
         8 -2647747.45150615   +4936.09791027
         9 -2645777.48410347   +1969.96740268
        10 -2644685.01805901   +1092.46604446
        11 -2643920.69507460    +764.32298442
        12 -2643374.01644864    +546.67862596
        13 -2642896.94025361    +477.07619503
        14 -2642552.58053685    +344.35971677
        15 -2642317.54193908    +235.03859776
        16 -2642121.04366939    +196.49826969
        17 -2641998.07563634    +122.96803305
        18 -2641929.99570266     +68.07993367
        19 -2641880.64776562     +49.34793705
        20 -2641847.69141571     +32.95634991
        21 -2641831.79323706     +15.89817864
        22 -2641823.21469890      

took: 14.15


        99 -2641803.07640306      +0.01881472
       100 -2641803.05867071      +0.01773235


---

In [36]:
from pomegranate import *
import logging


In [38]:
# import utilities as utils
# utils.enable_gpu()
# utils.is_gpu_enabled()

In [39]:
from pomegranate.hmm import HiddenMarkovModel

h = HiddenStructureLearning(events_df7, libmode='pom')
data = h.create_hmm_input(h.defend_players)

In [None]:
len(data)

In [None]:
np.array(data[:2]).shape

In [None]:
data[1].shape

In [None]:
type(data)

In [None]:
Model = HiddenMarkovModel.from_samples(NormalDistribution, n_components=5, X=data, 
                                       stop_threshold=1e-3, n_jobs=4, verbose=True)

In [None]:
pred = Model.predict_proba(X[0])


In [41]:
def process_game_data_(game_id, events_df, event_threshold, subsample_factor):
    # remove non elevens
    logging.debug('removing non eleven')
    result, _ = remove_non_eleven(events_df, event_threshold)
    df = pd.DataFrame({'moments': result})
    # chunk based on shot clock, Nones or stopped timer
    logging.debug('chunk shotclock')
    result = chunk_shotclock(df, event_threshold)
    df = pd.DataFrame({'moments': result})
    # chunk based on half court and normalize to all half court
    logging.debug('chunk half court')
    result = chunk_halfcourt(df, event_threshold)
    df = pd.DataFrame({'moments': result})
    # reorder team matrix s.t. the first five players are always defend side players
    logging.debug('reordering team')
    result = reorder_teams(df, game_id)
    df = pd.DataFrame({'moments': result})

    # features 
    # flatten data
    logging.debug('flatten moment')
    result, team_ids = flatten_moments(df)
    df = pd.DataFrame({'moments': result})  
    # static features
    logging.debug('add static features')
    result = create_static_features(df)
    df = pd.DataFrame({'moments': result})
    # dynamic features
    logging.debug('add velocities')
    fs = 1/25.
    result = create_dynamic_features(df, fs)
    # one hot encoding
    logging.debug('add one hot encoding')
    OHE = OneHotEncoding()
    result = OHE.add_ohs(result, team_ids)
    df = pd.DataFrame({'moments': result})
    return df

In [42]:
game_data = Data.load_game('0021500024')
events_df = pd.DataFrame(game_data['events'])
df = process_game_data_('0021500024', events_df, 100, 2)

h = HiddenStructureLearning(events_df7, libmode='pom')
event = df.moments.values
# create X: array-like, shape (n_samples, n_features)
plater_fts = [ms[:, h.find_features_ind(player)[1]] for player in h.defend_players for ms in event]

In [43]:

# data = h.create_hmm_input(h.defend_players)
data = plater_fts



In [44]:
from pomegranate import MultivariateGaussianDistribution as MGD
Model = HiddenMarkovModel.from_samples(MGD, 2, data, 
                                       stop_threshold=1e-3, n_jobs=4, verbose=True)

[1] Improvement: 29551931419.073864	Time (s): 1.152
[2] Improvement: 1386352.4865913875	Time (s): 1.172
[3] Improvement: 867995.8644302773	Time (s): 1.153
[4] Improvement: 653466.1948697192	Time (s): 1.163
[5] Improvement: 623951.5922823673	Time (s): 1.161
[6] Improvement: 399550.63579213014	Time (s): 1.163
[7] Improvement: 88930.11078365962	Time (s): 1.161
[8] Improvement: 15836.47395412065	Time (s): 1.143
[9] Improvement: 5320.209096389124	Time (s): 1.17
[10] Improvement: 1953.182450049324	Time (s): 1.173
[11] Improvement: 778.4072311469354	Time (s): 1.192
[12] Improvement: 158.60597424558364	Time (s): 1.165
[13] Improvement: 32.80135096539743	Time (s): 1.132
[14] Improvement: 1.8722274370957166	Time (s): 1.172
[15] Improvement: 0.09650830924510956	Time (s): 1.153
[16] Improvement: 0.003003370948135853	Time (s): 1.152
[17] Improvement: 0.00023870402947068214	Time (s): 1.174
Total Training Improvement: 29555975747.61064
Total Training Time (s): 20.7764


In [45]:
from pomegranate import MultivariateGaussianDistribution as MGD
Model = HiddenMarkovModel.from_samples(MGD, 2, data, 
                                       stop_threshold=1e-3, n_jobs=4, verbose=True)

[1] Improvement: 28071990010.60966	Time (s): 1.143
[2] Improvement: 848503.700357995	Time (s): 1.165
[3] Improvement: 2104139.120956857	Time (s): 1.145
[4] Improvement: 698939.5610704515	Time (s): 1.147
[5] Improvement: 522758.9260767846	Time (s): 1.132
[6] Improvement: 576553.4507804135	Time (s): 1.143
[7] Improvement: 359721.3768260225	Time (s): 1.143
[8] Improvement: 88573.34984099865	Time (s): 1.173
[9] Improvement: 29236.310910562286	Time (s): 1.193
[10] Improvement: 7753.293109112419	Time (s): 1.17
[11] Improvement: 1738.474193589529	Time (s): 1.174
[12] Improvement: 386.459237549454	Time (s): 1.15
[13] Improvement: 84.62548704002984	Time (s): 1.132
[14] Improvement: 18.187687416095287	Time (s): 1.182
[15] Improvement: 3.900332967052236	Time (s): 1.173
[16] Improvement: 0.8342843267600983	Time (s): 1.141
[17] Improvement: 0.17760169063694775	Time (s): 1.151
[18] Improvement: 0.0380541505292058	Time (s): 1.146
[19] Improvement: 0.007905423175543547	Time (s): 1.163
[20] Improvement

In [46]:
data[1].shape

(202, 12)

In [47]:
result1['state_sequence']

KeyError: 'state_sequence'

In [None]:
result['state_sequence']

In [None]:
np.sum(result1['state_sequence'][3] == result['state_sequence'][3])

In [None]:
result['state_sequence'][0].shape

In [None]:
# n_rows = [len(i) > 0 for i in test_seq]
# n_cols = [i.shape[1] for i in test_seq]
# assert len(set(n_cols)) == 1
# assert sum(n_rows) == len(n_cols)

In [None]:
# from preprocessing import subsample_sequence

In [None]:
# test_seq[2]

In [None]:
# subsample_sequence(test_seq, 2)

In [None]:
a = np.array([1,2,3])
b = pd.DataFrame({'A':a})

In [None]:
b['B'] = b.A.apply(lambda x: np.array([0]*x))

In [None]:
b['B'][0]

In [None]:
a = np.zeros((3,2))
df = pd.DataFrame(a)

In [None]:
list(df.loc[0])

In [None]:
m = np.array([2,4,6,70,-1, 9])
m

In [None]:
b = m.reshape(-1, 2)
b

In [None]:
a = np.array([1, 0])

In [None]:
np.sqrt((b - np.tile(a, (3,1)))[:, 0]**2 + (b - np.tile(a, (3,1)))[:, 1]**2)

In [None]:
(b - np.tile(a, (3,1)))[:, 1]**2

In [None]:
(b - np.tile(a, (3,1)))[:, 0]**2

In [None]:
17**0.5

In [None]:
r = HSL.reorder_moment()

In [None]:
lengths[0]

In [None]:
len(role_assignments)

In [None]:
events_df7.moments.values

In [None]:
# original = copy.deepcopy(events_df7.moments.values)
# reordered = copy.deepcopy(events_df7.moments.values)
# divider = 0
# lengths = [len(m) for m in original]
# # iteratve through each moments length
# for i in range(len(lengths)):
# #     print(i, len(lengths))
#     # grab the corresponding moments' reordered roles
#     ra_i = role_assignments[divider:divider+lengths[i]]
#     # update the next starting index
#     divider += lengths[i]
#     # iterate through each moment in the current moments
#     for j in range(lengths[i]):
#         # iterate through each players
#         for p in HSL.defend_players:
#             # get the current player feature index
#             p_ind = HSL.find_features_ind_(p)[0]
#             # get the player feature index corresponding to the reordered role
#             re_p_ind = HSL.find_features_ind_(ra_i[j][p])[0]
#             reordered[i][j][re_p_ind] = original[i][j][p_ind] 
    

In [None]:
r[0]

In [None]:
team_ids

In [48]:
OHE = OneHotEncoding()
final = [np.column_stack((r[i], np.tile(OHE.encode(team_ids[i]), (len(r[i]), 1)))) for i in range(len(r))]

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (150, 6) + inhomogeneous part.

In [None]:
cmeans.shape

In [None]:
result[1]['X']

In [None]:
from scipy.spatial import distance
ed = distance.cdist(result[1]['X'], result[1]['cmeans'], 'euclidean')

In [None]:
ed.shape

In [None]:
concated_ms[0]

In [None]:
cmeans[0]

In [None]:
np.sqrt(sum((concated_ms[0]-cmeans[0])**2))

In [None]:
np.sqrt(sum((concated_ms[0]-cmeans[1])**2))

In [None]:
ed[0]

In [None]:
n = len(ed)//5 # number of sequences
assert len(ed) % 5 == 0

In [None]:
cost = ed[np.arange(5)*n]
cost

In [None]:
# 1) minimize the wrong posterior i.e. 1 - posterior
# 2) euclidean distance to the means
# 3) adjusted consine similarity to the means

In [None]:
# defend_X, defend_lengths = create_hmm_input(events_df7, players=list(range(5)))
# offend_X, offend_lengths = create_hmm_input(events_df7, players=list(range(5, 10)))

In [None]:
# defend_model = hmm.GaussianHMM(n_components=5, covariance_type='diag', n_iter=50, verbose=True)#, random_state=42)
# defend_model.fit(defend_X, defend_lengths)

In [None]:
# offend_model = hmm.GaussianHMM(n_components=5, covariance_type='diag', n_iter=50, verbose=True)#, random_state=42)
# offend_model.fit(offend_X, offend_lengths)

In [None]:
# from hmmlearn import hmm
# model = hmm.GaussianHMM(n_components=5, covariance_type='diag', n_iter=50, verbose=True)
# model.fit(X, lengths)

In [None]:
# cmeans = defend_model.means_
# covars = defend_model.covars_

In [None]:
# state_sequence = defend_model.predict(defend_X, defend_lengths)

In [None]:
d = defend_lengths.reshape(-1, 5)

In [None]:
s = state_sequence.reshape(-1, 5)

In [None]:
s.shape[0]

In [None]:
a = np.arange(10)
a

In [None]:
a.reshape(5, -1).T

In [None]:
defend_lengths.shape

In [None]:
l = defend_lengths.reshape(5, -1)

In [None]:
l[0,:] == l[1,:]

1) sumsample should only happen all the way until the end, e.g. if velocity is computed after subsample then the direction of the velocity will result in more errors.