In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, select
from sqlalchemy.sql import and_, or_, not_
import re
#from collections import Counter
import uuid 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge

import sqlite3

sqlite3.register_adapter(np.int64, lambda val: int(val))
sqlite3.register_adapter(np.int32, lambda val: int(val))

In [2]:
engine = create_engine('sqlite:///HoopStat.db')
conn = engine.connect()
metadata = MetaData(bind=None)
games = Table('games', metadata, autoload = True, autoload_with = engine)
events = Table('events', metadata, autoload = True, autoload_with = engine)
teams = Table('teams', metadata, autoload = True, autoload_with = engine)

In [134]:
stmt = """
SELECT DISTINCT team FROM
(
SELECT
home AS team
FROM games
UNION ALL
SELECT
away AS team
FROM games
)
"""
results = conn.execute(stmt).fetchall()
results

[('William Peace',),
 ('Simpson',),
 ('Eastern Nazarene',),
 ('Mass-Boston',),
 ('Brandeis',),
 ('North Park',),
 ('Alfred State',),
 ('Sage',),
 ('Rivier',),
 ('Wilkes',),
 ('Christopher Newport',),
 ('Suffolk',),
 ('Roger Williams',),
 ('Norwich',),
 ('Emmanuel',),
 ('Hartwick',),
 ('Rochester',),
 ('Finlandia',),
 ('University of New England',),
 ('Penn College',),
 ('Juniata',),
 ('Emerson',),
 ('Coe',),
 ('LeTourneau',),
 ('University of the Ozarks',),
 ('East Texas Baptist',),
 ('Belhaven',),
 ('Texas-Dallas',),
 ('Louisiana College',),
 ('UC Santa Cruz',),
 ('Bridgewater State',),
 ("St. Joseph's (Maine)",),
 ('Utica',),
 ('Dickinson',),
 ('Colby',),
 ('Colby-Sawyer',),
 ("St. Mary's (Md.)",),
 ('Delaware Valley',),
 ('Alfred',),
 ('Penn State-Harrisburg',),
 ('Gwynedd Mercy',),
 ('Cairn',),
 ('Ripon',),
 ('Lawrence',),
 ('Southwestern',),
 ('Centenary (La.)',),
 ('St. Thomas (Texas)',),
 ('Lake Forest',),
 ('Beloit',),
 ('St. Norbert',),
 ('Mount St. Joseph',),
 ('Capital',),
 

In [133]:
stmt = """
SELECT DISTINCT
t.id AS team, g.id AS gameID
FROM games AS g
JOIN teams AS t
ON g.home = t.id
OR g.away = t.id
"""
results = conn.execute(stmt).fetchall()
results

[]

In [162]:
stmt = """
SELECT team,player, COUNT(1) AS N FROM
(
SELECT g.home AS team, e.player
FROM games AS g
JOIN events AS e
ON e.gameid = g.id
WHERE e.team == 'Home'
UNION ALL
SELECT g.away AS team, e.player
FROM games AS g
JOIN events AS e
ON e.gameid = g.id
WHERE e.team == 'Away'
)
GROUP BY team,player
"""
results = conn.execute(stmt).fetchall()
rostersDF = pd.DataFrame(results)
rostersDF.columns=['team','player','N']
rostersDF['player'] = rostersDF['player'].dropna().apply(cleanPlayer)
rostersDF = rostersDF[
    (rostersDF['player'] != 'TEAM')
    &
    (~rostersDF['player'].str.contains("TIMEOUT", na=False))
    &
    (rostersDF['player'].str.len() > 2)
    
]
rostersDF = rostersDF.drop_duplicates()

def getRoster(team,rostersDF):
    df = rostersDF[rostersDF['team']==team]
    df = df.groupby(['player'])['N'].sum().reset_index()
    #rostersDF = rostersDF.groupby('team')['player'].apply(list).reset_index()
    df = df.sort_values(by=['N'],ascending=False).head(10)
    #rostersDF[rostersDF['team']=='William Peace']['player'].value_counts()
    return list(df['player'])

In [163]:
getRoster('William Peace',rostersDF)

['MITCHELL,KAYMON',
 'BURDEN,DANTE',
 'MANLEY,CALVIN',
 'DEARMAN,GARREN',
 'JR,KEVIN MCLAUGHLIN',
 'PHIFER,MICHAEL',
 'MINGLEDOFF,SAM',
 'FRYE,AJ',
 'MCDOWELL,DAMON',
 'EDWARDS,MALIK']

In [3]:
def cleanAction(x):
    x = re.sub('^\d+', '', x).lstrip()
    x = re.sub('\d+$', '', x).rstrip()
    x = re.sub(' by$', '', x).rstrip()
    return x
def cleanPeriod(x):
    x = int(''.join(filter(str.isdigit, x)))
    return x
def cleanPlayer(x):
    x = x.lstrip().rstrip()
    return x
def getStarters(df):
    nprd = df['period'].max()

    periodStart = pd.to_timedelta('00:00:00')
    periodEnd = pd.to_timedelta('00:40:00')
    if nprd > 2:
        n = nprd - 2
        while n > 0:
            periodEnd += pd.to_timedelta('00:05:00')
            n -= 1
    lineups2 = df[df.action.isin(['enters the game','goes to the bench'])
                 ][['player','action','time','period','team']]
    #lineups2['time'] = lineups2['time'].apply(pd.to_datetime) + pd.DateOffset(lineups2['period'])
    linePV = pd.pivot_table(lineups2,index=['player','team'],columns='action',values='time',aggfunc=np.min).reset_index()
    linePV['enters the game'] = linePV['enters the game'].fillna(periodStart)
    linePV['goes to the bench'] = linePV['goes to the bench'].fillna(periodEnd)
    
    starters = linePV[
        (
            (linePV['goes to the bench'] < linePV['enters the game'])
        )
        |
        (
            (linePV['enters the game'] == '00:00:00')
        )
                     ][['team','player','goes to the bench','enters the game']]
    return list(starters[starters['team']=='Home']['player']),list(starters[starters['team']=='Away']['player'])

In [4]:
actValMap = {
'Assist':0,
'Block':0,
'Foul':0,
'Steal':0,
'Technicaloul':0,
'Turnover':0,
'deadball rebound':0,
'defensive rebound':0,
'enters the game':0,
'goes to the bench':0,
'made 2-pt field goal':2,
'made 3-pt jump shot':3,
'made dunk':2,
'made free throw':1,
'made jump shot':2,
'made layup':2,
'made tip-in':2,
'missed 2-pt field goal':0,
'missed 3-pt jump shot':0,
'missed dunk':0,
'missed free throw':0,
'missed jump shot':0,
#missed jump shot3missed 3-pt jump shot2missed 2-pt field goalmissed layupmissed free throwmissed dunkmissed tip-in}	123
'missed layup':0,
'missed tip-in':0,
'offensive rebound':0,
'pointswrong basket by defense':-2,
#score=2text=made jump shot3score=3text=made 3-pt jump shot2score=2text=made 2-pt field goalscore=2text=made layupscore=1text=made free throwscore=2text=made dunkscore=2text=made tip-in	108
'will be starting':0
}

In [159]:
def fetchGameMeta():
    stmt3 = "SELECT * FROM games WHERE isProcessed=1 AND (pmProcessed = 0 or pmProcessed IS NULL) LIMIT 1 "
    res3 = conn.execute(stmt3).fetchall()
    gameID = res3[0][0]
    homeTeam = res3[0][1]
    homeScore_final = res3[0][2]
    awayTeam = res3[0][3]
    awayScore_final = res3[0][4]
    gameDate = res3[0][5]
    return gameID,homeTeam,homeScore_final,awayTeam,awayScore_final,gameDate   
def fetchGame(gameID):    
    stmt2 = "SELECT * FROM events WHERE gameid = '" + gameID + "'"
    df = pd.read_sql_query(stmt2,engine)
    return df
def cleanGame(df):
    df['scoreHome'] = df['scoreHome'].map(int)
    df['scoreAway'] = df['scoreAway'].map(int)
    
    df['action'] = df['action'].apply(cleanAction)
    df['period'] = df['period'].apply(cleanPeriod).apply(int)
    df['player'] = df['player'].apply(cleanPlayer)
    df['duration'] = df['duration'].apply(int)
    
    df['actionValue'] = df['action'].map(actValMap).map(int,na_action='ignore')
    
    df['time'] = pd.to_timedelta('00:'+df['time'])
    df.loc[df['period'] <= 2,'time'] = pd.to_timedelta('00:20:00') - df.loc[df['period'] <= 2,'time']
    df.loc[df['period'] > 2,'time'] = pd.to_timedelta('00:05:00') - df.loc[df['period'] > 2,'time']
    df.loc[df['period'] == 2,'time'] += pd.to_timedelta('00:20:00')
    df.loc[df['period'] == 3,'time'] += pd.to_timedelta('00:25:00')
    df.loc[df['period'] == 4,'time'] += pd.to_timedelta('00:30:00')
    df.loc[df['period'] == 5,'time'] += pd.to_timedelta('00:35:00')
    df.loc[df['period'] == 6,'time'] += pd.to_timedelta('00:40:00')
    df.loc[df['period'] == 7,'time'] += pd.to_timedelta('00:45:00')
    df.loc[df['period'] == 8,'time'] += pd.to_timedelta('00:50:00')
    
    df['seqNo'] = df['time'].ne(df['time'].shift()).cumsum()
    
    df = df.sort_values(by=['time'],ascending=True)
    
    conditions = [
        (df['actionValue'] == 1),
        (df['actionValue'] == 2),
        (df['actionValue'] == 3),
        (df['action'].str.contains('missed') & df['action'].str.contains('3')),
        (df['action'].str.contains('missed') & ~df['action'].str.contains('3') & ~df['action'].str.contains('free')),
        (df['action'].str.contains('missed') & ~df['action'].str.contains('3') & df['action'].str.contains('free'))
    ]
    choices = ['FTM', 'FG2', 'FG3','3PA','2PA','FTA']
    
    df['action_edit1'] = np.select(conditions, choices, default=df['action'])
    
    #df['psnChg'] = False
    
    df['playScore'] = df['time'].map(df.groupby("time")['actionValue'].sum())
    return df

def sequenceGame(df):
    metricDF1 = df.copy()
    metricDF1.loc[metricDF1['team']=='Away','actionValue'] = metricDF1.loc[metricDF1['team']=='Away','actionValue'] * -1
    metricDF1 = metricDF1.set_index('team').groupby('seqNo', as_index=False).agg({
        #'action':dict
        'actionValue':np.sum
        , 'duration':np.sum
        , 'scoreHome':np.max
        , 'scoreAway':np.max
    }
    )
    metricDF1['n_1'] = metricDF1['actionValue'].shift(-1)
    
    metricDF1['clockUse'] = pd.cut(metricDF1['duration'],5,labels = ['Early','Early-Mid','Mid','Late-Mid','Late'])
    return metricDF1

def set_pm(df,rosterH,rosterA,debug=False,isHome=True):
    HLU,ALU = getStarters(df)

    lineupDF = df.copy().loc[df.action.isin(['enters the game','goes to the bench']),['time','action','player','team','scoreHome','scoreAway','seqNo']]
    lineupDF = lineupDF.reset_index()
    
    #rosterH = [p for p in lineupDF[lineupDF['team']=='Home']['player'].unique()]
    #rosterA = [p for p in lineupDF[lineupDF['team']=='Away']['player'].unique()]
    
    seq = lineupDF.loc[0,'seqNo'].copy()
    time = lineupDF.loc[0,'time']
    hSc = lineupDF.loc[0,'scoreHome'].copy()
    aSc = lineupDF.loc[0,'scoreAway'].copy()
    diff = hSc-aSc
    
    away = pd.DataFrame(data={'lineup':[ALU],'time':pd.to_timedelta('00:00:00'),'team':'Away','diff':0}).head(1)
    home = pd.DataFrame(data={'lineup':[HLU],'time':pd.to_timedelta('00:00:00'),'team':'Home','diff':0}).head(1)
    
    h = home.loc[0,'lineup'].copy()
    h.sort()
    a = away.loc[0,'lineup'].copy()
    a.sort()
    
    hPlayerPM = {'Home':{i:{'curDiff':0, 'pm':0, 'curTime':pd.to_timedelta('00:00:00'), 'tmp':0} for i in lineupDF[lineupDF['team']=='Home'].player.unique()}}
    aPlayerPM = {'Away':{i:{'curDiff':0, 'pm':0, 'curTime':pd.to_timedelta('00:00:00'), 'tmp':0} for i in lineupDF[lineupDF['team']=='Away'].player.unique()}}
    aPlayerPM.update(hPlayerPM)
    playerPM = aPlayerPM
    if debug:
        print('Home On Floor: ' + str(h))
        print('Away On Floor: ' + str(a))
    try:
        stints = pd.DataFrame(data = {'seqStart':0
                                      , 'seqEnd':seq
                                      , 'timeStart':pd.to_timedelta('00:00:00')
                                      , 'timeEnd':time
                                      , 'diffStart':0
                                      , 'diffEnd': diff
                                      , 'HOF':[[p for p in h]]
                                      , 'AOF':[[p for p in a]]
                                      , 'HX':[[1 if p in h else 0 for p in rosterH]]
                                      , 'AX':[[-1 if p in a else 0 for p in rosterA]]
                                      , 'HR':[[i for i in rosterH]]
                                      , 'AR':[[i for i in rosterA]]
                                      , 'Y':pd.Series((diff)/(seq))*100
                                     })
    except:
        pd.DataFrame(data = {'seqStart':0
                                      , 'seqEnd':seq
                                      , 'timeStart':pd.to_timedelta('00:00:00')
                                      , 'timeEnd':time
                                      , 'diffStart':0
                                      , 'diffEnd': diff
                                      , 'HOF':[[p for p in h]]
                                      , 'AOF':[[p for p in a]]
                                      , 'HX':[[1 if p in h else 0 for p in rosterH]]
                                      , 'AX':[[-1 if p in a else 0 for p in rosterA]]
                                      , 'HR':[[i for i in rosterH]]
                                      , 'AR':[[i for i in rosterA]]
                            }
                    )
        return
    
    hx = [1 if p in h else 0 for p in rosterH]
    ax = [1 if p in a else 0 for p in rosterA]
    
    for i in range(0,len(lineupDF)):
        nseq = lineupDF.loc[i,'seqNo'].copy()
        ntime = lineupDF.loc[i,'time']
        plyr = lineupDF.loc[i,'player']
        act = lineupDF.loc[i,'action']
        tm = lineupDF.loc[i,'team']
        ndiff = diff.copy()
        #print(seq)
        #print(nseq)
        if nseq != seq:
            hSc = lineupDF.loc[i,'scoreHome'].copy()
            aSc = lineupDF.loc[i,'scoreAway'].copy()
            ndiff = hSc - aSc
            #if tm=="Away":
            #    ndiff *= -1
            if homeSub:
                try:
                    newH = pd.DataFrame(data={'lineup':[[p for p in h]],'time':time,'team':'Home','diff':ndiff},index=[0])
                    home = home.append(newH)
                    if debug:
                        print('Added ' + str(newH.loc[0,'lineup']) + ' to Home lineups')
                except ValueError as e:
                    if debug:
                        print('Couldn''t add ' + str(newH))
                        print(e)
            if awaySub:
                try:
                    newA = pd.DataFrame(data={'lineup':[[p for p in a]],'time':time,'team':'Away','diff':aSc-hSc},index=[0])#.head(1)
                    away = away.append(newA)
                    if debug:
                        print('Added ' + str(newA.loc[0,'lineup']) + ' to Away lineups')
                except ValueError as e:
                    if debug:
                        print('Couldn''t add ' + str(newA))
                        print(e)
            hx_cur = []
            ax_cur = []
            hx_cur = [1 if p in h else 0 for p in rosterH]
            ax_cur = [-1 if p in a else 0 for p in rosterA]
            newStint = pd.DataFrame(data={'seqStart':seq
                                          , 'seqEnd':nseq
                                          , 'timeStart':time
                                          , 'timeEnd':ntime
                                          , 'diffStart':diff
                                          , 'diffEnd': ndiff
                                          , 'HOF': [[p for p in h]]
                                          , 'AOF': [[p for p in a]]
                                          , 'HX':[hx_cur]
                                          , 'AX':[ax_cur]
                                          , 'HR':[rosterH]
                                          , 'AR':[rosterA]
                                          , 'Y':((ndiff - diff)/(nseq - seq))*100
                                                 }
                                           )
            stints = stints.append(newStint)
            seq = nseq
            time = ntime
            diff = ndiff
        if debug:
            print(ntime)
            print(tm + ':' + plyr + ' ' + act)
        homeSub = False
        awaySub = False
        if act=='enters the game':
            if tm=='Home' and plyr not in h:
                h.append(plyr)
                homeSub = True
                #print('Added ' + lineupDF.loc[i,'player'])
            else:
                if plyr not in h:
                    a.append(plyr)
                    awaySub = True
                    #print('Added ' + lineupDF.loc[i,'player'])
            playerPM[tm][plyr]['curTime']=ntime
            playerPM[tm][plyr]['curDiff']=ndiff
        if act=='goes to the bench':
            if tm=='Home':
                try:
                    h.remove(plyr)
                    homeSub = True
                    #print('Removed ' + lineupDF.loc[i,'player'])
                except Exception as e:
                    if debug:
                        print('Exception: ' + str(e))
            else:
                try:
                    a.remove(plyr)
                    awaySub = True
                    #print('Removed ' + lineupDF.loc[i,'player'])
                except Exception as e:
                    if debug:
                        print('Exception :' + str(e))
            playerPM[tm][plyr]['pm'] += (ndiff - playerPM[tm][plyr]['curDiff'])
            #print((ndiff - playerPM[tm][plyr]['curDiff']))
            timeEntered = playerPM[tm][plyr]['curTime']    
            minPlayed = ((ntime - timeEntered).seconds)/60
            
            playerPM[tm][plyr]['tmp'] += int(minPlayed)
            if debug:
                print('Time: ',(ntime))
                print('Player entered at: ',(playerPM[tm][plyr]['curTime']))
                print('Minutes Played: ',(ntime - playerPM[tm][plyr]['curTime']))
        h.sort()
        a.sort()
        if debug:
            print('Home On Floor: ' + str(h))
            print('Away On Floor: ' + str(a))
    return playerPM,stints

def updatePlayers(playerPM,gameID,homeTeam,awayTeam):
    pmdfH = pd.DataFrame(data=playerPM.copy()['Home']).transpose().reset_index().iloc[:,[0,2,4]]
    pmdfH['team'] = homeTeam
    pmdfA = pd.DataFrame(data=playerPM.copy()['Away']).transpose().reset_index().iloc[:,[0,2,4]]
    pmdfA['team'] = awayTeam
    
    pmDF = pd.concat([pmdfH,pmdfA])
    pmDF['year'] = gameDate[0:4]
    pmDF.rename(columns={'index':'name'}, inplace=True)
    pmDF['id'] = pmDF['team'] + '_' + pmDF['name']
    pmDF = pmDF[['id','name','team','year','pm','tmp']]
    pmDF['tmp'] = pmDF['tmp'].apply(int)
    #"""
    pmDF.to_sql('temp_table', engine, if_exists='replace')
    
    sql = """
    INSERT INTO players(id,name,team,year,pm,tmp,tgp)
    SELECT
    id,name,team,year,pm,tmp,1
    FROM temp_table AS tt 
    WHERE true
      ON CONFLICT(id) DO UPDATE SET pm = pm + excluded.pm, tmp = tmp + excluded.tmp, tgp = tgp + 1;
    """
    try:
        conn.execute(sql)
        stmt = games.update().where(games.c.id==gameID).values(pmProcessed=True)
        conn.execute(stmt)
        print('Processed P/M for game ID: ' + gameID)
    except Exception as e:
        print('Error running P/M game ID: ' + gameID)
    
def getAPM(stints,team,roster,isHome=True):
    apm = pd.DataFrame()
    X = stints['HX'] + stints['AX']
    #X = X.shift(1) - X
    Y = stints['Y'] 
    Y = pd.Series(Y)
    x = np.array(X.tolist())
    xh = np.array(stints['HX'].tolist())
    xa = np.array(stints['AX'].apply(lambda x: [i*-1 for i in x]))
    print(xa)
    if isHome:
        cfit = Ridge(alpha=1.0).fit(xh, Y)
        reg = LinearRegression().fit(xh, Y)
    else:
        cfit = Ridge(alpha=1.0).fit(xa, Y)
        reg = LinearRegression().fit(xa, Y)
    
    for i in range(0,len(roster)):
        iapm = pd.DataFrame(
        data = {
            'player':team + '_' + roster[i]
            ,'apm1':reg.coef_[i]/100
            ,'apm2':cfit.coef_[i]/100
        },index=[0])
        apm = pd.concat([apm,iapm])
    return apm
    #regDF.head(20)

In [7]:
t1 = conn.execute("SELECT DISTINCT id FROM (SELECT home AS id FROM games UNION ALL SELECT away FROM games)")
t2 = t1.fetchall()
t3 = [t for t, in t2]
tDF = pd.DataFrame(t3)
#tDF.to_sql(teams, con=engine, if_exists='append', index=False)
t3

['William Peace',
 'Simpson',
 'Eastern Nazarene',
 'Mass-Boston',
 'Brandeis',
 'North Park',
 'Alfred State',
 'Sage',
 'Rivier',
 'Wilkes',
 'Christopher Newport',
 'Suffolk',
 'Roger Williams',
 'Norwich',
 'Emmanuel',
 'Hartwick',
 'Rochester',
 'Finlandia',
 'University of New England',
 'Penn College',
 'Juniata',
 'Emerson',
 'Coe',
 'LeTourneau',
 'University of the Ozarks',
 'East Texas Baptist',
 'Belhaven',
 'Texas-Dallas',
 'Louisiana College',
 'UC Santa Cruz',
 'Bridgewater State',
 "St. Joseph's (Maine)",
 'Utica',
 'Dickinson',
 'Colby',
 'Colby-Sawyer',
 "St. Mary's (Md.)",
 'Delaware Valley',
 'Alfred',
 'Penn State-Harrisburg',
 'Gwynedd Mercy',
 'Cairn',
 'Ripon',
 'Lawrence',
 'Southwestern',
 'Centenary (La.)',
 'St. Thomas (Texas)',
 'Lake Forest',
 'Beloit',
 'St. Norbert',
 'Mount St. Joseph',
 'Capital',
 'Fisher',
 'University of Dallas',
 'Austin',
 'Rhodes',
 'Hendrix',
 'Sewanee',
 'Centre',
 'Linfield',
 'Willamette',
 'Pacific Lutheran',
 'Puget Sound',

In [160]:
for i in range(0,20):
    teamid = t3[i].replace("'","''")
    stmt3 = """
    SELECT * FROM games \
    WHERE (home = '""" \
    + teamid + """' \
    OR away = '""" \
    + teamid + """') \
    AND id NOT LIKE '%error%'
    """
    allStints = pd.DataFrame()
    res3 = conn.execute(stmt3).fetchall()
    nGames = len(res3)
    nTrain = int(nGames*0.7)
    nTest = nGames - nTrain
    for j in range(0,nTrain):
        gameID = res3[j][0]
        homeTeam = res3[j][1]
        homeScore_final = res3[j][2]
        awayTeam = res3[j][3]
        awayScore_final = res3[j][4]
        gameDate = res3[j][5]
        isHome = True
        if awayTeam == teamid:
            isHome = False
        rosterH = getRoster(homeTeam,rostersDF)
        rosterH.sort()
        rosterA = getRoster(awayTeam,rostersDF)
        rosterA.sort()
        #try:        
        if True:
            print('Processing ' + str(j) + '/' + str(nTrain) + ' for team: ' + teamid)
            df = fetchGame(gameID)
            df = cleanGame(df)
            seqdf = sequenceGame(df)
            playerPM,stints = set_pm(df,rosterH,rosterA,debug=False,isHome=isHome)
            if isHome:
                pd.concat([allStints,stints[['HX','Y']]])
            else:
                stints['Y'] *= -1
                stints['AX'].apply(lambda x: [i*-1 for i in x])
                pd.concat([allStints,stints[['AX','Y']]])
            updatePlayers(playerPM,gameID,homeTeam,awayTeam)
            if isHome:
                r = rosterH
            else:
                r = rosterA
            #for i in r:
            #    print(i)
            apm = getAPM(stints,teamid,r,isHome)
            apm.to_sql('temp_table2', engine, if_exists='replace')
            sql = """
                    UPDATE players AS p
                    SET apm1 = t.apm1, apm2 = t.apm2
                    FROM temp_table2 AS t
                    WHERE p.id = t.player
                    """
#for i in rostersDF['player'].head(1):
#    for j in i:
#        print(j)
        #except:
        #    print('Failed processing game ID: ' + gameID)
        #    stmt = games.update().where(games.c.id==gameID).values(pmFailed=True)
        #    conn.execute(stmt)
        #    stmt = games.update().where(games.c.id==gameID).values(pmProcessed=True)
        #    conn.execute(stmt)

Processing 0/18 for team: William Peace
Processed P/M for game ID: 20200101_k6nz
BURDEN,DANTE
DEARMAN,GARREN
FRYE,AJ
JR,KEVIN MCLAUGHLIN
MANLEY,CALVIN
MINGLEDOFF,SAM
MITCHELL,KAYMON
PHIFER,MICHAEL
[list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 0, 0, 0, 0, 0])
 list([0, 0, 0, 0, 0, 0, 0, 0]) list([0, 0, 0, 

ValueError: setting an array element with a sequence.

In [None]:
stmt = select([games.c.id]).where((games.columns.isProcessed == True) 
                                  & (games.columns.pmProcessed == False)
                                  & (games.columns.pmFailed != True)
                                 )
results = conn.execute(stmt)
gameIDs = [value for value, in results]


N = 20#len(gameIDs)
n = 0
print(N)
if N > 1000:
    N = 1000

while n < N:
    gameID,homeTeam,homeScore_final,awayTeam,awayScore_final,gameDate = fetchGameMeta()
    try:        
        print('Processing ' + str(n) + '/' + str(N))
        df = fetchGame(gameID)
        df = cleanGame(df)
        seqdf = sequenceGame(df)
        playerPM,stints = set_pm(df)
        updatePlayers(playerPM,gameID,homeTeam,awayTeam)
        apm = getAPM()
    except:
        print('Failed processing game ID: ' + gameID)
        stmt = games.update().where(games.c.id==gameID).values(pmFailed=True)
        conn.execute(stmt)
        stmt = games.update().where(games.c.id==gameID).values(pmProcessed=True)
        conn.execute(stmt)
    games = Table('games', metadata, autoload = True, autoload_with = engine)
    n += 1


In [None]:
HLU = []
ALU = []
HLU, ALU = getStarters(df)

endScore_calc = df.scoreHome.iat[-1]
endScore_calc = df.scoreAway.iat[-1]

df.head(10)

In [None]:
rebDF = df[df.action.str.contains('rebound')].copy()
rebDF.merge(metricDF1.assign(seqNo=metricDF1.seqNo-1),on='seqNo')[[
    'id'
    , 'gameid'
    , 'time'
    , 'action_x'
#    , 'scoreHome_x'
#    , 'scoreAway_x'
    , 'team'
    , 'duration_x'
    , 'duration_y'
    , 'player'
    , 'period'
    , 'playScore'
#    , 'actionValue_x'
    , 'seqNo'
#    , 'action_edit1'
#    , 'psnChg'
    , 'action_y'
    , 'actionValue_y'
    , 'scoreHome_y'
    , 'scoreAway_y'
#    , 'n_1'
    , 'clockUse'
]]

#rebDF.head(25)

In [6]:
#view all actions and frequency
#"""
stmt = "\
SELECT \
action\
,COUNT(1) \
FROM events \
WHERE action NOT LIKE '%time%out%'\
GROUP BY \
action \
ORDER BY 2 desc"

act = pd.read_sql_query(stmt,engine)
#"""

act['action'] = act['action'].apply(cleanAction)

act = act.groupby('action').sum()

act.head(50)#.action.unique()#.head(50)

Unnamed: 0_level_0,COUNT(1)
action,Unnamed: 1_level_1
,23
Assist,156287
Block,32976
Foul,194202
Steal,79129
Technicaloul,2278
Turnover,158926
deadball rebound,33463
defensive rebound,298043
enters the game,360537


In [None]:
stmt = "SELECT DISTINCT \
e.gameid\
,e.period\
,e.action AS action1 \
,e2.action AS action2 \
,e.team AS team1 \
,e2.team AS team2 \
FROM events AS e \
INNER JOIN events AS e2 \
ON e.gameid = e2.gameid \
AND e.period = e2.period \
AND e.time = e2.time \
AND e.action > e2.action\
 WHERE e.gameid = '" + gameIDs[0] + "'" 


#print(stmt)
actdf = pd.read_sql_query(stmt,engine)
actdf['action1'] = actdf.action1.apply(cleanAction)
actdf['action2'] = actdf.action2.apply(cleanAction)

actdf.head(25)

In [None]:
#stmt = "DELETE FROM events"
#conn.execute("DELETE FROM players")

In [None]:
stmt = "SELECT * FROM games"
res = conn.execute(stmt).fetchall()
res[0:20]

In [None]:
stmt = games.update().values(pmFailed=False)
conn.execute(stmt)

In [1]:
conn.close()

NameError: name 'conn' is not defined

In [None]:
stmt = select([games]).where((games.columns.pmFailed == True)
                                 )
results = conn.execute(stmt).fetchall()
results

Unnamed: 0,team,player
0,Adrian,"[RAKE,BRANDON, WILLIAMSON,ISAIAH, STEVENS,RYAN..."
1,Air Force,"[THOMAS,ANDRAY, ROMERO,BRANDON, ATHERLEY,KALEB..."
2,Akron-Wayne,"[COX,MICAIAH, TYSON,SEAN, KOLAR,JACK, ANDREW,T..."
3,Alas. Anchorage,"[WITKOWSKI,PACKY, BALDEZ,NOAH, COX,JT, KANE,JU..."
4,Albany (N.Y.),"[THOMAS,JAYQUAN, BAPTISTE,TYRESE, DELSOL-LOWRY..."
5,Albany Pharmacy,"[HOSEY,JOSHUA, HILL,AARON, BRADFORD,DERIAN, SM..."
6,Albertus Magnus,"[GIULIANI,DAVID, WAGNER,JARED, POLCZYNSKI,JOE,..."
7,Albion,"[THOMAS,EVAN, GEORGE,TYLER, GRANGER,PRESTON, D..."
8,Albright,"[HOLLIS,RYAN, HARWARD,DYSON, TERRY,MO, DANGERF..."
9,Alfred,"[SUMMERS,JUSTIN, ASHLEY,DANNY, COLLINS,TYLER, ..."
