In [1]:
import re # module for regular expressions
import csv

In [2]:
with open('nba_game_warriors_thunder_20181016.txt') as csv_text:
    text_line = csv.reader(csv_text, delimiter = '|')
    play_by_play = [each for each in text_line]

In [3]:
comments = [each[-1] for each in play_by_play]
comments

['Turnover by K. Thompson (bad pass; steal by S. Adams)',
 'Turnover by P. George (bad pass)',
 'S. Curry makes 3-pt jump shot from 24 ft (assist by K. Durant)',
 'S. Adams misses 2-pt jump shot from 12 ft',
 'Offensive rebound by D. SchrГ¶der',
 'P. George misses 3-pt jump shot from 26 ft',
 'Defensive rebound by K. Durant',
 'K. Durant makes 2-pt layup from 2 ft',
 'D. SchrГ¶der misses 2-pt jump shot from 14 ft',
 'Offensive rebound by D. SchrГ¶der',
 'S. Adams misses 2-pt layup from 3 ft (block by K. Durant)',
 'Defensive rebound by D. Green',
 'Turnover by D. Green (out of bounds lost ball)',
 'P. Patterson makes 2-pt layup from 2 ft (assist by S. Adams)',
 'Shooting foul by D. Green (drawn by P. Patterson)',
 'P. Patterson makes free throw 1 of 1',
 'D. Jones makes 2-pt dunk from 1 ft (assist by D. Green)',
 'P. Patterson misses 2-pt hook shot from 8 ft',
 'Offensive rebound by P. Patterson',
 'Shooting foul by K. Thompson (drawn by P. Patterson)',
 'P. Patterson makes free throw 

In [4]:
def find_all_players(play_by_play):
    name_pattern = re.compile(r'\w\. \w+', re.I)
    all_players = []
    for event in play_by_play:
        name = name_pattern.search(event[-1])

        if name:
            all_players.append(name.group(0))   # group(0) выдает весь искомый текст
    return list(set(all_players))     # set() removes duplicates

all_players = find_all_players(play_by_play) 

In [5]:
all_players

['K. Looney',
 'S. Adams',
 'N. Noel',
 'S. Curry',
 'H. Diallo',
 'T. Ferguson',
 'J. Jerebko',
 'K. Durant',
 'Q. Cook',
 'R. Felton',
 'D. Jones',
 'A. Iguodala',
 'P. Patterson',
 'J. Grant',
 'K. Thompson',
 'J. Bell',
 'Ѓ. Abrines',
 'A. McKinnie',
 'S. Livingston',
 'D. Green',
 'D. SchrГ',
 'P. George']

In [6]:
def all_player_stat(play_by_play, all_players):
    p2_pattern = re.compile(r'(\w\. \w+) makes 2-pt', re.I)   # re.I - ignore case sensitive, case insensitive
    p2a_pattern = re.compile(r'(\w\. \w+) misses 2-pt', re.I) 
    p3_pattern = re.compile(r'(\w\. \w+) makes 3-pt', re.I)
    p3a_pattern = re.compile(r'(\w\. \w+) misses 3-pt', re.I)
    orb_pattern = re.compile(r'Offensive rebound by (\w\. \w+)', re.I) 
    drb_pattern = re.compile(r'Defensive rebound by (\w\. \w+)', re.I)
    ft_pattern = re.compile(r'(\w\. \w+) makes free throw', re.I)
    ft2_pattern = re.compile(r'(\w\. \w+) makes clear path free throw', re.I)
    fta_pattern = re.compile(r'(\w\. \w+) misses free throw', re.I) 
    ast_pattern = re.compile(r'assist by (\w\. \w+)', re.I) 
    stl_pattern = re.compile(r'steal by (\w\. \w+)', re.I) 
    blk_pattern = re.compile(r'block by (\w\. \w+)', re.I) 
    tov_pattern = re.compile(r'Turnover by (\w\. \w+)', re.I) 
    pf_pattern = re.compile(r'foul by (\w\. \w+)', re.I) 

    players_stats = []

    for player in all_players:

        player_stats = {"player_name": '', "FG": 0, "FGA": 0, "FG%": 0, "3P": 0, "3PA": 0, "3P%": 0, "FT": 0, "FTA": 0, "FT%": 0, "ORB": 0, "DRB": 0, "TRB": 0, "AST": 0, "STL": 0, "BLK": 0, "TOV": 0, "PF": 0, "PTS": 0}
        player_stats['player_name'] = player

        for event in play_by_play:

            p3 = p3_pattern.search(event[-1]) 
            if p3:
                if p3.group(1) == player:
                    player_stats['3P'] += 1
                    player_stats['3PA'] += 1
                    player_stats['FG'] += 1
                    player_stats['FGA'] += 1
            
            p3a = p3a_pattern.search(event[-1]) 
            if p3a:
                if p3a.group(1) == player:
                    player_stats['3PA'] += 1
                    player_stats['FGA'] += 1

            p2 = p2_pattern.search(event[-1]) 
            if p2:
                if p2.group(1) == player:
                    player_stats['FG'] += 1
                    player_stats['FGA'] += 1

            p2a = p2a_pattern.search(event[-1]) 
            if p2a:
                if p2a.group(1) == player:
                    player_stats['FGA'] += 1

            orb = orb_pattern.search(event[-1]) 
            if orb:
                if orb.group(1) == player:
                    player_stats['ORB'] += 1

            drb = drb_pattern.search(event[-1]) 
            if drb:
                if drb.group(1) == player:
                    player_stats['DRB'] += 1

            ft = ft_pattern.search(event[-1]) 
            if ft:
                if ft.group(1) == player:
                    player_stats['FT'] += 1
                    player_stats['FTA'] += 1
            
            ft2 = ft2_pattern.search(event[-1])
            if ft2:
                if ft2.group(1) == player:
                    player_stats['FT'] += 1
                    player_stats['FTA'] += 1

            fta = fta_pattern.search(event[-1]) 
            if fta:
                if fta.group(1) == player:
                    player_stats['FTA'] += 1

            ast = ast_pattern.search(event[-1]) 
            if ast:
                if ast.group(1) == player:
                    player_stats['AST'] += 1

            stl = stl_pattern.search(event[-1]) 
            if stl:
                if stl.group(1) == player:
                    player_stats['STL'] += 1

            blk = blk_pattern.search(event[-1]) 
            if blk:
                if blk.group(1) == player:
                    player_stats['BLK'] += 1

            tov = tov_pattern.search(event[-1]) 
            if tov:
                if tov.group(1) == player:
                    player_stats['TOV'] += 1

            pf = pf_pattern.search(event[-1]) 
            if pf:
                if pf.group(1) == player:
                    player_stats['PF'] += 1

            player_stats['FG%'] = (player_stats['FG'] / player_stats['FGA']) if player_stats['FGA'] != 0 else 0
            player_stats['3P%'] = (player_stats['3P'] / player_stats['3PA']) if player_stats['3PA'] != 0 else 0
            player_stats['FT%'] = (player_stats['FT'] / player_stats['FTA']) if player_stats['FTA'] != 0 else 0
            player_stats['TRB'] = player_stats['DRB'] + player_stats['ORB']
            player_stats['PTS'] = (2 * (player_stats['FG'] - player_stats['3P'])) + (3 * player_stats['3P']) + player_stats['FT']
   
        players_stats.append(player_stats)
    return players_stats

players_stats = all_player_stat(play_by_play, all_players)

In [7]:
players_stats

[{'player_name': 'K. Looney',
  'FG': 5,
  'FGA': 11,
  'FG%': 0.45454545454545453,
  '3P': 0,
  '3PA': 0,
  '3P%': 0,
  'FT': 0,
  'FTA': 0,
  'FT%': 0,
  'ORB': 8,
  'DRB': 2,
  'TRB': 10,
  'AST': 2,
  'STL': 1,
  'BLK': 2,
  'TOV': 1,
  'PF': 4,
  'PTS': 10},
 {'player_name': 'S. Adams',
  'FG': 6,
  'FGA': 12,
  'FG%': 0.5,
  '3P': 0,
  '3PA': 0,
  '3P%': 0,
  'FT': 5,
  'FTA': 8,
  'FT%': 0.625,
  'ORB': 4,
  'DRB': 7,
  'TRB': 11,
  'AST': 4,
  'STL': 2,
  'BLK': 0,
  'TOV': 2,
  'PF': 3,
  'PTS': 17},
 {'player_name': 'N. Noel',
  'FG': 1,
  'FGA': 2,
  'FG%': 0.5,
  '3P': 0,
  '3PA': 0,
  '3P%': 0,
  'FT': 1,
  'FTA': 2,
  'FT%': 0.5,
  'ORB': 3,
  'DRB': 4,
  'TRB': 7,
  'AST': 1,
  'STL': 1,
  'BLK': 1,
  'TOV': 0,
  'PF': 3,
  'PTS': 3},
 {'player_name': 'S. Curry',
  'FG': 11,
  'FGA': 20,
  'FG%': 0.55,
  '3P': 5,
  '3PA': 9,
  '3P%': 0.5555555555555556,
  'FT': 5,
  'FTA': 5,
  'FT%': 1.0,
  'ORB': 0,
  'DRB': 8,
  'TRB': 8,
  'AST': 9,
  'STL': 1,
  'BLK': 0,
  'TOV': 3

In [8]:
def match_to_team(play_by_play, all_players):

    team_player = {name:'' for name in all_players}

    p3a_pattern = re.compile(r'(\w\. \w+) misses 3-pt', re.I)
    pf_pattern = re.compile(r'foul by (\w\. \w+)', re.I)

    for event in play_by_play:
        p3a = p3a_pattern.search(event[-1])
        if p3a:
            name = p3a.group(1)
            team_player[name] = event[2]
        
        pf = pf_pattern.search(event[-1])
        if pf:
            name = pf.group(1)
            team_player[name] = event[2] 

    return team_player

team_player = match_to_team(play_by_play, all_players)

In [9]:
team_player

{'K. Looney': 'OKLAHOMA_CITY_THUNDER',
 'S. Adams': 'OKLAHOMA_CITY_THUNDER',
 'N. Noel': 'GOLDEN_STATE_WARRIORS',
 'S. Curry': 'OKLAHOMA_CITY_THUNDER',
 'H. Diallo': 'GOLDEN_STATE_WARRIORS',
 'T. Ferguson': 'OKLAHOMA_CITY_THUNDER',
 'J. Jerebko': 'OKLAHOMA_CITY_THUNDER',
 'K. Durant': 'OKLAHOMA_CITY_THUNDER',
 'Q. Cook': 'OKLAHOMA_CITY_THUNDER',
 'R. Felton': 'OKLAHOMA_CITY_THUNDER',
 'D. Jones': 'GOLDEN_STATE_WARRIORS',
 'A. Iguodala': 'GOLDEN_STATE_WARRIORS',
 'P. Patterson': 'OKLAHOMA_CITY_THUNDER',
 'J. Grant': 'OKLAHOMA_CITY_THUNDER',
 'K. Thompson': 'GOLDEN_STATE_WARRIORS',
 'J. Bell': 'OKLAHOMA_CITY_THUNDER',
 'Ѓ. Abrines': 'OKLAHOMA_CITY_THUNDER',
 'A. McKinnie': 'GOLDEN_STATE_WARRIORS',
 'S. Livingston': 'GOLDEN_STATE_WARRIORS',
 'D. Green': 'OKLAHOMA_CITY_THUNDER',
 'D. SchrГ': 'GOLDEN_STATE_WARRIORS',
 'P. George': 'OKLAHOMA_CITY_THUNDER'}

In [10]:
def analyse_nba_game():
    with open('nba_game_warriors_thunder_20181016.txt') as csv_text:
        text_line = csv.reader(csv_text, delimiter = '|')
        play_by_play = [each for each in text_line]
    
    all_players = find_all_players(play_by_play)
    players_stats = all_player_stat(play_by_play, all_players)
    team_player = match_to_team(play_by_play, all_players)
    
    return players_stats

In [11]:
players_stats = analyse_nba_game()
players_stats

[{'player_name': 'K. Looney',
  'FG': 5,
  'FGA': 11,
  'FG%': 0.45454545454545453,
  '3P': 0,
  '3PA': 0,
  '3P%': 0,
  'FT': 0,
  'FTA': 0,
  'FT%': 0,
  'ORB': 8,
  'DRB': 2,
  'TRB': 10,
  'AST': 2,
  'STL': 1,
  'BLK': 2,
  'TOV': 1,
  'PF': 4,
  'PTS': 10},
 {'player_name': 'S. Adams',
  'FG': 6,
  'FGA': 12,
  'FG%': 0.5,
  '3P': 0,
  '3PA': 0,
  '3P%': 0,
  'FT': 5,
  'FTA': 8,
  'FT%': 0.625,
  'ORB': 4,
  'DRB': 7,
  'TRB': 11,
  'AST': 4,
  'STL': 2,
  'BLK': 0,
  'TOV': 2,
  'PF': 3,
  'PTS': 17},
 {'player_name': 'N. Noel',
  'FG': 1,
  'FGA': 2,
  'FG%': 0.5,
  '3P': 0,
  '3PA': 0,
  '3P%': 0,
  'FT': 1,
  'FTA': 2,
  'FT%': 0.5,
  'ORB': 3,
  'DRB': 4,
  'TRB': 7,
  'AST': 1,
  'STL': 1,
  'BLK': 1,
  'TOV': 0,
  'PF': 3,
  'PTS': 3},
 {'player_name': 'S. Curry',
  'FG': 11,
  'FGA': 20,
  'FG%': 0.55,
  '3P': 5,
  '3PA': 9,
  '3P%': 0.5555555555555556,
  'FT': 5,
  'FTA': 5,
  'FT%': 1.0,
  'ORB': 0,
  'DRB': 8,
  'TRB': 8,
  'AST': 9,
  'STL': 1,
  'BLK': 0,
  'TOV': 3

In [12]:
import pandas as pd

In [13]:
players_stats = pd.DataFrame(players_stats)
players_stats

Unnamed: 0,player_name,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,K. Looney,5,11,0.454545,0,0,0.0,0,0,0.0,8,2,10,2,1,2,1,4,10
1,S. Adams,6,12,0.5,0,0,0.0,5,8,0.625,4,7,11,4,2,0,2,3,17
2,N. Noel,1,2,0.5,0,0,0.0,1,2,0.5,3,4,7,1,1,1,0,3,3
3,S. Curry,11,20,0.55,5,9,0.555556,5,5,1.0,0,8,8,9,1,0,3,4,32
4,H. Diallo,2,4,0.5,0,0,0.0,0,1,0.0,0,1,1,1,1,0,0,2,4
5,T. Ferguson,0,2,0.0,0,2,0.0,0,0,0.0,2,2,4,1,0,0,1,3,0
6,J. Jerebko,0,0,0.0,0,0,0.0,0,0,0.0,0,3,3,0,0,0,1,2,0
7,K. Durant,9,21,0.428571,0,5,0.0,9,10,0.9,1,7,8,6,1,1,3,4,27
8,Q. Cook,1,2,0.5,1,1,1.0,0,0,0.0,1,1,2,1,0,0,2,2,3
9,R. Felton,1,5,0.2,0,3,0.0,4,5,0.8,0,3,3,1,0,0,2,0,6


In [14]:
players_stats = players_stats.append(players_stats.sum(numeric_only=True, axis=0), ignore_index=True)
players_stats['player_name'] = players_stats['player_name'].fillna('Team Totals')
players_stats

Unnamed: 0,player_name,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,K. Looney,5.0,11.0,0.454545,0.0,0.0,0.0,0.0,0.0,0.0,8.0,2.0,10.0,2.0,1.0,2.0,1.0,4.0,10.0
1,S. Adams,6.0,12.0,0.5,0.0,0.0,0.0,5.0,8.0,0.625,4.0,7.0,11.0,4.0,2.0,0.0,2.0,3.0,17.0
2,N. Noel,1.0,2.0,0.5,0.0,0.0,0.0,1.0,2.0,0.5,3.0,4.0,7.0,1.0,1.0,1.0,0.0,3.0,3.0
3,S. Curry,11.0,20.0,0.55,5.0,9.0,0.555556,5.0,5.0,1.0,0.0,8.0,8.0,9.0,1.0,0.0,3.0,4.0,32.0
4,H. Diallo,2.0,4.0,0.5,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,4.0
5,T. Ferguson,0.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,1.0,0.0,0.0,1.0,3.0,0.0
6,J. Jerebko,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,1.0,2.0,0.0
7,K. Durant,9.0,21.0,0.428571,0.0,5.0,0.0,9.0,10.0,0.9,1.0,7.0,8.0,6.0,1.0,1.0,3.0,4.0,27.0
8,Q. Cook,1.0,2.0,0.5,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,0.0,2.0,2.0,3.0
9,R. Felton,1.0,5.0,0.2,0.0,3.0,0.0,4.0,5.0,0.8,0.0,3.0,3.0,1.0,0.0,0.0,2.0,0.0,6.0
