In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
# Column labels for the parsed box-score tables, in left-to-right cell order.
# table_to_df relies on these positions: the first entry of each list is the
# minutes-played column.
basic = [
    'MP', 'FG', 'FGA', 'FG%',
    '3P', '3PA', '3P%',
    'FT', 'FTA', 'FT%',
    'ORB', 'DRB', 'TRB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', '+/-',
]
advanced = [
    'MP', 'TS%', 'eFG%', '3PAr', 'FTr',
    'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%', 'USG%',
    'ORtg', 'DRtg', 'BPM',
]

In [3]:
def _minutes_to_float(clock):
    """Convert a minutes-played cell such as '34:30' into fractional minutes.

    A blank cell yields 0.0 (matching how other blank cells are treated) and a
    value without a ':' is read as whole minutes.
    """
    if clock == '':
        return 0.0
    minutes, _, seconds = clock.partition(':')
    return float(minutes) + (float(seconds) / 60 if seconds else 0.0)


def _convert_cells(cells, special_positions, special_type, default_type):
    """Convert one row of stripped cell strings into numeric values.

    Position 0 is the minutes-played cell, blank cells become 0.0, positions
    listed in ``special_positions`` use ``special_type`` and every other
    position uses ``default_type``.
    """
    converted = []
    for position, text in enumerate(cells):
        if position == 0:
            converted.append(_minutes_to_float(text))
        elif text == '':
            converted.append(0.0)
        elif position in special_positions:
            converted.append(special_type(text))
        else:
            converted.append(default_type(text))
    return converted


def table_to_df(t, isbasic):
    """Parse a box-score ``<table>`` element into a numeric DataFrame.

    Parameters
    ----------
    t : element exposing ``find`` / ``find_all`` (e.g. a BeautifulSoup tag)
        The table whose ``<tbody>`` rows are read. Only ``<td>`` cells are
        used, so rows without any ``<td>`` are skipped.
    isbasic : bool
        Truthy to parse with the ``basic`` layout (columns 3, 6 and 9 are
        floats, the rest ints); falsy to parse with the ``advanced`` layout
        (columns 13 and 14 are ints, the rest floats). The same truthiness
        test now selects both the parsing rules and the column labels.

    Returns
    -------
    pandas.DataFrame
        One row per parsed ``<tr>``, labelled with ``basic`` or ``advanced``.
        A row holding a single ``<td>`` becomes an all-NaN row
        (presumably a "did not play" style note spanning the row - confirm
        against the scraped pages).

    Raises
    ------
    ValueError
        If the table has no ``<tbody>``, or a cell cannot be converted to a
        number.
    """
    table_body = t.find('tbody')
    if table_body is None:
        raise ValueError('table has no <tbody> element to parse')

    if isbasic:
        columns, special_positions = basic, (3, 6, 9)
        special_type, default_type = float, int
    else:
        columns, special_positions = advanced, (13, 14)
        special_type, default_type = int, float

    data = []
    for row in table_body.find_all('tr'):
        cells = [ele.text.strip() for ele in row.find_all('td')]
        if not cells:
            continue
        if len(cells) == 1:
            # Keep the row so positions still line up with the player list;
            # the width follows the column list instead of a literal 20/16.
            data.append([np.nan] * len(columns))
        else:
            data.append(
                _convert_cells(cells, special_positions, special_type, default_type)
            )
    return pd.DataFrame(data=data, columns=columns)

In [4]:
def _team_game_frame(basic_table, advanced_table, players, team, win, game):
    """Build one team's frame for one game: labels, basic stats, then advanced stats."""
    team_df = table_to_df(basic_table, True)
    team_df.insert(0, 'Player', players)
    team_df.insert(1, 'Team', team)
    team_df['Win'] = int(win)
    team_df['Game'] = game
    # Both tables carry an 'MP' column; the advanced copy gets the 'r' suffix
    # in the join and is dropped.
    joined = team_df.join(table_to_df(advanced_table, False), rsuffix='r')
    return joined.drop(columns='MPr')


def make_series_csv(textfile, out_filename):
    """Scrape every game listed in ``textfile`` and write the stats to one CSV.

    Each non-blank line of ``textfile`` describes one game as ';'-separated
    fields: url; comma-separated away players; comma-separated home players;
    away win flag; home win flag; away team; home team; overtime flag (only
    the first character of that last field is read). Games are numbered from 1
    in file order.

    Parameters
    ----------
    textfile : str
        Path of the series description file.
    out_filename : str
        Path of the CSV to write. Rows are ordered game by game, away team
        before home team, and missing values are filled with 0.0.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a line has fewer than eight fields or a page holds fewer tables
        than the expected layout requires.
    requests.HTTPError
        If a page request returns an error status.
    """
    # Read the descriptions first so the file is not held open during the
    # network requests; blank lines (e.g. a trailing newline) are ignored.
    with open(textfile, 'r') as f:
        lines = [line for line in f if line.strip()]

    game_frames = []
    for game, line in enumerate(lines, start=1):
        parts = line.split(';')
        if len(parts) < 8:
            raise ValueError(
                'line %d of %s has %d fields, expected 8' % (game, textfile, len(parts))
            )
        url = parts[0]
        away_players = parts[1].split(',')
        home_players = parts[2].split(',')
        away_win = parts[3]
        home_win = parts[4]
        away_team = parts[5]
        home_team = parts[6]
        overtime = bool(int(parts[7][0]))

        page = requests.get(url, timeout=30)
        # Fail loudly instead of parsing an error page as if it were a box score.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        tables = soup.find_all('table')

        # Positions of (away basic, away advanced, home basic, home advanced)
        # among the page's tables. NOTE(review): the overtime offsets presumably
        # account for an extra per-period table per team - confirm against a page.
        if overtime:
            away_basic, away_adv, home_basic, home_adv = 0, 8, 9, 17
        else:
            away_basic, away_adv, home_basic, home_adv = 0, 7, 8, 15
        if len(tables) <= home_adv:
            # A short page used to be skipped silently, which let the
            # per-game lists drift out of step with each other.
            raise ValueError(
                'expected at least %d tables at %s, found %d'
                % (home_adv + 1, url, len(tables))
            )

        game_frames.append(_team_game_frame(
            tables[away_basic], tables[away_adv],
            away_players, away_team, away_win, game,
        ))
        game_frames.append(_team_game_frame(
            tables[home_basic], tables[home_adv],
            home_players, home_team, home_win, game,
        ))

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame for every game. concat rejects an empty
    # list, so an input with no games falls back to an empty frame.
    if game_frames:
        df = pd.concat(game_frames, ignore_index=True)
    else:
        df = pd.DataFrame()

    df = df.fillna(0.0)
    df.to_csv(out_filename, index=False)
    print('Completed ' + out_filename)
    return

In [5]:
# Series description files and the CSV each one is written to, paired by position.
text_files = [
    'DEN-UTA.txt',
    'DEN-LAC.txt',
]
csv_files = [
    'Denver_Utah_Round1.csv',
    'Denver_LAC_Round2.csv',
]

# Build one CSV per series; each call prints a completion line when it is done.
for series_args in zip(text_files, csv_files):
    make_series_csv(*series_args)

Completed Denver_Utah_Round1.csv
Completed Denver_LAC_Round2.csv
