In [159]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from urllib2 import urlopen
import re

In [160]:
#Retrieve team names, prefixes, and urls
def get_teams(sport, timeout=30):
    """Scrape ESPN's team index page for one sport.

    Parameters
    ----------
    sport : str
        League slug substituted into the ESPN URL, e.g. 'nfl', 'nba', 'mlb'.
    timeout : number, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot
        hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        Indexed by team name (index name ``'team'``) with columns ``url``
        (the team link's href), ``prefix1`` (second-to-last ``/`` segment of
        that href; this is what ``fill_Stats`` puts into the stats URL) and
        ``prefix2`` (last segment of the href).

    Raises
    ------
    requests.HTTPError
        If ESPN answers with a 4xx/5xx status.
    """
    baseUrl = 'http://espn.go.com/{0}/teams'
    req = requests.get(baseUrl.format(sport), timeout=timeout)
    # Fail here rather than parsing an error page into an empty team table
    # that only blows up later with an unrelated-looking error.
    req.raise_for_status()

    soup = BeautifulSoup(req.text, 'lxml')
    teams = []
    prefix1 = []
    prefix2 = []
    teamUrls = []

    for table in soup.find_all('ul', class_='medium-logos'):
        for li in table.find_all('li'):
            # Skip list items that are not team entries (no <h5><a href=...>).
            heading = li.h5
            info = heading.a if heading is not None else None
            if info is None or not info.get('href'):
                continue

            url = info['href']
            parts = url.split('/')
            if len(parts) < 2:
                # An href without any '/' cannot supply both prefixes.
                continue

            teams.append(info.text)
            teamUrls.append(url)
            prefix1.append(parts[-2])
            prefix2.append(parts[-1])

    dic = {'url' : teamUrls, 'prefix2' : prefix2, 'prefix1' : prefix1}
    teamNames = pd.DataFrame(dic, index = teams)
    teamNames.index.name = 'team'

    return teamNames

In [161]:
#Retrieves titles of statistics in table
def get_titles(table):
    """Return the text of every ``<td>`` cell in the first row of ``table``.

    ``table`` is a sequence of row elements that support ``findAll('td')``;
    only ``table[0]`` is read.
    """
    headerCells = table[0].findAll('td')
    return [cell.text for cell in headerCells]

#Retrieve Stats present in table
def get_playerStats(table, titles,team,sport):
    """Build a DataFrame of per-player rows from one team's stats table.

    Parameters
    ----------
    table : sequence of row elements
        Rows of the stats table; each must support ``findAll('td')``.
        ``table[0]`` and the last two rows are skipped (presumably the
        header row and two summary rows -- confirm against the page).
    titles : list of str
        Column labels, in the order the values are assembled for each row:
        team, position (only when ``sport != 'nfl'``), player name, then the
        remaining cells.
    team : str
        Value stored in the first column of every row.
    sport : str
        When not ``'nfl'``, the first cell is treated as
        ``"<name>,<position>"`` and split into two columns.

    Returns
    -------
    pandas.DataFrame
        One row per player, columns ``titles``, index 0..n-1.

    Raises
    ------
    ValueError
        If a row yields a different number of values than ``len(titles)``.
    """
    rows = []
    for row in table[1:-2]:
        cells = row.findAll('td')
        nameParts = cells[0].text.split(',')

        player = [team]
        if sport != 'nfl':
            # A name cell without a comma has no position; record it as ''
            # instead of failing with IndexError.
            position = nameParts[1] if len(nameParts) > 1 else ''
            # Drop non-ASCII characters from the position text.
            player.append(re.sub(r'[^\x00-\x7F]+', '', position))
        player.append(nameParts[0])
        player.extend(cell.text for cell in cells[1:])

        # Keep the strict length check that pd.Series(player, index=titles)
        # used to provide; DataFrame() would silently pad ragged rows.
        if len(player) != len(titles):
            raise ValueError('row has {0} values but {1} titles were given'
                             .format(len(player), len(titles)))
        rows.append(player)

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending row by row copies the whole frame on every iteration.
    return pd.DataFrame(rows, columns=titles)

#Find stats for all teams
def fill_Stats(teams, statType, sport):
    """Collect one stats table from every team's ESPN stats page.

    Parameters
    ----------
    teams : pandas.DataFrame
        Must have a ``prefix1`` column; each value is substituted into the
        team stats URL.
    statType : int
        Index of the ``<table>`` to read on each page.
    sport : str
        League slug substituted into the URL; anything other than ``'nfl'``
        gets an extra ``POS`` column.

    Returns
    -------
    pandas.DataFrame
        All teams' player rows stacked together with a fresh 0..n-1 index.
        Columns are ``TEAM``, ``POS`` (non-NFL only), then the titles read
        from the first team's table.

    Raises
    ------
    IndexError
        If ``teams`` has no rows, or a page has fewer tables than
        ``statType`` requires.
    """
    #statType for NFL: 0=Passing, 1=Rushing, 2=Receiving
    #statType for NBA: 0=Game, 1=Shooting
    #statType for any other: 0=All

    BaseURL= 'http://espn.go.com/{0}/team/stats/_/name/{1}'

    prefixes = list(teams['prefix1'])
    if not prefixes:
        raise IndexError('fill_Stats requires at least one team')

    titles = None
    frames = []
    for team in prefixes:
        table = _stat_table_rows(BaseURL.format(sport, team), statType)

        #Sets titles of stats based on the first team's page; reusing the
        #rows already fetched avoids downloading that page a second time.
        if titles is None:
            titles = get_titles(table)
            if sport != 'nfl':
                titles = ['POS'] + titles
            titles = ['TEAM'] + titles

        frames.append(get_playerStats(table, titles, team, sport))

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(frames, ignore_index=True)


def _stat_table_rows(url, statType):
    """Fetch ``url`` and return the ``<tr>`` rows, minus the first, of its ``statType``-th table."""
    html = urlopen(url)
    try:
        soup = BeautifulSoup(html, 'lxml')
    finally:
        # Release the connection even if parsing fails.
        html.close()
    return soup.findAll('table')[statType].findAll('tr')[1:]

In [162]:
def update_sport(sport, tableNum,
                 basePath='C:/Users/doc359/Documents/Sports/{0}/{1}.csv'):
    """Scrape one stats table for every team in ``sport`` and save it as CSV.

    Parameters
    ----------
    sport : str
        League slug passed to ``get_teams`` and ``fill_Stats``.
    tableNum : int
        Which table to scrape. For ``'nfl'`` it must be 0, 1 or 2 and for
        ``'nba'`` 0 or 1; for any other sport it is passed through unchecked
        and the file is always named ``Player_stats``.
    basePath : str, optional
        ``str.format`` template for the output file; ``{0}`` receives
        ``sport`` and ``{1}`` the file name. Defaults to the notebook's
        original location. The target directory is not created here.

    Raises
    ------
    KeyError
        If ``tableNum`` is not valid for ``'nfl'`` or ``'nba'``. This is
        raised before any page is downloaded.
    """
    # Resolve the file name first so a bad tableNum fails immediately rather
    # than after every team page has been scraped.
    if sport == 'nfl':
        dic = {0: 'Passing_stats', 1: 'Rushing_stats', 2: 'Recieving_stats'}
        fileName = dic[tableNum]
    elif sport == 'nba':
        dic = {0: 'Game_stats', 1: 'Shooting_stats'}
        fileName = dic[tableNum]
    else:
        fileName = 'Player_stats'

    teams = get_teams(sport)
    stats = fill_Stats(teams, tableNum, sport)

    stats.to_csv(basePath.format(sport, fileName))

In [163]:
# Refresh all three NFL tables (indices 0, 1 and 2), in order.
for nflTable in (0, 1, 2):
    update_sport('nfl', nflTable)

In [164]:
# Refresh both NBA tables, then the single NHL table, in that order.
for league, leagueTable in (('nba', 0), ('nba', 1), ('nhl', 0)):
    update_sport(league, leagueTable)