In [1]:
from __future__ import print_function, division
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import pickle as pkl

chromedriver = "/Applications/chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
driver = webdriver.Chrome(chromedriver)

In [8]:
# returns list of names scraped from site - all info scraped by each team in order of best fantasy game by QB to worst
def getNames(innerHTML):
    names = [str(link) for link in innerHTML.find_all('a') if 'nfl-stats/player-details' in str(link)]
    names = [link.split('>')[1].split('<')[0] for link in names]
    names = [str(name) for name in names]
    return names

In [9]:
# returns list of week numbers
def getWeeks(innerHTML):
    weeks = [str(week) for week in innerHTML.find_all('span') if 'dataItem.Week' in str(week)]
    weeks = [int(week.split('>')[1].split('<')[0]) for week in weeks]
    return weeks

In [10]:
# returns list of opponents
def getOPP(innerHTML):
    opponents = [str(opponent) for opponent in innerHTML.find_all('span') if 'dataItem.Opponent' in str(opponent)]
    opponents = [opponent.split('>')[1].split('<')[0] for opponent in opponents]
    opponents = [str(opponent) for opponent in opponents]
    return opponents

In [11]:
# returns multiple lists of different stats
def getStats(innerHTML):
    data = [str(data) for data in innerHTML.find_all('td') if 'class=""' in str(data) and 'span' not in str(data)]
    data = [data for data in data if 'href' not in data]
    data = [data.split('>')[1].split('<')[0] for data in data]
    newData = []
    for x in range(len(data)//12):
        newData.append(data[12*x:12*x+11])
    for statList in newData:
        if len(statList) == 11:
            statList.append(0)

    completions = []
    attempts = []
    percentage = []
    passingYards = []
    avgYdsThrow = []
    passTDs = []
    interceptions = []
    passerRating = []
    rushAtt = []
    rushYds = []
    rushAvg = []
    rushTDs = []

    for x in range(len(newData)):
        completions.append(int(newData[x][0]))
        attempts.append(int(newData[x][1]))
        percentage.append(float(newData[x][2]))
        passingYards.append(int(newData[x][3]))
        avgYdsThrow.append(float(newData[x][4]))
        passTDs.append(int(newData[x][5]))
        interceptions.append(int(newData[x][6]))
        passerRating.append(float(newData[x][7]))
        rushAtt.append(int(newData[x][8]))
        rushYds.append(int(newData[x][9]))
        rushAvg.append(float(newData[x][10]))
        rushTDs.append(int(newData[x][11]))

    return [completions, attempts, percentage, passingYards, avgYdsThrow, passTDs, interceptions, passerRating, rushAtt, rushYds, rushAvg, rushTDs]

In [12]:
# returns list of fantasy points
def getFPts(innerHTML):
    fantasyPoints = [str(fpts) for fpts in innerHTML.find_all('span') if 'dataItem.FantasyPoints' in str(fpts) and 'PerGame' not in str(fpts)]
    fantasyPoints = [float(fpts.split('>')[1].split('<')[0]) for fpts in fantasyPoints]
    return fantasyPoints

In [13]:
# puts everything into dataframe
def getDF(names, weeks, opponents, stats, fantasyPoints):
    df = pd.DataFrame({'Name':names, 'Week':weeks, 'OPP':opponents, 'CMP':stats[0], 'ATT':stats[1], 'PCT':stats[2], 'PassYd':stats[3], 'AvgYds/Pass':stats[4], 'PassTD':stats[5], 'INT':stats[6], 'Rating':stats[7], 'RushAtt':stats[8], 'RushYds':stats[9], 'AvgYds/Rush':stats[10], 'RushTD':stats[11], 'Fantasy Points':fantasyPoints})
    return df

In [14]:
# set unedited url and log in to website
url = 'https://fantasydata.com/nfl-stats/fantasy-football-leaders?position=2&team={}&season=2017&seasontype=1&scope=2&subscope=1&startweek=1&endweek=17'
# log in and go to page to get data
driver.get('https://fantasydata.com/user/login')
time.sleep(1);
logIn = driver.find_element_by_name('Email')
logIn.send_keys('rishis1096@gmail.com')
logIn = driver.find_element_by_name('Password')
logIn.send_keys('password') # false password due to code being put online
logIn.send_keys(Keys.RETURN)

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"name","selector":"Password"}
  (Session info: chrome=69.0.3497.100)
  (Driver info: chromedriver=2.42.591059 (a3d9684d10d61aa0c45f6723b327283be1ebaad8),platform=Mac OS X 10.13.6 x86_64)


In [15]:
# create entire DF
for x in range(32):
    driver.get(url.format(str(x)))
    time.sleep(1);
    innerHTML = driver.execute_script("return document.body.innerHTML")
    innerHTML = BeautifulSoup(innerHTML, 'lxml')
    
    names = getNames(innerHTML)
    weeks = getWeeks(innerHTML)
    opponents = getOPP(innerHTML)
    stats = getStats(innerHTML)
    fantasyPoints = getFPts(innerHTML)
    

    if x == 0:
        statsDF = getDF(names, weeks, opponents, stats, fantasyPoints)
    else:
        df = getDF(names, weeks, opponents, stats, fantasyPoints)
        statsDF = pd.concat([statsDF, df])

statsDF


Unnamed: 0,Name,Week,OPP,CMP,ATT,PCT,PassYd,AvgYds/Pass,PassTD,INT,Rating,RushAtt,RushYds,AvgYds/Rush,RushTD,Fantasy Points
0,Carson Palmer,3,DAL,29,48,60.4,325,6.8,2,0,94.53,3,7,2.3,0,21.70
1,Carson Palmer,6,TB,18,22,81.8,283,12.9,3,1,139.39,5,-4,-0.8,0,20.92
2,Blaine Gabbert,11,HOU,22,34,64.7,257,7.6,3,2,92.40,3,13,4.3,0,19.58
3,Blaine Gabbert,12,JAX,22,38,57.9,241,6.3,2,1,83.33,6,17,2.8,0,17.34
4,Carson Palmer,4,SF,33,51,64.7,357,7.0,1,1,83.54,2,1,0.5,0,16.38
5,Carson Palmer,2,IND,19,36,52.8,332,9.2,1,1,82.18,3,6,2.0,0,15.88
6,Carson Palmer,5,PHI,28,44,63.6,291,6.6,1,0,90.25,0,0,0.0,0,15.64
7,Drew Stanton,10,SEA,24,47,51.1,273,5.8,1,0,75.93,1,1,1.0,0,15.02
8,Drew Stanton,9,SF,15,30,50.0,201,6.7,2,1,80.00,4,2,0.5,0,14.24
9,Drew Stanton,16,NYG,20,34,58.8,209,6.1,2,2,71.81,2,1,0.5,0,12.46


In [3]:
def getDefTeams(innerHTML):    
    defTeams = [str(defense) for defense in innerHTML.find_all('a') if 'nfl-stats/team-details' in str(defense)]
    defTeams = [defense.split('>')[1].split('<')[0] for defense in defTeams]
    return defTeams

In [4]:
def getQBptsAllowed(innerHTML):    
    defStats = [str(points) for points in innerHTML.find_all('td') if 'dataItem' not in str(points) and 'href' not in str(points)]
    defStats = [points.split('>')[1].split('<')[0] for points in defStats]
    QBpointsAllowed = [defStats[0]]
    for x in range(6,len(defStats),6):
        QBpointsAllowed.append(defStats[x])
    return QBpointsAllowed

In [5]:
def getDefDF(defTeams, QBpointsAllowed, week):
    df = pd.DataFrame({'OPP': defTeams, 'QB Points Allowed': QBpointsAllowed, 'Week': week})
    return df

In [6]:
url = 'https://fantasydata.com/nfl-stats/fantasy-football-points-allowed-defense-by-position?season=2017&seasontype=1&scope=2&startweek={}&endweek={}'

In [17]:
for x in range(1,18):
    driver.get(url.format(str(x), str(x)))
    time.sleep(1);
    innerHTML = driver.execute_script("return document.body.innerHTML")
    innerHTML = BeautifulSoup(innerHTML, 'lxml')
    
    defTeams = getDefTeams(innerHTML)
    QBpointsAllowed = getQBptsAllowed(innerHTML)
    week = []
    for num in range(len(defTeams)):
        week.append(x)
    
    if x == 1:
        defDF = getDefDF(defTeams, QBpointsAllowed, week)
    else:
        newDefDF = getDefDF(defTeams, QBpointsAllowed, week)
        defDF = pd.concat([defDF, newDefDF])
defDF
    

ValueError: arrays must all be same length

In [24]:
with open("QBdata", 'wb') as picklefile:
    pkl.dump(statsDF, picklefile)

In [48]:
defDF['QB Points Allowed'] = defDF['QB Points Allowed'].astype(float)
defDF['Week'] = defDF['Week'].astype(int)
statsDF['Week'] = statsDF['Week'].astype(int)

In [54]:
fantasyDEF = pd.merge(defDF, statsDF, on=['OPP', 'Week'])
fantasyDEF = fantasyDEF[['Name', 'Week', 'OPP', 'QB Points Allowed']]
defDF.loc[defDF['OPP'] == 'BAL']

Unnamed: 0,OPP,QB Points Allowed,Week
0,BAL,-3.0,1
10,BAL,9.1,2
30,BAL,27.26,3
16,BAL,10.54,4
7,BAL,11.86,5
12,BAL,9.72,6
14,BAL,6.42,7
0,BAL,3.04,8
9,BAL,15.32,9
0,BAL,3.46,11


In [52]:
with open("fantasyDEFdata", 'wb') as picklefile:
    pkl.dump(fantasyDEF, picklefile)