### This notebook scrapes team grade data from Pro Football Focus (PFF). A PFF account is required to access PFF's stats, so be sure to fill in your account information and Chrome driver file path in the second code block before running!

In [None]:
import time

import pandas as pd
from selenium import webdriver  # Python Dynamic web scraping library
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service

In [None]:
# Open Selenium Instance
#
# Starts a Chrome window driven by Selenium, loads the PFF sign-in page and
# submits the login form with the credentials entered below.

# Example schedule page; `url` is reassigned for every season/team by the
# scraping loop later in this notebook.
url = 'https://premium.pff.com/nfl/teams/2022/REGPO/tampa-bay-buccaneers/schedule'
signinurl = 'https://auth.pff.com'

# Fill these in with your own PFF account details before running.
email = 'your email here!'
pw = 'your pw here!'

# XPath locators for the elements of the sign-in form.
email_input = '//*[@id="login-form_email"]'
pw_input = '//*[@id="login-form_password"]'
login_submit = '//*[@id="sign-in"]'
# NOTE(review): not referenced by any code visible in this notebook.
continuetostats = '/html/body/div/div/div/div/div/div/ul/li[2]/a'


# Selenium 4 receives the chromedriver location through a Service object; the
# `executable_path` keyword of webdriver.Chrome was deprecated in Selenium 4.0
# and is rejected by later 4.x releases.
driver = webdriver.Chrome(service=Service(executable_path='your chrome driver path here!'))
driver.get(signinurl)


# Type the credentials into the form and submit it.
driver.find_element("xpath", email_input).send_keys(email)
driver.find_element("xpath", pw_input).send_keys(pw)
driver.find_element("xpath", login_submit).click()



In [None]:
# Sign In
# Click the button found in the third <div> of the page header under #react-root.
# NOTE(review): presumably the header sign-in button of the page loaded after
# the login form was submitted -- confirm against the live site.
header_button_xpath = '//*[@id="react-root"]/div/header/div[3]/button'
driver.find_element("xpath", header_button_xpath).click()

In [None]:
# Team lookup tables.
#
# Every franchise is described once as (url slug, display name, short code):
#   * the url slug is the team's extension in PFF's web urls,
#   * the display name is the full team name,
#   * the short code is the abbreviation used when building game ids.
# The three public structures below are derived from these rows, so a team
# only ever has to be edited in one place.
# The dictionary games_dict is used to store all final game data.

# Franchises under their current names, in the order they are scraped.
_CURRENT_FRANCHISES = (
    ('arizona-cardinals', 'Arizona Cardinals', 'AC'),
    ('atlanta-falcons', 'Atlanta Falcons', 'AF'),
    ('baltimore-ravens', 'Baltimore Ravens', 'BR'),
    ('buffalo-bills', 'Buffalo Bills', 'BB'),
    ('carolina-panthers', 'Carolina Panthers', 'CP'),
    ('chicago-bears', 'Chicago Bears', 'CHB'),
    ('cincinnati-bengals', 'Cincinnati Bengals', 'CNB'),
    ('cleveland-browns', 'Cleveland Browns', 'CLB'),
    ('dallas-cowboys', 'Dallas Cowboys', 'DC'),
    ('denver-broncos', 'Denver Broncos', 'DB'),
    ('detroit-lions', 'Detroit Lions', 'DL'),
    ('green-bay-packers', 'Green Bay Packers', 'GBP'),
    ('houston-texans', 'Houston Texans', 'HT'),
    ('indianapolis-colts', 'Indianapolis Colts', 'IC'),
    ('jacksonville-jaguars', 'Jacksonville Jaguars', 'JJ'),
    ('kansas-city-chiefs', 'Kansas City Chiefs', 'KC'),
    ('las-vegas-raiders', 'Las Vegas Raiders', 'LVR'),
    ('los-angeles-rams', 'Los Angeles Rams', 'LAR'),
    ('los-angeles-chargers', 'Los Angeles Chargers', 'LAC'),
    ('miami-dolphins', 'Miami Dolphins', 'MD'),
    ('minnesota-vikings', 'Minnesota Vikings', 'MV'),
    ('new-england-patriots', 'New England Patriots', 'NEP'),
    ('new-orleans-saints', 'New Orleans Saints', 'NOS'),
    ('new-york-giants', 'New York Giants', 'NYG'),
    ('new-york-jets', 'New York Jets', 'NYJ'),
    ('philadelphia-eagles', 'Philadelphia Eagles', 'PE'),
    ('pittsburgh-steelers', 'Pittsburgh Steelers', 'PS'),
    ('san-francisco-49ers', 'San Francisco 49ers', 'SF'),
    ('seattle-seahawks', 'Seattle Seahawks', 'SS'),
    ('tampa-bay-buccaneers', 'Tampa Bay Buccaneers', 'TBB'),
    ('tennessee-titans', 'Tennessee Titans', 'TT'),
    ('washington-commanders', 'Washington Commanders', 'WC'),
)

# Names the same franchises used in earlier seasons.
_FORMER_FRANCHISES = (
    ('washington-football-team', 'Washington Football Team', 'WFT'),
    ('washington-redskins', 'Washington Redskins', 'WR'),
    ('oakland-raiders', 'Oakland Raiders', 'OR'),
    ('san-diego-chargers', 'San Diego Chargers', 'SDC'),
    ('st-louis-rams', 'St. Louis Rams', 'SLR'),
)

_ALL_FRANCHISES = _CURRENT_FRANCHISES + _FORMER_FRANCHISES

# Url slugs of the current franchises.
url_teams = [slug for slug, _name, _code in _CURRENT_FRANCHISES]

# Display name -> short code (current and former names).
encoded_teams = {name: code for _slug, name, code in _ALL_FRANCHISES}

# Url slug -> display name (current and former names).
url_decoded_teams = {slug: name for slug, name, _code in _ALL_FRANCHISES}

# Filled by the scraping loop: one entry per game.
games_dict = {}

In [None]:
# This section creates a unique game id (dictionary key) for each game from the
# away/home team codes, the game date and the season:
#     <AWAY code>-<HOME code>-<game date>/<season>
# The value stored under each key is another dictionary mapping every grade
# category (key) to the text scraped for it. Each team's schedule page only
# fills in that team's side of the game ('home-*' or 'away-*'); the other side
# is filled in when the opponent's page is visited.
#
# NAME DISCREPANCIES
# Oakland Raiders moved to Las Vegas in 2020
# Washington Redskins became Washington Football Team in 2020, and Commanders in 2022
# St. Louis Rams moved in 2016 to Los Angeles
# San Diego Chargers moved in 2017 to Los Angeles

# This variable can be modified to include whichever seasons you prefer
seasons = ['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']

# Number of schedule rows to try per team: all possible games including playoffs.
MAX_SCHEDULE_ROWS = 21

# Location of one cell of the schedule. `pane` 1 is the part of the table that
# holds the home/away marker, `pane` 2 the part that holds opponent, date and grades.
_CELL_XPATH = (
    '//*[@id="react-root"]/div/div[2]/div/div/div[3]/div/div/div[2]'
    '/div/div[1]/div/div[{pane}]/div/div[{row}]/div[{col}]'
)

# Columns of pane 2 that are stored, and the key suffix each one is stored under.
# Columns 1 and 2 (opponent, date) feed the game id; 3, 4, 6 and 7 are skipped.
# NOTE(review): per the original author's note, special-teams columns follow
# column 18 and are not collected.
_GRADE_COLUMNS = {
    5: 'score',
    8: 'off',
    9: 'pass',
    10: 'pblk',
    11: 'recv',
    12: 'run',
    13: 'rblk',
    14: 'def',
    15: 'rdef',
    16: 'tack',
    17: 'prsh',
    18: 'cov',
}

# Keys of every game record, in the order they appear in the exported table.
_GAME_FIELDS = (
    'home-score', 'away-score',
    'home-off', 'home-pass', 'home-pblk', 'home-recv', 'home-run', 'home-rblk',
    'away-off', 'away-pass', 'away-pblk', 'away-recv', 'away-run', 'away-rblk',
    'home-def', 'home-rdef', 'home-tack', 'home-prsh', 'home-cov',
    'away-def', 'away-rdef', 'away-tack', 'away-prsh', 'away-cov',
)

# Current url slug -> ((first season under the next name, slug used before it), ...)
# listed oldest name first.
_FORMER_SLUGS = {
    'washington-commanders': (
        (2020, 'washington-redskins'),
        (2022, 'washington-football-team'),
    ),
    'las-vegas-raiders': ((2020, 'oakland-raiders'),),
    'los-angeles-rams': ((2016, 'st-louis-rams'),),
    'los-angeles-chargers': ((2017, 'san-diego-chargers'),),
}


def _slug_for_season(url_team, szn):
    """Return the url slug the franchise `url_team` used in season `szn`."""
    year = int(szn)  # compare numerically rather than as strings
    for renamed_in, old_slug in _FORMER_SLUGS.get(url_team, ()):
        if year < renamed_in:
            return old_slug
    return url_team


def _cell_text(pane, row, col):
    """Return the text of one schedule cell on the page currently loaded in `driver`."""
    return driver.find_element("xpath", _CELL_XPATH.format(pane=pane, row=row, col=col)).text


for szn in seasons:
    for current_slug in url_teams:
        url_team = _slug_for_season(current_slug, szn)
        team = url_decoded_teams[url_team]

        url = f'https://premium.pff.com/nfl/teams/{szn}/REGPO/{url_team}/schedule'
        driver.get(url)
        time.sleep(5)  # wait before reading the table

        for row in range(1, MAX_SCHEDULE_ROWS + 1):
            try:
                # if away = '@', the current team is away. if empty, the current team is home
                away = _cell_text(1, row, 2)
                is_empty = _cell_text(2, row, 4)
            except WebDriverException:
                # The row is not on the page; try the next one.
                continue

            # A '-' or blank in column 4 ends the scrape of this team's schedule.
            if is_empty == '-' or is_empty == '':
                break

            # Read everything the game id needs up front, so that a failed lookup
            # skips the row instead of reusing the previous row's game id.
            try:
                opp_team = _cell_text(2, row, 1)
                game_date = _cell_text(2, row, 2)
            except WebDriverException:
                continue
            if game_date == '':
                continue

            opp_code = encoded_teams.get(opp_team)
            if opp_code is None:
                print(f'Skipping row {row} of {url}: unknown opponent {opp_team!r}')
                continue
            team_code = encoded_teams[team]

            # The id always reads AWAY-HOME regardless of whose page is open.
            if away == '@':
                side = 'away'
                matchup = f'{team_code}-{opp_code}'
            else:
                side = 'home'
                matchup = f'{opp_code}-{team_code}'
            game_id = f'{matchup}-{game_date}/{szn}'

            # Create the record with every field blank the first time the game is seen.
            game = games_dict.setdefault(game_id, dict.fromkeys(_GAME_FIELDS, ''))

            for col, suffix in _GRADE_COLUMNS.items():
                try:
                    game[f'{side}-{suffix}'] = _cell_text(2, row, col)
                except WebDriverException:
                    # Leave this field blank and carry on with the remaining columns.
                    continue
                


In [None]:
# Export the scraped data to Excel and CSV.
# games_dict maps game id -> {field: value}; building a DataFrame from it puts
# the game ids in the columns, so transpose to get one row per game.
df = pd.DataFrame(games_dict).transpose()

# Both files are written relative to the current working directory.
df.to_excel("output.xlsx")
df.to_csv("output.csv")