# NFL Combine Scraper

In [None]:
# Import
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import csv
from sklearn.preprocessing import MinMaxScaler
import requests
from bs4 import BeautifulSoup

In [None]:
def _cell_text(cells, index):
    """Return the text of ``cells[index]``, or NaN if the cell is absent or empty."""
    if index >= len(cells):
        return np.nan
    text = cells[index].get_text()
    return text if text != '' else np.nan


def scrape_combine_data(year):
    """Scrape one year's combine table from pro-football-reference.com.

    Args:
        year: Draft year interpolated into the combine page URL.

    Returns:
        A ``pandas.DataFrame`` with one row per table row that contains at
        least one ``<td>`` cell. Columns are ``Year``, ``Player`` and the
        twelve measurement/info fields listed in ``field_names`` below.
        Values are the raw cell strings; empty or missing cells become NaN.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        ValueError: If the page contains no ``stats_table`` table.
    """
    url = f'https://www.pro-football-reference.com/draft/{year}-combine.htm'

    # A timeout keeps a stalled connection from hanging the notebook forever,
    # and raise_for_status surfaces error pages (e.g. rate limiting) directly
    # instead of as an AttributeError further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'class': 'stats_table'})
    if table is None:
        raise ValueError(f'No combine table found at {url}')

    # Output keys, in the positional order of the row's <td> cells.
    field_names = (
        'Pos', 'School', 'College', 'Ht', 'Wt', '40yd', 'Vertical',
        'Bench', 'Broad Jump', '3Cone', 'Shuttle', 'Drafted',
    )

    player_data = []
    # The first <tr> is skipped, as in the original positional slice.
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        if not cols:
            # Rows without any <td> carry no player data.
            continue

        name_cell = row.find('th', {'data-stat': 'player'})
        player = {
            'Year': year,
            'Player': name_cell.get_text() if name_cell is not None else np.nan,
        }
        # Every cell goes through the same bounds/emptiness check, so a short
        # row yields NaN instead of an IndexError.
        for index, field in enumerate(field_names):
            player[field] = _cell_text(cols, index)
        player_data.append(player)

    return pd.DataFrame(player_data)

In [None]:
# Scrape every combine year from 2012 through 2020 (the range end is
# exclusive) and stack the per-year frames into one table with a fresh
# 0..n-1 index.
all_data = [scrape_combine_data(season) for season in range(2012, 2021)]

final_df = pd.concat(all_data, ignore_index=True)


In [None]:
# Display the column labels of the combined frame as a quick sanity check.
print(final_df.columns)

In [None]:
# Write the combined data to 'complete_combine.csv' in the working directory.
# NOTE(review): to_csv also writes the row index as an unnamed first column
# by default; pass index=False if that column is unwanted -- confirm first
# that no downstream reader relies on it.
final_df.to_csv('complete_combine.csv')