In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re  # To use regular expressions

# URL of the page to scrape
url = "https://www.pro-football-reference.com/years/2000/fantasy.htm"

# Fetch the webpage. An explicit timeout keeps the cell from blocking forever
# when the server does not answer (requests applies no timeout by default).
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise an HTTPError on 4xx/5xx responses

# Parse the page content
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table; fail with a clear message instead of an AttributeError on
# `None` if the expected markup is not in the response.
table = soup.find('table', {'id': 'fantasy'})
if table is None or table.tbody is None:
    raise ValueError(f"No <table id='fantasy'> with a <tbody> found at {url}")

# Column lists, filled in lock-step (one entry per kept row)
players = []
fantpt_data = []
ppr_data = []
ovrrank_data = []

# Iterate over each row in the table body
for row in table.tbody.find_all('tr'):
    # Skip rows carrying the 'thead' class (header rows inside the body)
    if 'thead' in row.get('class', []):
        continue

    # Extract the specific data cells; each is None when the row lacks that cell
    player = row.find('td', {'data-stat': 'player'})
    fantpt = row.find('td', {'data-stat': 'fantasy_points'})
    ppr = row.find('td', {'data-stat': 'fantasy_points_ppr'})
    ovrrank = row.find('td', {'data-stat': 'fantasy_rank_overall'})

    # Player name with the "*" and "+" marker characters removed;
    # a missing cell becomes an empty string.
    player_name = player.get_text(strip=True) if player is not None else ""
    players.append(re.sub(r'[\*\+]', '', player_name))

    # Remaining columns are kept as the scraped text ("" when the cell is missing)
    fantpt_data.append(fantpt.get_text(strip=True) if fantpt is not None else "")
    ppr_data.append(ppr.get_text(strip=True) if ppr is not None else "")
    ovrrank_data.append(ovrrank.get_text(strip=True) if ovrrank is not None else "")

# Create a DataFrame (all columns hold strings at this point)
data = {
    'Player': players,
    'FantPt': fantpt_data,
    'PPR': ppr_data,
    'OvrRank': ovrrank_data
}
df = pd.DataFrame(data)

# Save to CSV (without headers and without the index column)
df.to_csv('nfl_fantasy_stats_2000.csv', index=False, header=False)

# Debugging: Print the first few rows of the DataFrame
print(df.head())


             Player FantPt    PPR OvrRank
0    Marshall Faulk    379  459.9       1
1    Edgerrin James    332  395.3       2
2       Jeff Garcia    341  340.5       3
3  Daunte Culpepper    338  338.5       4
4      Eddie George    284  334.2       5


In [10]:
# Show the scraped DataFrame as the cell's output (last expression of the cell)
df

Unnamed: 0,Player,FantPt,PPR,OvrRank
0,Marshall Faulk,379,459.9,1
1,Edgerrin James,332,395.3,2
2,Jeff Garcia,341,340.5,3
3,Daunte Culpepper,338,338.5,4
4,Eddie George,284,334.2,5
...,...,...,...,...
545,Herbert Goodman,-2,-1.2,
546,Joe Hamilton,-2,-2.2,
547,Winslow Oliver,-2,-2.0,
548,David Palmer,-2,-1.2,


In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re  # To use regular expressions

# URL of the page to scrape
url = "https://www.pro-football-reference.com/years/2000/fantasy.htm"

# Fetch the webpage. An explicit timeout keeps the cell from blocking forever
# when the server does not answer (requests applies no timeout by default).
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise an HTTPError on 4xx/5xx responses

# Parse the page content
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table; fail with a clear message instead of an AttributeError on
# `None` if the expected markup is not in the response.
table = soup.find('table', {'id': 'fantasy'})
if table is None or table.tbody is None:
    raise ValueError(f"No <table id='fantasy'> with a <tbody> found at {url}")

# Column lists, filled in lock-step (one entry per kept row)
players = []
fantpt_data = []
ppr_data = []
ovrrank_data = []

# Iterate over each row in the table body
for row in table.tbody.find_all('tr'):
    # Skip rows carrying the 'thead' class (header rows inside the body)
    if 'thead' in row.get('class', []):
        continue

    # Extract the specific data cells; each is None when the row lacks that cell
    player = row.find('td', {'data-stat': 'player'})
    fantpt = row.find('td', {'data-stat': 'fantasy_points'})
    ppr = row.find('td', {'data-stat': 'fantasy_points_ppr'})
    ovrrank = row.find('td', {'data-stat': 'fantasy_rank_overall'})

    # Player name with the "*" and "+" marker characters removed;
    # a missing cell becomes an empty string.
    player_name = player.get_text(strip=True) if player is not None else ""
    players.append(re.sub(r'[\*\+]', '', player_name))

    # Remaining columns are kept as the scraped text ("" when the cell is missing)
    fantpt_data.append(fantpt.get_text(strip=True) if fantpt is not None else "")
    ppr_data.append(ppr.get_text(strip=True) if ppr is not None else "")
    ovrrank_data.append(ovrrank.get_text(strip=True) if ovrrank is not None else "")

# Create a DataFrame (all columns hold strings at this point)
data = {
    'Player': players,
    'FantPt': fantpt_data,
    'PPR': ppr_data,
    'OvrRank': ovrrank_data
}
df = pd.DataFrame(data)

# Save to CSV (with a header row, without the index column)
df.to_csv('nfl_fantasy_stats_2000.csv', index=False, header=True)

# Debugging: Print the first few rows of the DataFrame
print(df.head())


             Player FantPt    PPR OvrRank
0    Marshall Faulk    379  459.9       1
1    Edgerrin James    332  395.3       2
2       Jeff Garcia    341  340.5       3
3  Daunte Culpepper    338  338.5       4
4      Eddie George    284  334.2       5
