In [20]:
import requests
import pandas as pd
import bs4
import re
import json
import logging

In [21]:
# Configure the root logger for this notebook session.
# basicConfig() is a no-op when the root logger already has a handler attached
# (for example after an earlier logging call in the same kernel), in which case
# the level and format below would be ignored. force=True (Python 3.8+) removes
# any existing root handlers first so this configuration always takes effect.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [22]:
def fetch_fantasy_pros_data(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        The response text, or ``None`` when the request fails (connection
        error, timeout, or an HTTP error status). Failures are logged.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they share the
        # log-and-return-None path below.
        response.raise_for_status()
    except requests.RequestException as e:
        logging.error(f"Error fetching data from {url}: {e}")
        return None
    return response.text

def parse_fantasy_pros_data(html_content):
    """Extract the ``ecrData`` JSON object embedded in a page's scripts.

    Scans every ``<script>`` element for a ``var ecrData = {...}`` assignment
    and decodes the object literal that follows it.

    Args:
        html_content: HTML source of the page as a string.

    Returns:
        The decoded JSON value, or ``None`` when no ``ecrData`` assignment is
        found or the embedded JSON cannot be decoded. Both cases are logged.
    """
    soup = bs4.BeautifulSoup(html_content, "html.parser")
    decoder = json.JSONDecoder()
    for script in soup.find_all("script"):
        text = script.string
        if not text:
            continue
        # Only locate where the object starts; the lookahead leaves the
        # opening brace unconsumed so match.end() points at it.
        match = re.search(r"var\s+ecrData\s*=\s*(?=\{)", text)
        if not match:
            continue
        try:
            # raw_decode parses exactly one JSON value starting at the given
            # index, so a "};" inside a string value cannot truncate the
            # payload the way a non-greedy regex capture would.
            data, _ = decoder.raw_decode(text, match.end())
        except json.JSONDecodeError as e:
            logging.error(f"Failed to decode ecrData JSON: {e}")
            return None
        return data
    logging.warning("No ecrData found in the HTML content")
    return None

# Trailing Roman-numeral suffix (I through VIII) as a separate word.
_ROMAN_SUFFIX_RE = re.compile(r"\s+(?:I{1,3}|IV|VI{0,3})\s*$")
# Trailing "Jr" / "Sr" as a separate word, with or without a final period.
_GENERATION_SUFFIX_RE = re.compile(r"\s+(?:Jr|Sr)\.?\s*$")


def clean_player_name(name):
    """Normalize a player name into a compact lowercase key.

    A trailing Roman numeral (``I`` to ``VIII``) and then a trailing
    ``Jr``/``Sr`` (optionally followed by a period) are removed, after which
    the name is stripped, lowercased, and has its spaces removed.

    The suffix must be preceded by whitespace, so capital letters that are
    part of the name itself (for example ``"LV"``) are not stripped.

    Args:
        name: Player name as a string.

    Returns:
        The normalized name, e.g. ``"Patrick Mahomes II"`` ->
        ``"patrickmahomes"`` and ``"Marvin Harrison Jr."`` ->
        ``"marvinharrison"``.
    """
    # Numerals are removed first, then Jr/Sr, mirroring the two-pass order so
    # a name carrying both suffixes loses both.
    name = _ROMAN_SUFFIX_RE.sub("", name)
    name = _GENERATION_SUFFIX_RE.sub("", name)
    return name.strip().lower().replace(' ', '')

def process_player_data(data, position):
    """Build a DataFrame of players for one position from parsed page data.

    Args:
        data: Parsed payload; must contain a ``'players'`` entry that
            ``pandas.DataFrame`` can consume.
            NOTE(review): presumably a list of per-player dicts — confirm
            against the actual payload.
        position: Value written to every row of the ``'position'`` column.

    Returns:
        A DataFrame with ``'player_name'`` normalized through
        ``clean_player_name`` and a ``'position'`` column added, or ``None``
        when ``data`` is empty, lacks ``'players'``, or the players carry no
        ``'player_name'`` field. Failures are logged.
    """
    if not data or 'players' not in data:
        logging.error("Invalid data structure")
        return None

    df = pd.DataFrame(data['players'])

    # An empty players list (or records without names) yields a frame with no
    # 'player_name' column; report it instead of raising KeyError.
    if 'player_name' not in df.columns:
        logging.error("No 'player_name' field in players data")
        return None

    # Clean player names
    df['player_name'] = df['player_name'].apply(clean_player_name)

    # Add position column
    df['position'] = position

    return df

def get_position_data(position):
    """Fetch, parse, and process the stats page for a single position.

    Args:
        position: Position code; it is lowercased to build the page URL and
            passed unchanged to ``process_player_data``.

    Returns:
        Whatever ``process_player_data`` returns for the parsed page, or
        ``None`` if the download or the parse step produced nothing.
    """
    # NOTE(review): the captured run logs a failure for every position;
    # confirm that this stats page actually embeds the `ecrData` script
    # variable the parser searches for.
    stats_url = f'https://www.fantasypros.com/nfl/stats/{position.lower()}.php'

    page = fetch_fantasy_pros_data(stats_url)
    if not page:
        return None

    payload = parse_fantasy_pros_data(page)
    if not payload:
        return None

    return process_player_data(payload, position)

def main():
    """Collect stats for every position and write them to one CSV file.

    Each position is processed independently; a position that yields no data
    is logged as an error and skipped. If at least one position succeeds, the
    frames are concatenated, saved to ``../backend/data/fantasy_pros_stats.csv``
    and the first rows are printed. Otherwise only an error is logged.
    """
    frames = []

    for pos in ('QB', 'RB', 'WR', 'TE', 'DST'):
        frame = get_position_data(pos)
        if frame is None:
            logging.error(f"Failed to process {pos} data")
            continue
        frames.append(frame)
        logging.info(f"{pos} data processed successfully")

    # Nothing succeeded: report it and leave without touching the output file.
    if not frames:
        logging.error("No data to save")
        return

    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv("../backend/data/fantasy_pros_stats.csv", index=False)
    logging.info("All stats saved to fantasy_pros_stats.csv")
    print(combined.head())


# Run the scrape only when executed as a script / notebook cell, not on import.
if __name__ == "__main__":
    main()

ERROR:root:Failed to process QB data
ERROR:root:Failed to process RB data
ERROR:root:Failed to process WR data
ERROR:root:Failed to process TE data
ERROR:root:No data to save
