# 00_data_collection.ipynb

# NFL Data Collection

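This notebook collects raw NFL data from two sources and saves each dataset to a CSV file:
- The Odds API (https://api.the-odds-api.com): betting odds and game scores, fetched over HTTP (requires an API key)
- Team Rankings (https://www.teamrankings.com/nfl/stat/): team statistics, scraped from the site with Selenium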

## Data Sources
- Odds data: https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds/
- Scores data: https://api.the-odds-api.com/v4/sports/americanfootball_nfl/scores/
- Team offensive stats: https://www.teamrankings.com/nfl/stat/points-per-game
- Team defensive stats: https://www.teamrankings.com/nfl/stat/opponent-points-per-game

## Output Files
- raw_odds_api.csv: Raw betting odds data
- raw_scores_api.csv: Raw game scores data  
- raw_team_stats.csv: Raw team statistics

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import requests
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

### Fetch raw odds data from The Odds API

In [None]:
def fetch_odds_data(api_key):
    """Fetch NFL totals odds from The Odds API and flatten them to one row per outcome.

    Args:
        api_key: The Odds API key, sent as the ``apiKey`` query parameter.

    Returns:
        A DataFrame with one row per entry under ``bookmakers -> markets -> outcomes``,
        carrying the event-level fields (``id``, ``sport_key``, ``commence_time``,
        ``home_team``, ``away_team``) and the bookmaker-level fields
        (``bookmakers.key``, ``bookmakers.last_update``) alongside each outcome.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status
            (for example an invalid key or an exhausted quota).
        requests.Timeout: if the API does not respond within the timeout.
    """
    odds_url = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/odds/"
    # Let requests build the query string so every value is URL-encoded.
    params = {"regions": "us", "markets": "totals", "apiKey": api_key}

    # requests has no default timeout; without one a stalled connection hangs forever.
    response = requests.get(odds_url, params=params, timeout=30)
    # Fail loudly on an error status instead of trying to normalize an error payload.
    response.raise_for_status()
    odds_data = response.json()

    odds_df = pd.json_normalize(
        odds_data,
        record_path=['bookmakers', 'markets', 'outcomes'],
        meta=['id', 'sport_key', 'commence_time', 'home_team', 'away_team',
              ['bookmakers', 'key'], ['bookmakers', 'last_update']]
    )

    return odds_df

### Fetch raw scores data from The Odds API

In [None]:
def fetch_scores_data(api_key, days_from=3):
    """Fetch NFL game scores from The Odds API as a DataFrame.

    Args:
        api_key: The Odds API key, sent as the ``apiKey`` query parameter.
        days_from: Value sent as the ``daysFrom`` query parameter
            (presumably how many days back to include; confirm against the API docs).

    Returns:
        A DataFrame with the columns ``id``, ``home_team``, ``away_team``,
        ``commence_time``, ``completed`` and ``scores``. When the API returns no
        games the frame is empty but still has these columns, so a CSV written
        from it keeps its header row.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if the API does not respond within the timeout.
    """
    scores_url = "https://api.the-odds-api.com/v4/sports/americanfootball_nfl/scores/"
    # Let requests build the query string so every value is URL-encoded.
    params = {"apiKey": api_key, "daysFrom": days_from}

    # requests has no default timeout; without one a stalled connection hangs forever.
    response = requests.get(scores_url, params=params, timeout=30)
    # Fail loudly on an error status instead of building a frame from an error payload.
    response.raise_for_status()
    scores_data = response.json()

    # Passing the column list keeps only the important fields and gives the
    # result the same schema whether or not any games were returned.
    columns = ['id', 'home_team', 'away_team', 'commence_time', 'completed', 'scores']
    scores_df = pd.DataFrame(scores_data, columns=columns)

    return scores_df

### Scrape raw team statistics from Team Rankings

In [None]:
def scrape_team_stats():
    """Scrape points-for and points-against tables from Team Rankings and merge them.

    Returns:
        A DataFrame joined on ``Team`` with the columns ``Offense_Rank``,
        ``Team``, ``Points_For``, ``Defense_Rank`` and ``Points_Against``.
        All values are the raw cell text (strings). Teams that appear in only
        one of the two tables are dropped by the default inner join.

    Raises:
        selenium.common.exceptions.TimeoutException: if a stats table does not
            appear within 10 seconds of loading the page.
    """

    def scrape_table(url):
        """Load ``url`` in Chrome and return the first three cells of each table row."""
        driver = None
        try:
            driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
            driver.get(url)

            # Wait for table to load
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'table.tr-table tbody tr'))
            )

            # Extract data
            rows = driver.find_elements(By.CSS_SELECTOR, 'table.tr-table tbody tr')
            data = []
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, 'td')
                # Skip rows without rank/team/value cells (e.g. spacer or
                # "no data" rows) instead of failing with an IndexError.
                if len(cells) < 3:
                    continue
                data.append([cell.text.strip() for cell in cells[:3]])

            return pd.DataFrame(data, columns=['Rank', 'Team', 'Value'])

        finally:
            # Always close the browser, even when loading or parsing failed.
            if driver is not None:
                driver.quit()

    # Scrape offensive and defensive stats
    offense_url = 'https://www.teamrankings.com/nfl/stat/points-per-game'
    defense_url = 'https://www.teamrankings.com/nfl/stat/opponent-points-per-game'

    offense_df = scrape_table(offense_url)
    offense_df = offense_df.rename(columns={'Value': 'Points_For', 'Rank': 'Offense_Rank'})

    defense_df = scrape_table(defense_url)
    defense_df = defense_df.rename(columns={'Value': 'Points_Against', 'Rank': 'Defense_Rank'})

    # Merge offensive and defensive stats
    team_stats = pd.merge(offense_df, defense_df, on='Team', suffixes=('_off', '_def'))

    return team_stats

### Main function to collect and save raw data

In [None]:
def main():
    """Collect odds, scores and team stats, writing each to a CSV in the working directory.

    The Odds API key is read from the ``ODDS_API_KEY`` environment variable so
    that the credential is not stored in the notebook.

    Outputs:
        raw_odds_api.csv, raw_scores_api.csv, raw_team_stats.csv

    Raises:
        RuntimeError: if ``ODDS_API_KEY`` is unset or empty.
    """
    import os

    # API Key: never hard-code credentials in source that may be shared or committed.
    api_key = os.environ.get("ODDS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the ODDS_API_KEY environment variable to your The Odds API key."
        )

    # Collect odds data
    print("Fetching odds data...")
    odds_df = fetch_odds_data(api_key)
    odds_df.to_csv('raw_odds_api.csv', index=False)
    print("Saved raw odds data to raw_odds_api.csv")

    # Collect scores data
    print("Fetching scores data...")
    scores_df = fetch_scores_data(api_key)
    scores_df.to_csv('raw_scores_api.csv', index=False)
    print("Saved raw scores data to raw_scores_api.csv")

    # Collect team stats
    print("Scraping team stats...")
    team_stats = scrape_team_stats()
    team_stats.to_csv('raw_team_stats.csv', index=False)
    print("Saved raw team stats to raw_team_stats.csv")


# Run the full collection when this cell/script is executed directly.
if __name__ == "__main__":
    main()