This first section generates my initial dataset using the nfl_data_py library.

In [None]:
from pathlib import Path

import pandas as pd
import nfl_data_py as nfl

# Season to pull from nfl_data_py; the status messages below are derived from it.
SEASON = 2024

# Start from an empty frame so that cells guarding on `games_df.empty` can skip
# their work cleanly (rather than hitting a NameError) when the load below fails.
games_df = pd.DataFrame()

# Load the schedule for SEASON. Per the library, completed games carry results.
try:
    schedule_df = nfl.import_schedules([SEASON])

    # Keep only the identifying columns and the final scores (no renaming needed).
    games_df = schedule_df[[
        'game_id', 'season', 'week', 'away_team', 'home_team',
        'away_score', 'home_score'
    ]].copy()

    # Force game_id to str so it compares consistently after a CSV round trip.
    games_df['game_id'] = games_df['game_id'].astype(str)
    # Human-readable matchup label, e.g. "BAL @ KC".
    games_df['game'] = games_df['away_team'] + ' @ ' + games_df['home_team']

    print(f"Successfully loaded {SEASON} game and score data.")
    print(games_df.head())

except Exception as e:
    # Deliberately best-effort: report the problem and leave an empty frame
    # behind so a partially built `games_df` is never used downstream.
    games_df = pd.DataFrame()
    print(f"An error occurred: {e}")
    print(f"Please ensure the {SEASON} season data is available in the library.")

Successfully loaded 2024 game and score data.
              game_id  season  week away_team home_team  away_score  \
6706   2024_01_BAL_KC    2024     1       BAL        KC        20.0   
6707   2024_01_GB_PHI    2024     1        GB       PHI        29.0   
6708  2024_01_PIT_ATL    2024     1       PIT       ATL        18.0   
6709  2024_01_ARI_BUF    2024     1       ARI       BUF        28.0   
6710  2024_01_TEN_CHI    2024     1       TEN       CHI        17.0   

      home_score       game  
6706        27.0   BAL @ KC  
6707        34.0   GB @ PHI  
6708        10.0  PIT @ ATL  
6709        34.0  ARI @ BUF  
6710        24.0  TEN @ CHI  


In [None]:
if not games_df.empty:
    # Hand-entered total DVOA per team, stored as percentage strings.
    # NOTE(review): TEN is listed at +33.5% — confirm the sign against the
    # source table before relying on it.
    dvoa_data = {
        'TEAM': ['ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN',
                 'DET', 'GB', 'HOU', 'IND', 'JAX', 'KC', 'LAC', 'LAR', 'LV', 'MIA',
                 'MIN', 'NE', 'NO', 'NYG', 'NYJ', 'PHI', 'PIT', 'SEA', 'SF', 'TB',
                 'TEN', 'WAS'],
        'TOT DVOA': ['8.8%', '-7.3%', '41.4%', '22.7%', '-32.9%', '-12.7%', '7.0%', '-39.9%',
                     '-11.6%', '16.3%', '34.8%', '24.5%', '5.3%', '-7.1%', '-17.7%', '14.7%',
                     '12.6%', '5.1%', '-19.6%', '-9.8%', '16.1%', '-29.7%', '-10.6%',
                     '-21.9%', '-11.4%', '21.3%', '6.3%', '2.3%', '6.7%', '9.4%', '33.5%',
                     '11.5%']
    }

    dvoa_df = pd.DataFrame(dvoa_data)

    # Strip the '%' sign and rescale so that '8.8%' becomes 0.088.
    dvoa_df['dvoa_rating'] = dvoa_df['TOT DVOA'].str.replace('%', '').astype(float) / 100

    # Only the team code and the numeric rating are needed for the lookup.
    dvoa_to_merge = dvoa_df[['TEAM', 'dvoa_rating']].copy()

    # Reconcile team codes between the two sources before joining.
    # NOTE(review): nflverse schedules appear to code the Rams as 'LA' rather
    # than 'LAR'; the alias is applied only when the schedule actually uses it.
    schedule_teams = set(games_df['home_team'].dropna()) | set(games_df['away_team'].dropna())
    team_aliases = {'LAR': 'LA'}
    for dvoa_code, schedule_code in team_aliases.items():
        if schedule_code in schedule_teams and dvoa_code not in schedule_teams:
            dvoa_to_merge['TEAM'] = dvoa_to_merge['TEAM'].replace(dvoa_code, schedule_code)

    # Surface any team that would otherwise silently end up with a NaN rating.
    unmatched_teams = sorted(schedule_teams - set(dvoa_to_merge['TEAM']))
    if unmatched_teams:
        print(f"Warning: no DVOA rating found for team code(s): {unmatched_teams}")

    print("DVOA data has been loaded and cleaned.")

    # Attach ratings with a lookup instead of two merge/rename/drop passes.
    # reset_index gives a fresh 0..n-1 index (as the merges did) and a new
    # frame, so `games_df` itself is left untouched.
    rating_by_team = dvoa_to_merge.set_index('TEAM')['dvoa_rating']
    final_df = games_df.reset_index(drop=True)
    final_df['home_dvoa'] = final_df['home_team'].map(rating_by_team)
    final_df['away_dvoa'] = final_df['away_team'].map(rating_by_team)

    print("Merging complete.")

    # DVOA differential: positive means the home team has the better rating.
    final_df['dvoa_diff'] = final_df['home_dvoa'] - final_df['away_dvoa']

    # Display the first few rows with the new DVOA columns
    print("\n--- Final Merged DataFrame ---")
    print(final_df[['week', 'home_team', 'home_dvoa', 'away_team', 'away_dvoa', 'dvoa_diff']].head())

    # Save inside the guard so an empty `games_df` cannot trigger a NameError on
    # `final_df`, and create the output directory so a fresh checkout works.
    output_path = Path('data/nfl_2024_games_with_dvoa.csv')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    final_df.to_csv(output_path, index=False)
else:
    print("No game data loaded; skipping the DVOA merge and CSV export.")


✅ Step 2: Your DVOA data has been loaded and cleaned.
✅ Step 3: Merging complete.

--- Final Merged DataFrame ---
   week home_team  home_dvoa away_team  away_dvoa  dvoa_diff
0     1        KC      0.147       BAL      0.414     -0.267
1     1       PHI      0.213        GB      0.245     -0.032
2     1       ATL     -0.073       PIT      0.063     -0.136
3     1       BUF      0.227       ARI      0.088      0.139
4     1       CHI     -0.127       TEN      0.335     -0.462


In [10]:
# Reload the merged games/DVOA table from disk and print a quick overview.
final_df = pd.read_csv('data/nfl_2024_games_with_dvoa.csv')

# "\n" gives the intended blank line; the previous "\L" was an invalid escape
# sequence that printed a literal backslash and raised a syntax warning.
print("\nLoaded Final DataFrame from CSV")
print(final_df.head())

# Column names, to verify the expected columns survived the round trip.
print("Column Names:")
print(final_df.columns.tolist())

# info() writes its own report and returns None, so it is called directly
# rather than wrapped in print() (which would emit a stray "None").
print("DataFrame Info:")
final_df.info()

# Summary statistics for the numeric columns.
print("DataFrame Description:")
print(final_df.describe())

# Data type of each column as parsed back from the CSV.
print("Data Types of Each Column:")
print(final_df.dtypes)


\Loaded Final DataFrame from CSV
           game_id  season  week away_team home_team  away_score  home_score  \
0   2024_01_BAL_KC    2024     1       BAL        KC        20.0        27.0   
1   2024_01_GB_PHI    2024     1        GB       PHI        29.0        34.0   
2  2024_01_PIT_ATL    2024     1       PIT       ATL        18.0        10.0   
3  2024_01_ARI_BUF    2024     1       ARI       BUF        28.0        34.0   
4  2024_01_TEN_CHI    2024     1       TEN       CHI        17.0        24.0   

        game  home_dvoa  away_dvoa  dvoa_diff  
0   BAL @ KC      0.147      0.414     -0.267  
1   GB @ PHI      0.213      0.245     -0.032  
2  PIT @ ATL     -0.073      0.063     -0.136  
3  ARI @ BUF      0.227      0.088      0.139  
4  TEN @ CHI     -0.127      0.335     -0.462  
Column Names:
['game_id', 'season', 'week', 'away_team', 'home_team', 'away_score', 'home_score', 'game', 'home_dvoa', 'away_dvoa', 'dvoa_diff']
DataFrame Info:
<class 'pandas.core.frame.DataFrame'>

  print("\Loaded Final DataFrame from CSV")
