In [1]:
#Getting Data
import pandas as pd
from src.extract.nflreadpy_extract import *
from src.transform.transform_module_api import DataTransformerTeam
from src.transform.fe_module import *

# Seasons covered by this notebook; the upper bound of the range is exclusive.
years = list(range(1999, 2026))

# One frame per season, as returned by get_reg_team_stats.
dfs = [get_reg_team_stats(season) for season in years]

# Each season frame is cleaned first, then passed through team_stats.
team_dfs = [team_stats(DataTransformerTeam.clean(raw)) for raw in dfs]

# One schedule frame per season, as returned by get_schedule.
records_dfs = [get_schedule(season) for season in years]

# team_records is applied to every schedule, keeping the season order.
team_records_dfs = [team_records(schedule) for schedule in records_dfs]

# Entry i of each list belongs to years[i], so zipping keys the frames by season.
season_stat_map = dict(zip(years, team_dfs))
season_record_map = dict(zip(years, team_records_dfs))

print(season_stat_map)
print(season_record_map)

{1999:    team  pass_yards  rush_yards  pass_attempts  rush_attempts  \
0   ARI        3085        1207            558            396   
1   ATL        3691        1196            508            373   
2   BAL        3172        1694            505            416   
3   BUF        3478        2040            513            519   
4   CAR        4447        1527            572            354   
5   CHI        4352        1387            684            396   
6   CIN        3504        2051            547            442   
7   CLE        2997        1150            490            313   
8   DAL        3278        2051            506            493   
9   DEN        3646        1864            551            465   
10  DET        4074        1245            557            356   
11   GB        4132        1519            604            386   
12  IND        4182        1660            545            419   
13  JAX        3716        2091            535            514   
14   KC        340

In [2]:
# Testing getting wins

def _win_loss_table(schedule):
    """Tally wins and losses per (season, team) from one schedule frame.

    Parameters
    ----------
    schedule : pandas.DataFrame
        Must provide the columns `season`, `home_team`, `away_team`,
        `home_score` and `away_score`.

    Returns
    -------
    pandas.DataFrame
        Columns `season`, `team`, `wins`, `losses`, one row per team and
        season, ordered by the group keys.

    A game only adds a win/loss when one score is strictly greater than the
    other, so ties count as neither. Rows whose scores are NaN also count as
    neither, because NaN comparisons are False.
    """
    home_won = schedule["home_score"] > schedule["away_score"]
    away_won = schedule["away_score"] > schedule["home_score"]

    # Reshape to one row per (game, team) so a single groupby covers both
    # the home and the away side of every game.
    home_side = pd.DataFrame({
        "season": schedule["season"],
        "team": schedule["home_team"],
        "wins": home_won.astype(int),
        "losses": away_won.astype(int),
    })
    away_side = pd.DataFrame({
        "season": schedule["season"],
        "team": schedule["away_team"],
        "wins": away_won.astype(int),
        "losses": home_won.astype(int),
    })

    return (
        pd.concat([home_side, away_side], ignore_index=True)
        .groupby(["season", "team"], as_index=False)[["wins", "losses"]]
        .sum()
    )


# Stored under its own name: the first cell calls `team_records(df)` as a
# function, so binding a dict to `team_records` here would break re-running it.
win_loss_by_year = {}

# NOTE(review): every row returned by load_schedules is tallied. The recorded
# 1999 output shows teams with different numbers of decisions, so postseason
# games are presumably included. Confirm whether regular season only is wanted.
for year in season_stat_map:
    schedule = nfl.load_schedules(year).to_pandas()
    win_loss_by_year[year] = _win_loss_table(schedule)

win_loss_by_year

{1999:     season team  wins  losses
 0     1999  ARI     6      10
 1     1999  ATL     5      11
 2     1999  BAL     8       8
 3     1999  BUF    11       6
 4     1999  CAR     8       8
 5     1999  CHI     6      10
 6     1999  CIN     4      12
 7     1999  CLE     2      14
 8     1999  DAL     8       9
 9     1999  DEN     6      10
 10    1999  DET     8       9
 11    1999   GB     8       8
 12    1999  IND    13       4
 13    1999  JAX    15       3
 14    1999   KC     9       7
 15    1999  MIA    10       8
 16    1999  MIN    11       7
 17    1999   NE     8       8
 18    1999   NO     3      13
 19    1999  NYG     7       9
 20    1999  NYJ     8       8
 21    1999  OAK     8       8
 22    1999  PHI     5      11
 23    1999  PIT     6      10
 24    1999   SD     8       8
 25    1999  SEA     9       8
 26    1999   SF     4      12
 27    1999  STL    16       3
 28    1999   TB    12       6
 29    1999  TEN    16       4
 30    1999  WAS    11       7,
 

In [4]:
# testing fe_module
from src.transform.fe_module import *

# Pair each season's stats frame with the records frame at the same position
# in season_record_map, and keep the result under the stats map's year key.
season_stats = {
    year: stat_percentages(stats, records)
    for (year, stats), records in zip(
        season_stat_map.items(), season_record_map.values()
    )
}

# league_averages is computed from each season's stats frame alone.
season_averages = {
    year: league_averages(stats) for year, stats in season_stat_map.items()
}

print(season_stats)
print(season_averages)

{1999:    team  pass_yards  rush_yards  pass_attempts  rush_attempts  \
0   ARI        3085        1207            558            396   
1   ATL        3691        1196            508            373   
2   BAL        3172        1694            505            416   
3   BUF        3478        2040            513            519   
4   CAR        4447        1527            572            354   
5   CHI        4352        1387            684            396   
6   CIN        3504        2051            547            442   
7   CLE        2997        1150            490            313   
8   DAL        3278        2051            506            493   
9   DEN        3646        1864            551            465   
10  DET        4074        1245            557            356   
11   GB        4132        1519            604            386   
12  IND        4182        1660            545            419   
13  JAX        3716        2091            535            514   
14   KC        340