# Dallas Cowboys Season 2021 Stats

In [None]:
import pandas as pd
import os

In [None]:
# Season and franchise covered by this notebook. `year` is interpolated into the
# scrape URL and the game dates; `team` is only used in the export file names.
team, year = "Dallas Cowboys", 2021

## Data Extraction

In [None]:
# Franchise slug that appears in the team page URL. It is kept next to the URL
# as its own variable so that changing `team` is not silently undone by a
# hard-coded "dal" buried inside the address.
team_slug = "dal"
dallas_url = f"https://www.pro-football-reference.com/teams/{team_slug}/{year}.htm"


In [None]:
# Parse every HTML table found on the team page. The rest of the notebook relies
# on their position in the returned list: index 0 is used as the season
# summary, index 1 as the per-game results and index 2 as the conversions table.
dfs = pd.read_html(dallas_url)
season_stats_raw, results_raw, conversions_raw = dfs[0], dfs[1], dfs[2]

In [None]:
# Sanity check of the season summary table: dimensions, then the first 3 rows.
season_shape = season_stats_raw.shape
print(f"Shape: {season_shape}")
season_stats_raw.head(n=3)

In [None]:
# Sanity check of the per-game results table: dimensions, then the first 3 rows.
results_shape = results_raw.shape
print(f"Shape: {results_shape}")
results_raw.head(n=3)

In [None]:
# Sanity check of the conversions table: dimensions, then the first 3 rows.
conversions_shape = conversions_raw.shape
print(f"Shape: {conversions_shape}")
conversions_raw.head(n=3)

## Data Cleaning

### Season Data (Summary Stats and Conversions)

#### Summary Stats

In [None]:
# Display the raw column labels (still tuples of header levels at this point)
# to decide which abbreviations need renaming.
season_stats_raw.columns

In [None]:
# Abbreviation -> readable label. `rename` is called without `level`, so each
# entry is matched against every level of the column header.
season_column_dict = {
    "Yds": "Yards",
    "Pts": "Points",
    "PF": "Scored Points",
    "Tot Yds & TO": "Offense",
    "Ply": "Plays",
    "TO": "Turnovers",
    "FL": "Fumbles",
    "1stD": "First_Downs",
    "Cmp": "Completions",
    "Att": "Attempts",
    "Pen": "Number",
    "1stPy": "First_Downs_by_Penalty",
    "#Dr": "Number of Drives",
    "Sc%": "%_Scoring_Drives",
    "TO%": "%_Turnovers_Drives",
}
season_stats_raw = season_stats_raw.rename(columns=season_column_dict)

# Flatten the header to one string per column: keep only the lower label when
# the upper one is an auto-generated "Unnamed" placeholder, otherwise join all
# levels; spaces become underscores in both cases.
season_new_columns = [
    (col[1] if "Unnamed" in col[0] else " ".join(col).strip()).replace(" ", "_")
    for col in season_stats_raw.columns.values
]
season_stats_raw.columns = season_new_columns
season_stats_raw.columns

In [None]:
# Let pandas pick the best nullable dtype for each column, then show the result
# for inspection.
season_stats_raw = season_stats_raw.convert_dtypes()
season_stats_raw.dtypes

#### Conversions

In [None]:
# Display the raw column labels of the conversions table before flattening.
conversions_raw.columns

In [None]:
# Flatten the header: keep only the second-level label of every column and
# swap spaces for underscores.
conversion_new_columns = [
    col[1].replace(" ", "_") for col in conversions_raw.columns.values
]
conversions_raw.columns = conversion_new_columns
conversions_raw.columns

In [None]:
# Let pandas pick the best nullable dtype for each column, then show the result
# for inspection.
conversions_raw = conversions_raw.convert_dtypes()
conversions_raw.dtypes

#### Merge Season Dfs

In [None]:
# Join the summary and conversions tables row by row, using the shared "Player"
# column as the key (default inner join).
season_complete = pd.merge(season_stats_raw, conversions_raw, on="Player")
season_complete

In [None]:
# Rows whose "Player" label contains "Lg" go to the rankings table; every other
# row goes to the stats table. Each half then drops the columns that contain
# missing values and gets a fresh 0-based index.
is_ranking_row = season_complete["Player"].str.contains("Lg")

rankings_clean = (
    season_complete.loc[is_ranking_row]
    .dropna(axis=1)
    .reset_index(drop=True)
)
season_stats_clean = (
    season_complete.loc[~is_ranking_row]
    .dropna(axis=1)
    .reset_index(drop=True)
)
rankings_clean

### Data per Week

In [None]:
# Display the raw column labels of the per-game table before renaming them.
results_raw.columns

In [None]:
# Abbreviation / placeholder -> readable label. `rename` is called without
# `level`, so each entry is matched against every level of the column header.
results_columns = {
    "TotYd": "Total_Yards",
    "1stD": "First_Downs",
    "TO": "Turnovers",
    "RushY": "Rush_Yards",
    "PassY": "Pass_Yards",
    "Rec": "Record",
    "Score": "Points",
    "Tm": "Scored",
    "Opp": "Allowed",
    "Unnamed: 3_level_1": "Kickoff_Time",
    "Unnamed: 4_level_1": "Boxscore",
    "Unnamed: 5_level_1": "Result",
    "Unnamed: 8_level_1": "Local",
    "Sp. Tms": "Special_Teams",
}
results_raw = results_raw.rename(columns=results_columns)

# Flatten the header to one string per column: keep only the lower label when
# the upper one is an auto-generated "Unnamed" placeholder, otherwise join all
# levels; spaces become underscores in both cases.
results_new_columns = [
    (col[1] if "Unnamed" in col[0] else " ".join(col).strip()).replace(" ", "_")
    for col in results_raw.columns.values
]
# A column that ends up named exactly "Allowed" is relabelled "Opponent".
# NOTE(review): presumably this is the opponent-name column, which shares the
# "Opp" abbreviation with the points-allowed column -- confirm against the table.
results_new_columns = [
    {"Allowed": "Opponent"}.get(name, name) for name in results_new_columns
]
results_raw.columns = results_new_columns
results_raw.columns

In [None]:
# Preview the first two rows with the flattened column names applied.
results_raw.head(2)

In [None]:
# Work on an independent (deep, the default) copy so the cleaning steps below
# never touch results_raw.
weekly_stats_clean = results_raw.copy()

In [None]:
# Keep only rows that have a value in "Day"; rows without one are treated as
# empty and discarded. The original row labels are preserved.
weekly_stats_clean = weekly_stats_clean.dropna(subset=["Day"])

In [None]:
# Force "Week" to str. When the team has no postseason rows the column is not
# parsed as text, and the `.str` accessors used later need strings.
weekly_stats_clean = weekly_stats_clean.astype({"Week": str})
weekly_stats_clean["Week"].dtype

In [None]:
# Build a full timestamp from the "Date" text (month and day), the calendar
# year and "Kickoff_Time".
#
# The calendar year is derived from the month rather than from the week label:
# a season that starts in `year` runs into January/February of `year + 1`, and
# that includes the last regular-season weeks as well as every playoff round.
# Using one mask for the whole column also keeps both sides of the
# concatenation aligned on the same rows, whatever the playoff round is called.
played_next_year = weekly_stats_clean["Date"].str.contains(
    "^(?:January|February)", na=False
)
kickoff_year = played_next_year.map({True: str(year + 1), False: str(year)})
weekly_stats_clean["Date"] = (
    weekly_stats_clean["Date"]
    + " "
    + kickoff_year
    + " "
    + weekly_stats_clean["Kickoff_Time"]
)
# Transform date str to datetime, e.g. "September 9 2021 8:20PM ET"
weekly_stats_clean["Date"] = pd.to_datetime(weekly_stats_clean["Date"], format='%B %d %Y %I:%M%p ET')
weekly_stats_clean["Date"][0]

In [None]:
# Columns that are no longer needed: the boxscore column, the kickoff time and
# weekday (the time is already folded into "Date"), and the expected-points
# columns.
unused_columns = [
    "Boxscore",
    "Kickoff_Time",
    "Day",
    "Expected_Points_Offense",
    "Expected_Points_Defense",
    "Expected_Points_Special_Teams",
]
weekly_stats_clean = weekly_stats_clean.drop(columns=unused_columns)
weekly_stats_clean.columns

In [None]:
# Every remaining missing value, in any column, becomes 0.
weekly_stats_clean = weekly_stats_clean.fillna(0)

In [None]:
# Turn the marker columns into booleans:
#   OT     -> True only where the cell is exactly "OT"
#   Result -> True only where the cell is exactly "W" (anything else is False)
#   Local  -> True unless the cell is "@"
weekly_stats_clean["OT"] = weekly_stats_clean["OT"] == "OT"
weekly_stats_clean["Result"] = weekly_stats_clean["Result"] == "W"
weekly_stats_clean["Local"] = weekly_stats_clean["Local"] != "@"
weekly_stats_clean["Local"]

In [None]:
# Let pandas pick the best nullable dtype for each column, then show the result
# for inspection.
weekly_stats_clean = weekly_stats_clean.convert_dtypes()
weekly_stats_clean.dtypes

In [None]:
# Rows with a purely numeric "Week" label form the regular season; every other
# label is treated as postseason. Both halves get a fresh 0-based index.
has_numeric_week = weekly_stats_clean["Week"].str.isnumeric()

reg_season_stats = weekly_stats_clean.loc[has_numeric_week].reset_index(drop=True)
postseason_stats = weekly_stats_clean.loc[~has_numeric_week].reset_index(drop=True)

## Export data

In [None]:
# Write the four cleaned tables as CSV files under ./data, without the index.
# File names are "<team>_<year>_<suffix>.csv" with spaces replaced by "_".
folder = "./data"
# exist_ok avoids the separate existence check and the race that comes with it.
os.makedirs(folder, exist_ok=True)
reg_season_stats.to_csv(f"{folder}/{team}_{year}_season_game_stats.csv".replace(" ","_"), index=False)
postseason_stats.to_csv(f"{folder}/{team}_{year}_postseason_game_stats.csv".replace(" ","_"), index=False)
# Each table goes to the file named after it: season stats to *_season_stats.csv
# and league rankings to the rankings file (previously the two were swapped).
# NOTE: the existing "rakings" spelling is kept so the output path is unchanged.
season_stats_clean.to_csv(f"{folder}/{team}_{year}_season_stats.csv".replace(" ","_"), index=False)
rankings_clean.to_csv(f"{folder}/{team}_{year}_season_rakings.csv".replace(" ","_"), index=False)
