# Dallas Cowboys Season 2021 stats

In [None]:
import pandas as pd

## Data Extraction

In [None]:
# Pro-Football-Reference page for the Dallas Cowboys' 2021 season; pd.read_html parses its tables below.
dallas_url = "https://www.pro-football-reference.com/teams/dal/2021.htm"

In [None]:
# pd.read_html returns a list with one DataFrame per HTML table it finds at the URL.
dfs = pd.read_html(dallas_url)
season_stats_raw = dfs[0]
weekly_stats_raw = dfs[1]
# NOTE(review): this binds the very same table object as season_stats_raw (dfs[0]).
# If a separate red-zone/downs table was intended, the index is probably wrong — confirm.
redzone_downs_raw = dfs[0]

## Data Cleaning

In [None]:
# First look at the raw weekly table: its (rows, columns) shape, then the
# per-column summary printed by DataFrame.info().
print(f"Shape: {weekly_stats_raw.shape}")
weekly_stats_raw.info()

In [None]:
# Display the dtype pandas assigned to each column of the raw weekly table.
weekly_stats_raw.dtypes

In [None]:
# Mapping from the header labels of the raw table to readable names.
# It is applied to every level of the two-level column header at once.
column_dict = {
    # Abbreviated stat / group labels
    "TotYd": "Total_Yards",
    "1stD": "First_Downs",
    "TO": "Turnovers",
    "RushY": "Rush_Yards",
    "PassY": "Pass_Yards",
    "Rec": "Record",
    "Score": "Points",
    "Tm": "Scored",
    "Opp": "Allowed",
    "Sp. Tms": "Special_Teams",
    # Top-level placeholder labels ("Unnamed: N_level_0") carry no information,
    # so they are blanked out and vanish when the two levels are joined.
    **dict.fromkeys((f"Unnamed: {position}_level_0" for position in range(10)), ""),
    # Second-level placeholder labels get a descriptive name instead.
    "Unnamed: 3_level_1": "Kickoff_Time",
    "Unnamed: 4_level_1": "Boxscore",
    "Unnamed: 5_level_1": "Result",
    "Unnamed: 8_level_1": "Local",
}


def _flatten_label(levels):
    """Collapse one (group, name) column label into a single snake-style string."""
    flat = " ".join(levels).strip().replace(" ", "_")
    # A bare "Allowed" (no group prefix) is the opponent-name column, because
    # "Opp" is mapped to "Allowed" above for every header level.
    return "Opponent" if flat == "Allowed" else flat


# Rename the header labels, then replace the two-level header with flat names.
weekly_stats_raw = weekly_stats_raw.rename(columns=column_dict)
week_new_columns = [_flatten_label(levels) for levels in weekly_stats_raw.columns.values]
weekly_stats_raw.columns = week_new_columns

In [None]:
# Work on an independent (deep) copy so the raw table stays untouched.
weekly_stats_clean = weekly_stats_raw.copy()

In [None]:
# Remove empty rows: keep only rows that have a value in "Day".
# The explicit .copy() makes the filtered result an independent frame; without it
# the column assignments and in-place operations performed on weekly_stats_clean
# afterwards can raise pandas' SettingWithCopyWarning.
weekly_stats_clean = weekly_stats_clean[weekly_stats_clean['Day'].notna()].copy()

In [None]:
# Convert the marker columns to booleans with vectorised comparisons
# OT: True only where the cell holds the "OT" marker
weekly_stats_clean["OT"] = weekly_stats_clean["OT"] == "OT"
# Result: True only where the cell is "W"
weekly_stats_clean["Result"] = weekly_stats_clean["Result"] == "W"
# Local: True for every cell that is not the "@" marker
weekly_stats_clean["Local"] = weekly_stats_clean["Local"] != "@"

In [None]:
# Add year and kickoff hour to the date.
# The Date column has no year, and the season crosses a calendar-year boundary:
# the last regular-season weeks and all playoff rounds are played in
# January/February of the following year. The year is therefore chosen from the
# month name rather than from the Week label, which previously stamped the
# January regular-season games with 2021 and left any playoff round other than
# "Wild Card" without a year (making the datetime parsing below fail).
season_start_year = 2021
# Compute the mask once, before any Date value is modified.
in_new_year = weekly_stats_clean["Date"].str.contains("January|February", na=False)
weekly_stats_clean.loc[~in_new_year, "Date"] += f" {season_start_year} " + weekly_stats_clean.loc[~in_new_year, "Kickoff_Time"]
weekly_stats_clean.loc[in_new_year, "Date"] += f" {season_start_year + 1} " + weekly_stats_clean.loc[in_new_year, "Kickoff_Time"]
# Transform date str ("<Month> <day> <year> <h:mm><AM/PM> ET") to datetime
weekly_stats_clean["Date"] = pd.to_datetime(weekly_stats_clean["Date"], format='%B %d %Y %I:%M%p ET')
# Display the entry with index label 0 as a quick sanity check
weekly_stats_clean["Date"][0]

In [None]:
# Discard the columns that are not used from here on, then display what remains.
obsolete_columns = ["Boxscore", "Kickoff_Time", "Day"]
weekly_stats_clean.drop(columns=obsolete_columns, inplace=True)
weekly_stats_clean.columns

In [None]:
# Every remaining missing value, in any column, becomes 0 (the frame is modified in place)
weekly_stats_clean.fillna(value=0, inplace=True)

In [None]:
# Split the cleaned schedule by position: every row except the last is treated
# as regular season, the last row as the postseason game.
reg_season_stats = weekly_stats_clean.iloc[:-1, :].convert_dtypes()
# Select the last row with a list ([-1]) so the result stays a one-row DataFrame
# with the same columns as reg_season_stats. A scalar -1 returns a Series, and
# .to_frame() would turn that into a single mixed-type column (stats as rows),
# leaving convert_dtypes() nothing meaningful to infer per statistic.
postseason_stats = weekly_stats_clean.iloc[[-1], :].convert_dtypes()

## Data Exploration

### Regular Season Stats

In [None]:
# Display descriptive summary statistics for the regular-season games.
reg_season_stats.describe()