In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import streamlit as st
import base64


## Note: It is .htm and not .html
#'https://www.pro-football-reference.com/years/2021/rushing.htm'

Sidebar filters the user can choose from:
- Year: 1990 up to date
- Team(s)
- Position
    - QB Quarterback, RB Running Back, FB Fullback, WR Wide Receiver, TE Tight End, C Center

In [10]:

# Quick manual check of the scraper against a single season.
# The app itself offers every season in range(1990, 2024, 1).
year = 2020
url = f'https://www.pro-football-reference.com/years/{year}/rushing.htm'
# Last expression of the cell: displays the list of tables parsed from the page.
pd.read_html(url)

[    Unnamed: 0_level_0 Unnamed: 1_level_0 Unnamed: 2_level_0  \
                     Rk             Player                 Tm   
 0                    1    Derrick Henry*+                TEN   
 1                    2       Dalvin Cook*                MIN   
 2                    3       Josh Jacobs*                LVR   
 3                    4   David Montgomery                CHI   
 4                    5    Ezekiel Elliott                DAL   
 ..                 ...                ...                ...   
 379                368  Jonathan Williams                DET   
 380                369      Mike Williams                LAC   
 381                370         Javon Wims                CHI   
 382                371  Olamide Zaccheaus                ATL   
 383                372    Brandon Zylstra                CAR   
 
     Unnamed: 3_level_0 Unnamed: 4_level_0 Games     Rushing                \
                    Age                Pos     G  GS     Att   Yds  TD  1D 

In [None]:
# Page title and a short description of what the app does.
st.header("NFL Football Stats (Rushing) Explorer")
st.markdown(
    """
This App performs simple web scraping of NFL Football player stats data (Focusing on Rushing)
Data Source: [https://www.pro-football-reference.com/](https://www.pro-football-reference.com/)
"""
)

## Sidebar - Year
# Seasons are listed newest first: 2023 down to 1990.
st.sidebar.header('User Input Features')
selected_year = st.sidebar.selectbox("Year", sorted(range(1990, 2024), reverse=True))


## Web scraping website
@st.cache_data
def load_data(year):
    """Scrape one season of NFL rushing stats from pro-football-reference.com.

    Parameters
    ----------
    year : int
        Season to fetch; it is interpolated into the
        ``/years/<year>/rushing.htm`` URL.

    Returns
    -------
    pandas.DataFrame
        The first table found on the page, with the repeated in-body header
        rows removed, missing values replaced by 0 and the ``Rk`` column
        dropped.
    """
    url = 'https://www.pro-football-reference.com/years/' + str(year) + '/rushing.htm'
    # header=1: the table has a two-level header; use the second row
    # (Rk, Player, Tm, Age, Pos, ...) as the column names.
    html = pd.read_html(url, header=1)
    df = html[0]
    # The header row is repeated inside the table body; those rows carry the
    # literal text "Age" in the Age column, so drop them.
    raw = df.drop(df[df["Age"] == "Age"].index)
    raw = raw.fillna(0)
    # 'Rk' duplicates the positional index pandas already provides.
    playerstats = raw.drop(columns=['Rk'])
    # Fixed: the original returned the misspelled name `platerstats`,
    # which raised NameError on every call.
    return playerstats
# Scrape the stats for the season picked in the sidebar.
playerstats = load_data(selected_year)

## Sidebar - Team
# Every team present in the data is offered, and all are selected by default.
sorted_unique_team = sorted(playerstats['Tm'].unique())
selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team)

## Sidebar - Position
# Fixed: the quarterback code was spelled 'QU'. 'QB' is the standard
# abbreviation - NOTE(review): confirm against the values in the 'Pos' column.
unique_pos = ['QB', 'RB', 'FB', 'WR', 'TE', 'C']
selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos)

## Filtering Data
# Keep only the rows matching BOTH the selected teams and the selected positions.
df_selected_team = playerstats[(playerstats['Tm'].isin(selected_team)) & (playerstats['Pos'].isin(selected_pos))]

st.header('Display PlayerStats of Selected Team(s)')
# Fixed: spaces were missing around the counts ("5rows and 3columns.").
st.write(f'Data Dimension: {df_selected_team.shape[0]} rows and {df_selected_team.shape[1]} columns.')
st.dataframe(df_selected_team)

## Download NFL player stats as a CSV file
def filedownload(df):
    """Build an HTML download link for ``df``.

    The frame is written as CSV without its index, base64-encoded, and
    embedded in a ``data:`` URI so no file has to be written to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to offer for download.

    Returns
    -------
    str
        An ``<a>`` tag whose ``download`` attribute is ``playerstats.csv``.
    """
    csv_bytes = df.to_csv(index=False).encode()
    # b64encode works on bytes; decode turns the result back into text for the URI.
    payload = base64.b64encode(csv_bytes).decode()
    link_template = '<a href="data:file/csv;base64,{}" download="playerstats.csv">Download csv file</a>'
    return link_template.format(payload)

# Render the CSV download link (raw HTML, hence unsafe_allow_html).
st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)

## Heatmap
if st.button('Intercorrelation Heatmap'):
    st.header('Intercorrelation Matrix Heatmap')
    # Round-trip through a CSV file so that read_csv re-infers the column
    # dtypes - presumably because the scraped columns are not numeric dtype
    # after the repeated header rows were removed; verify before simplifying.
    df_selected_team.to_csv('output.csv', index=False)
    df = pd.read_csv('output.csv')

    # Fixed: without numeric_only=True, pandas >= 2.0 raises on the text
    # columns instead of silently skipping them.
    corr = df.corr(numeric_only=True)
    if corr.empty or corr.isna().all().all():
        # Nothing to plot, e.g. when every team or position was deselected.
        st.warning('Not enough numeric data in the current selection to draw a heatmap.')
    else:
        # Boolean mask hiding the upper triangle (diagonal included).
        mask = np.triu(np.ones_like(corr, dtype=bool))
        with sns.axes_style("white"):
            fig, ax = plt.subplots(figsize=(7, 5))
            ax = sns.heatmap(corr, mask=mask, vmax=1, square=True, ax=ax)
        # Fixed: pass the figure explicitly rather than relying on the
        # global pyplot figure.
        st.pyplot(fig)
    
    


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#%matplotlib inline
import seaborn as sns

import streamlit as st
import base64


# Page title and a short description of what the app does.
st.header("NFL Football Stats (Rushing) Explorer")
st.markdown(
    """
This App performs simple web scraping of NFL Football player stats data (Focusing on Rushing)
Data Source: [https://www.pro-football-reference.com/](https://www.pro-football-reference.com/)
"""
)

## Sidebar - Year
# Seasons are listed newest first: 2023 down to 1990.
st.sidebar.header('User Input Features')
selected_year = st.sidebar.selectbox("Year", sorted(range(1990, 2024), reverse=True))


## Web scraping website
@st.cache_data
def load_data(year):
    """Fetch and tidy one season's rushing table from pro-football-reference.com.

    Parameters
    ----------
    year : int
        Season to fetch; it becomes part of the ``/years/<year>/rushing.htm`` URL.

    Returns
    -------
    pandas.DataFrame
        First table on the page without the repeated header rows, with missing
        values replaced by 0 and without the ``Rk`` column.
    """
    rushing_url = f'https://www.pro-football-reference.com/years/{year}/rushing.htm'
    # header=1 takes the column names from the second header row of the page table.
    table = pd.read_html(rushing_url, header=1)[0]
    # Rows that repeat the header inside the body have the text "Age" in the Age column.
    repeated_header_rows = table.index[table["Age"] == "Age"]
    cleaned = table.drop(index=repeated_header_rows).fillna(0)
    # 'Rk' only mirrors the positional index pandas already provides.
    return cleaned.drop(columns=['Rk'])
# Scrape the stats for the season picked in the sidebar.
playerstats = load_data(selected_year)

## Sidebar - Team
# Every team present in the data is offered, and all are selected by default.
sorted_unique_team = sorted(playerstats['Tm'].unique())
selected_team = st.sidebar.multiselect('Team', sorted_unique_team, sorted_unique_team)

## Sidebar - Position
# Fixed: the quarterback code was spelled 'QU'. 'QB' is the standard
# abbreviation - NOTE(review): confirm against the values in the 'Pos' column.
unique_pos = ['QB', 'RB', 'FB', 'WR', 'TE', 'C']
selected_pos = st.sidebar.multiselect('Position', unique_pos, unique_pos)

## Filtering Data
# Keep only the rows matching BOTH the selected teams and the selected positions.
df_selected_team = playerstats[(playerstats['Tm'].isin(selected_team)) & (playerstats['Pos'].isin(selected_pos))]

st.header('Display PlayerStats of Selected Team(s)')
# Fixed: spaces were missing around the counts ("5rows and 3columns.").
st.write(f'Data Dimension: {df_selected_team.shape[0]} rows and {df_selected_team.shape[1]} columns.')
st.dataframe(df_selected_team)

## Download NFL player stats as a CSV file
def filedownload(df):
    """Build an HTML download link for ``df``.

    The frame is written as CSV without its index, base64-encoded, and
    embedded in a ``data:`` URI so no file has to be written to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to offer for download.

    Returns
    -------
    str
        An ``<a>`` tag whose ``download`` attribute is ``playerstats.csv``.
    """
    csv_bytes = df.to_csv(index=False).encode()
    # b64encode works on bytes; decode turns the result back into text for the URI.
    payload = base64.b64encode(csv_bytes).decode()
    link_template = '<a href="data:file/csv;base64,{}" download="playerstats.csv">Download csv file</a>'
    return link_template.format(payload)

# Render the CSV download link (raw HTML, hence unsafe_allow_html).
st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)

## Heatmap
if st.button('Intercorrelation Heatmap'):
    st.header('Intercorrelation Matrix Heatmap')
    # Round-trip through a CSV file so that read_csv re-infers the column
    # dtypes - presumably because the scraped columns are not numeric dtype
    # after the repeated header rows were removed; verify before simplifying.
    df_selected_team.to_csv('output.csv', index=False)
    df = pd.read_csv('output.csv')

    # Only numeric columns take part in the correlation; text columns are skipped.
    corr = df.corr(numeric_only=True)
    if corr.empty or corr.isna().all().all():
        # Nothing to plot, e.g. when every team or position was deselected;
        # the heatmap call would otherwise fail on an empty/all-NaN matrix.
        st.warning('Not enough numeric data in the current selection to draw a heatmap.')
    else:
        # Boolean mask hiding the upper triangle (diagonal included).
        mask = np.triu(np.ones_like(corr, dtype=bool))
        with sns.axes_style("white"):
            fig, ax = plt.subplots(figsize=(7, 5))
            ax = sns.heatmap(corr, mask=mask, vmax=1, square=True, annot=True, fmt='.2f', ax=ax)
        st.pyplot(fig)

    