# Imports

In [None]:
import numpy as np
import pandas as pd
import nfl_data_py as nfl

# Importing NFL Data

In [None]:
# Seasons covered by the data set: 1999 through 2023 inclusive.
years = range(1999, 2024)

In [None]:
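# One-time download: uncomment to rebuild the cached roster pickle that is read below.
# df_roster = nfl.import_seasonal_rosters(years)
# df_roster.to_pickle("./construction_dfs/df_roster_99_23.pkl")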

In [None]:
# Load the cached seasonal-roster frame from disk.
# NOTE: pickle files can execute code on load; only read files you created yourself.
df_roster = pd.read_pickle(
    "./construction_dfs/df_roster_99_23.pkl"
)

In [None]:
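# One-time download: uncomment to rebuild the cached seasonal-stats pickle that is read below.
# df_seasonal = nfl.import_seasonal_data(years)
# df_seasonal.to_pickle("./construction_dfs/seasonal_99_23.pkl")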

In [None]:
# Load the cached seasonal-stats frame from disk.
# NOTE: pickle files can execute code on load; only read files you created yourself.
df_seasonal = pd.read_pickle(
    "./construction_dfs/seasonal_99_23.pkl"
)

In [None]:
# Preview the first rows of the seasonal stats frame to sanity-check the load.
df_seasonal.head()

# Creating Data Set

In [None]:
# Keep only the roster fields that get joined onto the seasonal stats.
df_roster_seasonal = df_roster[
    [
        "season",
        "position",
        "player_name",
        "player_id",
        "age",
        "team",
    ]
]

In [None]:
# Spot-check the roster rows for a single, well-known player.
df_roster_seasonal.query("player_name == 'Mike Evans'")

In [None]:
# Renumber the rows in place. drop=False (the default) keeps the previous
# index as an ordinary column, so it is carried into the merge that follows.
df_roster_seasonal.reset_index(drop=False, inplace=True)

In [None]:
# Left-join roster details onto every seasonal stat line, matching on player and season.
# Stat lines without a roster match are kept, with the roster columns left empty.
df_merge = pd.merge(
    df_seasonal,
    df_roster_seasonal,
    how="left",
    on=["player_id", "season"],
)

In [None]:
# Spot-check the merged rows for the same player inspected before the join.
df_merge.query("player_name == 'Mike Evans'")

In [None]:
# Preview the first rows of the merged frame.
df_merge.head()

## Cleaning up Team for future use

In [None]:
# Roster team abbreviation -> franchise nickname.
# Entries are grouped by franchise so that alternate and historical
# abbreviations for the same team sit next to each other.
team_mapping = {
    'SF': '49ers',
    'CHI': 'Bears',
    'CIN': 'Bengals',
    'BUF': 'Bills',
    'DEN': 'Broncos',
    'CLE': 'Browns',
    'CLV': 'Browns',
    'TB': 'Buccaneers',
    'ARI': 'Cardinals',
    'ARZ': 'Cardinals',
    'SD': 'Chargers',
    'LAC': 'Chargers',
    'KC': 'Chiefs',
    'IND': 'Colts',
    'DAL': 'Cowboys',
    'MIA': 'Dolphins',
    'PHI': 'Eagles',
    'ATL': 'Falcons',
    'NYG': 'Giants',
    'JAX': 'Jaguars',
    'NYJ': 'Jets',
    'DET': 'Lions',
    'GB': 'Packers',
    'CAR': 'Panthers',
    'NE': 'Patriots',
    'OAK': 'Raiders',
    'LV': 'Raiders',
    'SL': 'Rams',   # St. Louis Rams
    'STL': 'Rams',  # St. Louis Rams, same as 'SL'
    'LA': 'Rams',   # Los Angeles Rams
    'BAL': 'Ravens',
    'BLT': 'Ravens',
    # Initial mapping only: the draft data uses "Commanders" from 2022 onward.
    'WAS': 'Redskins',
    'NO': 'Saints',
    'SEA': 'Seahawks',
    'PIT': 'Steelers',
    'HST': 'Texans',
    'HOU': 'Texans',
    'TEN': 'Titans',
    'MIN': 'Vikings',
}

In [None]:
# Swap team abbreviations for nicknames; values missing from the mapping are left untouched.
df_merge["team"] = df_merge["team"].replace(to_replace=team_mapping)

In [None]:
# Rows from the 2022 season onward that were mapped to 'Redskins' are relabelled 'Commanders'.
df_merge.loc[
    df_merge['season'].ge(2022) & df_merge['team'].eq('Redskins'),
    'team',
] = 'Commanders'

# Adding Fantasy Rank

In [None]:
# Work on a copy of the merged data, restricted to the four positions ranked below.
df_filter = (
    df_merge
    .copy()
    .query("position == 'QB' or position == 'RB' or position == 'WR' or position == 'TE'")
)

In [None]:
# Rank players by PPR fantasy points within each (season, position) group.
# Rank 1 is the highest scorer; method='first' breaks ties by row order.
df_filter['rank'] = (
    df_filter
    .groupby(['season', 'position'])['fantasy_points_ppr']
    .rank(ascending=False, method='first')
)

In [None]:
# Spot-check the new 'rank' column for a single player.
df_filter.query("player_name == 'Mike Evans'")

In [None]:
def assign_tier(rank, tier_size=12):
    """Convert a positional rank into a 1-based tier number.

    Ranks 1..tier_size fall in tier 1, the next ``tier_size`` ranks in
    tier 2, and so on.

    Args:
        rank: A numeric rank, or any object NumPy can divide and ceil
            element-wise (e.g. an array).
        tier_size: Number of ranks per tier. Defaults to 12.

    Returns:
        ``np.ceil(rank / tier_size)``: a float for scalar input. NaN ranks
        stay NaN.

    Raises:
        ValueError: If ``tier_size`` is not positive.
    """
    if tier_size <= 0:
        raise ValueError("tier_size must be a positive number")
    return np.ceil(rank / tier_size)

In [None]:
# Derive each row's tier by passing its rank through assign_tier.
df_filter['tier'] = df_filter['rank'].map(assign_tier)

In [None]:
# Spot-check the new 'tier' column for a single player.
df_filter.query("player_name == 'Mike Evans'")

# Adding Draft Data

### Grabbing All Draft Years

In [None]:
# Load the per-position draft-order tables, one pickle per position.
# NOTE: pickle files can execute code on load; only read files you created yourself.
df_qb_draft = pd.read_pickle(
    "./draft_dfs/draft_order_qb_1937_2023.pkl"
)
df_rb_draft = pd.read_pickle(
    "./draft_dfs/draft_order_rb_1936_2023.pkl"
)
df_wr_draft = pd.read_pickle(
    "./draft_dfs/draft_order_wr_1945_2023.pkl"
)
df_te_draft = pd.read_pickle(
    "./draft_dfs/draft_order_te_1953_2023.pkl"
)

In [None]:
# Preview the first rows of the quarterback draft table.
df_qb_draft.head()

### Helper Functions for Combining the Data

In [None]:
# Position code -> draft table for that position.
draft_dfs = dict(
    QB=df_qb_draft,
    RB=df_rb_draft,
    WR=df_wr_draft,
    TE=df_te_draft,
)

In [None]:
import re

def parse_latest_college(text):
    """Return the most recently attended college named in ``text``.

    ``text`` is scanned for entries shaped like ``"Houston (2006–2011)"`` or
    ``"Ohio State (2018)"``. The entry with the greatest end year wins; when
    several entries share that year, the first one encountered is kept.

    Args:
        text: String holding one or more ``name (year)`` or
            ``name (start-end)`` entries. ``None`` or an empty string is
            treated as "nothing listed".

    Returns:
        The college name with surrounding whitespace stripped, or ``None``
        when no entry matches.
    """
    if not text:
        return None

    # A name starts with a letter and may contain spaces, "&", periods,
    # apostrophes and hyphens ("Texas A&M", "St. John's"). The year part is
    # "(YYYY)" or "(YYYY-YYYY)", separated by a hyphen or an en dash.
    pattern = r"([A-Za-z][A-Za-z\s&.'’\-]*)\s\((\d{4})(?:\s?[-–]\s?(\d{4}))?\)"

    latest_year = 0
    latest_college = None

    for college, start_year, end_year in re.findall(pattern, text):
        # A single-year entry has no end year, so its start year is used instead.
        last_year = int(end_year) if end_year else int(start_year)

        # Strict ">" keeps the earliest-listed college when end years tie.
        if last_year > latest_year:
            latest_year = last_year
            latest_college = college.strip()

    return latest_college

def parse_draft_info(text):
    """Extract draft year, round and pick from a draft-description string.

    Three shapes are recognised, tried in this order:

    1. ``"<year> / Round: <n> / Pick: <n>"`` (matched case-insensitively):
       all three values are returned as ints.
    2. Any text containing a four-digit number: that number is taken as the
       year, and Round/Pick are the string ``"Undrafted"``.
    3. Anything else, including ``None`` or an empty string: every value
       is ``None``.

    Args:
        text: The draft-description string to parse.

    Returns:
        A dict with the keys ``"Year"``, ``"Round"`` and ``"Pick"``.
    """
    if not text:
        return {"Year": None, "Round": None, "Pick": None}

    # Case-insensitive so that "round:" / "pick:" are not mistaken for the
    # undrafted form just because of capitalisation.
    drafted = re.search(
        r"(\d{4})\s*/\s*Round:\s*(\d+)\s*/\s*Pick:\s*(\d+)",
        text,
        flags=re.IGNORECASE,
    )
    if drafted:
        year, round_, pick = (int(part) for part in drafted.groups())
        return {"Year": year, "Round": round_, "Pick": pick}

    # No round/pick present: fall back to the first four-digit number as the year.
    year_only = re.search(r"(\d{4})", text)
    if year_only:
        return {
            "Year": int(year_only.group(1)),
            "Round": "Undrafted",
            "Pick": "Undrafted",
        }

    return {"Year": None, "Round": None, "Pick": None}

In [None]:
import requests
from bs4 import BeautifulSoup

def _empty_player_info():
    """Return a fresh all-``None`` result for a player that could not be resolved."""
    return {"Year": None, "Round": None, "Pick": None, "College": None}


def _infobox_cell_text(soup, label):
    """Return the text of the ``<td>`` following the ``<th>`` whose string is ``label``.

    Returns ``None`` when the header, or a ``<td>`` sibling after it, is absent.
    """
    header = soup.find('th', string=label)
    if header is None:
        return None
    cell = header.find_next_sibling('td')
    return None if cell is None else cell.text


def _find_draft_text(soup):
    """Return the 'Undrafted:' cell text if present, otherwise the 'NFL draft:' one."""
    for label in ('Undrafted:', 'NFL draft:'):
        draft_text = _infobox_cell_text(soup, label)
        if draft_text is not None:
            return draft_text
    return None


def grab_empty_query(url, position, timeout=10):
    """Scrape draft and college details for one player from a web page.

    The page at ``url`` is searched for a table header reading 'Undrafted:'
    or 'NFL draft:'. If neither is found, the lookup is retried once at
    ``url`` plus a position suffix (for example ``_(quarterback)``). The
    matching cell is parsed with ``parse_draft_info``, and a 'College:'
    cell, when present, with ``parse_latest_college``.

    Args:
        url: Address of the player's page.
        position: One of 'QB', 'RB', 'WR' or 'TE'. Only used to build the
            retry URL; any other value raises ``KeyError`` when a retry is
            needed.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict with the keys 'Year', 'Round', 'Pick' and 'College'. All four
        are ``None`` when the first request fails or does not return HTTP
        200, or when no draft header is found even after the retry.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # A network failure is handled like a non-200 reply: report and move on.
        print(url)
        print(f"Failed to fetch the page ({exc})")
        return _empty_player_info()

    if response.status_code != 200:
        print(url)
        print("Failed to fetch the page")
        return _empty_player_info()

    soup = BeautifulSoup(response.content, 'html.parser')
    draft_info = _find_draft_text(soup)

    if draft_info is None:
        # No draft header on this page: try once more with a position suffix.
        print("retry")
        position_suffixes = {
            "QB": '(quarterback)',
            "RB": '(running_back)',
            "WR": '(wide_receiver)',
            "TE": '(tight_end)'
        }
        retry_url = f"{url}_{position_suffixes[position]}"
        try:
            response = requests.get(retry_url, timeout=timeout)
        except requests.RequestException as exc:
            print(retry_url)
            print(f"Failed to fetch the page ({exc})")
            return _empty_player_info()

        if response.status_code != 200:
            return _empty_player_info()

        soup = BeautifulSoup(response.content, 'html.parser')
        draft_info = _find_draft_text(soup)
        if draft_info is None:
            return _empty_player_info()

    player_info = parse_draft_info(draft_info)

    # The college is read from whichever page supplied the draft details.
    college_info = _infobox_cell_text(soup, 'College:')
    if college_info is not None:
        player_info["College"] = parse_latest_college(college_info)
    else:
        player_info["College"] = None

    return player_info

In [None]:
# url = "https://en.wikipedia.org/wiki/Case_Keenum" # undrafted case
# grab_empty_query(url, 'QB')

In [None]:
# url = "https://en.wikipedia.org/wiki/Tank_Dell" # drafted case
# grab_empty_query(url, 'WR')

In [None]:
# Web lookups already performed, keyed by (player_name, position). A player
# appears once per season in the stats, so without this cache the same page
# would be downloaded again for every season row. Failed lookups are cached
# too and are therefore not retried until this cell is re-run.
_wiki_draft_cache = {}


def grab_draft_info(player_name, position, team):
    """Return the draft-table row(s) for a player, scraping the web if needed.

    The table in ``draft_dfs[position]`` is filtered to rows whose 'Name'
    equals ``player_name``:

    * No match: the details are fetched with ``grab_empty_query`` from the
      player's Wikipedia URL and a single row with index label 0 is built
      from them.
    * Exactly one match: that row is returned as is.
    * Several matches: they are narrowed to rows whose 'Team' equals ``team``.

    Args:
        player_name: Name to look up in the draft table's 'Name' column.
        position: Key into ``draft_dfs`` ('QB', 'RB', 'WR' or 'TE').
        team: Team used to pick between several players with the same name.
            It is also stored as 'Team' on a scraped row.

    Returns:
        A DataFrame that usually holds one row. After the team filter it may
        hold zero or several rows, so callers should check its length.
    """
    df = draft_dfs[position]
    # Filter first and copy only the matching rows, not the whole table.
    draft_df = df.query("Name == @player_name").copy()

    if draft_df.empty:
        cache_key = (player_name, position)
        if cache_key not in _wiki_draft_cache:
            url = f"https://en.wikipedia.org/wiki/{'_'.join(player_name.split(' '))}"
            _wiki_draft_cache[cache_key] = grab_empty_query(url, position)
        player_info = _wiki_draft_cache[cache_key]

        draft_round = player_info["Round"]
        if draft_round and draft_round != 'Undrafted':
            # The scraped pick is the overall pick. Pick-within-round is
            # derived assuming exactly 32 picks in every earlier round.
            # NOTE(review): that is only approximate when a round does not
            # have exactly 32 selections.
            pick_in_round = player_info["Pick"] - ((draft_round - 1) * 32)
        else:
            pick_in_round = None

        # 'Player' receives the overall pick (or 'Undrafted').
        fallback_row = {
            "Year": player_info["Year"],
            "No.": None,
            "Round": draft_round,
            "Pick": pick_in_round,
            "Player": player_info["Pick"],
            "Name": player_name,
            "Team": team,
            "College": player_info["College"],
        }
        for column, value in fallback_row.items():
            draft_df.at[0, column] = value

    elif len(draft_df) > 1:
        draft_df = draft_df[draft_df['Team'] == team]

    return draft_df

### Combining the Data

In [None]:
# Walk every player-season row and attach that player's draft details.
#
# `first_team` is the team on the first row seen for the current player. It
# is used to tell same-named players apart in the draft tables. This relies
# on a player's rows being adjacent in df_filter.
current_player = df_filter["player_name"].iloc[0]
first_team = df_filter["team"].iloc[0]
failed_cases = []

# df_filter column to fill -> column of the frame returned by grab_draft_info.
draft_column_map = {
    'Draft Year': 'Year',
    'Draft No.': 'No.',
    'Draft Round': 'Round',
    'Draft Pick': 'Pick',
    'Draft Overall': 'Player',
    'Draft Team': 'Team',
    'College': 'College',
}

# Iterate over a copy because df_filter gains columns inside the loop.
# All rows are processed, starting from the first one, so the state set up
# above matches the first iteration and no row is left without draft data.
for i, row in df_filter.copy().iterrows():
    player_name = row["player_name"]
    position = row["position"]
    team = row['team']

    if player_name != current_player:
        current_player = player_name
        first_team = team

    draft_df = grab_draft_info(player_name, position, first_team)

    if len(draft_df) == 1:
        # Exactly one match: copy its draft details onto this row.
        draft_details = draft_df.iloc[0]
        for target_column, source_column in draft_column_map.items():
            df_filter.at[i, target_column] = draft_details[source_column]
    else:
        # Zero or several candidates: record the case for manual follow-up.
        print()
        print("else case")
        print(player_name, position, team)
        print(draft_df)
        failed_cases.append((player_name, position, team))

In [None]:
# List every (player_name, position, team) that did not resolve to exactly one draft row.
print(failed_cases)

## Fixing Draft Add Ons

Names that are mismatched:
    - Tank Dell -> Nathaniel Dell
    
We also need to fill down: some players have duplicate names and have changed teams!

In [None]:
# Specify the year you are interested in
specific_year = 2018

# Filter for rows where 'Draft Team' is empty (NaN or None) and the 'Season' matches the specific year
missing_draft_team = df_filter[(df_filter['season'] == specific_year) & (df_filter['Draft Team'].isna())][["player_name"]]

# Display the filtered DataFrame
print(missing_draft_team)

In [None]:
# Inspect the rows for one player; the commented-out example above uses him as its undrafted case.
df_filter.query("player_name == 'Case Keenum'")

# Adding Contract Data

# Adding Teammate Data

# Adding College Data

# Saving Data Set

In [None]:
# Persist the assembled data set for later notebooks.
df_filter.to_pickle(path="./basic_99_23.pkl")