In [1]:
from tqdm.notebook import tqdm
from cfb import models as cfb_models
import pandas as pd; DF=pd.DataFrame
import numpy as np; import requests

import warnings
warnings.filterwarnings("ignore")

### Read in and Clean "Coaches" DF

In [27]:
coaches_df=pd.read_csv('_Coaches8.csv')
# Clean Numerical Columns and conver to floats
# Make Pay in millions of dollars
cols=['SchoolPay','TotalPay','Bonus','BonusPaid','AssistantPay','Buyout']
replace_list = ['$',',','-']
for col in cols:
    coaches_df[col]=coaches_df[col].astype(str).str.strip()
    for char in replace_list:
        coaches_df[col]=coaches_df[col].str.replace(char,'')
    coaches_df[col]=coaches_df[col].replace('',np.nan)

### Create Season/Coach/Conference Instances

In [30]:
# Create Season Instances
for year in range(1900,2100):
    season,c = Season.objects.get_or_create(
        year=year,defaults=dict(year=year)
    )
season = Season.objects.get(year=2019)
def main():
    print(f'------------- FB Season: {season} ------------------')
    for tup in tqdm(coaches_df.itertuples()):
        # get or create school
        school,c=cfb_models.School.objects.get_or_create(
            name=tup.School,defaults=dict(name=tup.School)
        )
        # get or create team
        team,c=cfb_models.Team.objects.get_or_create(
            school=school,type='Football',
            defaults=dict(school=school,type='Football')
        )
        # get or create conference
        conf,c=cfb_models.Conference.objects.get_or_create(
            name=tup.Conference,defaults=dict(name=tup.Conference)
        )

        # get or create coach
        coach_name_arr = tup.Coach.strip().split(' ')
        cfn=coach_name_arr[0]; cln=coach_name_arr[1]
        coach,c = cfb_models.Coach.objects.get_or_create(
            firstName=cfn,lastName=cln,
            defaults=dict(firstName=cfn,lastName=cln)
        )

        # get or create team season
        tseason,c = TeamSeason.objects.update_or_create(
            team=team,season=season,conference=conf,coach=coach,
            defaults=dict(team=team,season=season,conference=conf,coach=coach)
        )

        cpay,c = CoachPay.objects.update_or_create(
            teamseason=tseason,base=tup.SchoolPay,total=tup.TotalPay,
            bonus=tup.Bonus,bonusPaid=tup.BonusPaid,buyout=tup.Buyout,
            defaults=dict(
                teamseason=tseason,base=tup.SchoolPay,total=tup.TotalPay,
                bonus=tup.Bonus,bonusPaid=tup.BonusPaid,buyout=tup.Buyout,        
            )
        )


### Scrape cfbstats.com to extract url id for each team

In [31]:
#### Get Teams URL ID from cfbstats

def translate_team_name(old_name):
    team_match_dict={
        'BYU':'Brigham Young',"Hawai'i":'Hawaii', 
        'Miami (Florida)':'Miami (Fla.)', 'SMU':'Southern Methodist', 
        'TCU':'Texas Christian', 'UAB':'Alabama at Birmingham', 
        'UCF':'Central Florida', 'UNLV':'Nevada-Las Vegas',
        'USC':'Southern California','UTEP':'Texas-El Paso', 
        'UTSA':'Texas-San Antonio','Appalachian St.':'Appalachian State',
        'Fla. Atlantic':'Florida Atlantic',"Florida Int'l":'Florida International',
        'Western Ky.':'Western Kentucky','Middle Tenn. St.':'Middle Tennessee'
    } 
    old_name = replace_abbrevs(old_name)
    if old_name in team_match_dict.keys():
        new_name = team_match_dict[old_name]    
    else:
        new_name = old_name
    return new_name

def replace_abbrevs(old_name):
    new_name=old_name
    replace_arr=[
        ['St.','State'],
        ['Ala.','Alabama'],
        ['Ga.','Georgia'],
        ['Fla.','Florida'],
        ['Ill.','Illinois'],
        ['Caro.','Carolina'],
        ['Ky.','Kentucky'],
        ['La.','Louisiana'],
        ['Mich.','Michigan'],
        ['Miss.','Mississippi'],
    ]
    for r in replace_arr:
        new_name=new_name.replace(r[0],r[1])
    return new_name
    
def get_team_home_page_dfs_from_cfbstats(team_id):
    url = f'http://www.cfbstats.com/2019/team/{team_id}/index.html'
    r=requests.get(url)
    try:
        dfs=pd.read_html(r.content)
    except ValueError:
        dfs=[]
    return dfs

def main():
    found_count=0
    target = len(coaches_df)
    for i in tqdm(range(0,1000)):
        urlId=str(i)
        dfs=get_team_home_page_dfs_from_cfbstats(i)
        if len(dfs)==0: continue
        found_count+=1
        tname = dfs[0].columns[1]
        tname = translate_team_name(tname)
        team = Team.objects.filter(school__name=tname).first()

        cfbs_id,c = CfbstatsUrlId.objects.update_or_create(
            team=team,urlId=urlId,
            defaults=dict(team=team,urlId=urlId)
        )
#main()

#  -----   Get 2019 Season Stats  -----

### Get Record for each team

In [67]:
teamseason = TeamSeason.objects.order_by('?').first()
dfs = get_team_home_page_dfs_from_cfbstats(teamseason.team.cfbs.urlId)
team_stats_df = dfs[0]
team_stats_df = team_stats_df.rename(columns={'Unnamed: 0':'stat_str'})
tname = team_stats_df.columns[1]
team_stats_df = team_stats_df.rename(columns={tname:'value'})

game_result_tdf = dfs[1]
record_df = dfs[2]

def get_loc(val):
    if val[0]=='@':
        loc = 'A' 
    elif val[0]=='+':
        loc = 'N'
    else:
        loc = 'H'
    return loc

game_result_df = game_result_tdf.iloc[:-1,:].copy()
game_result_df['loc']=game_result_df['Opponent'].map(get_loc)
game_result_df['Opponent']=game_result_df['Opponent'].str.replace('@','').str.replace('+','')
game_result_df['Date']=pd.to_datetime(game_result_df['Date'])
game_result_df=game_result_df.rename(columns={'Game Time':'Game_Time'})

print(f'----- {teamseason} ---------')
print(f'--------------------------------------')
print('------------- Records -------------')
for tup in record_df.itertuples():
    desc=tup.Split
    record = tup.Record.strip().split('-')
    win=record[0]
    loss=record[1]
    
    ts_record,c = TeamSeasonRecord.objects.update_or_create(
        teamseason=teamseason,desc=desc,win=win,loss=loss,
        defaults=dict(teamseason=teamseason,desc=desc,win=win,loss=loss)
    )
    print(ts_record)
    
print('------------- Game Results -------------')
for tup in game_result_df.itertuples():
    date = tup.Date
    attendance = tup.Attendance
    result_str = tup.Result
    result = result_str[0]
    if result not in ['L','W']: result = 'T'
    score_str = result_str[2:]
    score1 = score_str.split('-')[0]
    score2 = score_str.split('-')[1]
    score_arr = [int(score1),int(score2)]
    if result=='W':
        score,oscore=(max(score_arr),min(score_arr))
    else:
        score,oscore=(min(score_arr),max(score_arr))
    
    oname=tup.Opponent.strip()\
        .replace('St.','State').replace('Ala.','Alabama')
    if oname[0].isnumeric():
        orank = oname.split(' ')[0]
        oname = " ".join(oname.split(' ')[1:])
    else:
        orank=None
    oname = translate_team_name(oname)       
    
    glen = tup.Game_Time
    glen = float(glen.split(':')[0]) + float(glen.split(':')[1])/60
    
    opponent = TeamSeason.objects.filter(
        season=teamseason.season,
        team__school__name=oname
    ).first()

    gr,c = GameResult.objects.update_or_create(
        teamseason=teamseason,
        oname=oname,
        date=date,
        defaults=dict(
            teamseason=teamseason,
            opponent=opponent,
            oname=oname,
            date=date,
            score=score,
            oscore=oscore,
            result=result,
            attendance=attendance,
            length=glen,
        )
    )
    print(gr)
print('------------- Team Season Stats -------------')
for tup in team_stats_df.itertuples():
    stat_str = tup.stat_str
    category=(stat_str.split(':')[0]).replace('/','per')
    desc_str=(stat_str.split(':')[1]).strip() if len(stat_str.split(':'))>1 else category


    desc_arr = desc_str.replace('/','per').split(' - ')
    value_arr = tup.value.replace('%','').split(' - ')
    ovalue_arr = tup.Opponents.replace('%','').split(' - ')
    
    vtups = list(zip(desc_arr,value_arr,ovalue_arr))
    for vtup in vtups:
        desc=vtup[0]
        value=vtup[1]
        ovalue=vtup[2]
        
        if ':' in value:
            value = float(value.split(':')[0]) + float(value.split(':')[1])/60
            ovalue = float(ovalue.split(':')[0]) + float(ovalue.split(':')[1])/60
        
        tss,c = TeamSeasonStat.objects.update_or_create(
            teamseason=teamseason,
            category=category,
            desc=desc,
            value=value,
            ovalue=ovalue,
        )
        print(tss)

----- 2019, Football, Colorado ---------
--------------------------------------
------------- Records -------------
2019, Football, Colorado, All Games, 5-7
2019, Football, Colorado, at Home, 3-3
2019, Football, Colorado, on Road/Neutral Site, 2-4
2019, Football, Colorado, vs. Conference, 3-6
2019, Football, Colorado, vs. Non-Conference, 2-1
2019, Football, Colorado, vs. Ranked (AP), 0-4
2019, Football, Colorado, vs. Unranked (AP), 5-3
2019, Football, Colorado, vs. FBS (I-A), 5-7
2019, Football, Colorado, vs. FCS (I-AA), 0-0
2019, Football, Colorado, vs. FBS Winning, 2-4
2019, Football, Colorado, vs. FBS Non-Winning, 3-3
2019, Football, Colorado, vs. BCS AQ, 4-6
2019, Football, Colorado, vs. BCS non-AQ, 1-1
2019, Football, Colorado, vs. FBS Power 5, 4-6
2019, Football, Colorado, vs. FBS non-Power 5, 1-1
2019, Football, Colorado, in August/September, 3-1
2019, Football, Colorado, in October, 0-4
2019, Football, Colorado, in November, 2-2
2019, Football, Colorado, in December/January, 0-

ValueError: could not convert string to float: '-'

2019, Football, Michigan, Scoring-|-PointsperGame, 31.7 [opp(20.7)]
2019, Football, Michigan, Scoring-|-Games, 13.0 [opp(13.0)]
2019, Football, Michigan, Scoring-|-Points, 412.0 [opp(269.0)]
2019, Football, Michigan, First Downs-|-Total, 286.0 [opp(226.0)]
2019, Football, Michigan, First Downs-|-Rushing, 108.0 [opp(95.0)]
2019, Football, Michigan, First Downs-|-Passing, 141.0 [opp(103.0)]
2019, Football, Michigan, First Downs-|-By Penalty, 37.0 [opp(28.0)]
2019, Football, Michigan, Rushing-|-Yards per Attempt, 3.96 [opp(3.16)]
2019, Football, Michigan, Rushing-|-Attempts, 495.0 [opp(501.0)]
2019, Football, Michigan, Rushing-|-Yards, 1959.0 [opp(1582.0)]
2019, Football, Michigan, Rushing-|-TD, 26.0 [opp(20.0)]
2019, Football, Michigan, Passing-|-Rating, 138.15 [opp(126.2)]
2019, Football, Michigan, Passing-|-Yards, 3261.0 [opp(2412.0)]
2019, Football, Michigan, Passing-|-Attempts, 410.0 [opp(345.0)]
2019, Football, Michigan, Passing-|-Completions, 228.0 [opp(198.0)]
2019, Football, Mich

In [47]:
tname = team_stats_df.columns[1]
team_stats_df = team_stats_df.rename(columns={tname:'value'})

In [48]:
team_stats_df

Unnamed: 0,stat_str,value,Opponents
0,Scoring: Points/Game,31.7,20.7
1,Scoring: Games - Points,13 - 412,13 - 269
2,First Downs: Total,286,226
3,First Downs: Rushing - Passing - By Penalty,108 - 141 - 37,95 - 103 - 28
4,Rushing: Yards / Attempt,3.96,3.16
5,Rushing: Attempts - Yards - TD,495 - 1959 - 26,501 - 1582 - 20
6,Passing: Rating,138.15,126.20
7,Passing: Yards,3261,2412
8,Passing: Attempts - Completions - Interception...,410 - 228 - 9 - 25,345 - 198 - 9 - 16
9,Total Offense: Yards / Play,5.77,4.72
