In [1]:
import pandas as pd
from requests import get
from bs4 import BeautifulSoup
from datetime import date

In [2]:
def get_page(url):
    response = get(url)
    html = response.content
    soup = BeautifulSoup(html, "html.parser")
    return soup

base_ff_url = 'https://www.fleaflicker.com/mlb/leagues/'
league_ids = ['21579', '21581', '21580', '21582', '21583', '21584', '21585', '21586', '21587', '21588', '21589', 
              '21590', '21591', '21592', '21593', '21594', '21595', '21596']

all_teams = []
for l in league_ids:
    url = base_ff_url + l
    soup = get_page(url)
    trs = soup.find_all('tr')
    raw_headers = trs[1].find_all('th')
    player_data = trs[2:]
    headers = []
    for header in raw_headers:
        if header.text:
            headers.append(header.text)
    exp_headers = headers + ['league_id', 'league_name', 'team_id']   
    league_name = soup.find_all('li', {'class': 'active'})[1].text.strip()
    for row in player_data:
        d_dict = dict.fromkeys(exp_headers)
        d_dict['league_id'] = l
        d_dict['league_name'] = league_name
        d_dict['Team'] = row.find('td', {'class': 'left'}).text
        d_dict['Owner'] = row.find('td', {'class': 'right'}).text
        d_dict['team_id'] = row.find('a', href=True).get('href')[-6:]
        try:
            d_dict['Rank'] = row.find_all('td', {'class': 'right text-center'})[-1].text
        except IndexError:
            d_dict['Rank'] = row.find_all('td', {'class': 'bottom right text-center'})[-1].text
        heads = ['HR', 'R','RBI','SB','OBP','OPS','SO','SV','HD','ERA','WHP','QS']
        if d_dict['Owner'] == 'Take Over':
            stats = row.find_all('span', {'class': 'nowrap'})
        else:
            stats = row.find_all('span', {'class': 'nowrap'})[1:]
        for h, s in zip(heads, stats):
            d_dict[h] = s.text
        all_teams.append(d_dict)

In [3]:
all_df = pd.DataFrame(all_teams, columns=exp_headers)
all_df.HR = all_df.HR.str.replace(",","").astype(int)
all_df.R = all_df.R.str.replace(",","").astype(int)
all_df.RBI = all_df.RBI.str.replace(",","").astype(int)
all_df.SB = all_df.SB.astype(int)
all_df.OBP = all_df.OBP.astype(float)
all_df.OPS = all_df.OPS.astype(float)
all_df.SO = all_df.SO.str.replace(",","").astype(int)
all_df.SV = all_df.SV.astype(int)
all_df.HD = all_df.HD.astype(int)
all_df.ERA = all_df.ERA.astype(float)
all_df.WHP = all_df.WHP.astype(float)
all_df.QS = all_df.QS.astype(int)

In [4]:
rank_headers = ['HR', 'R','RBI','SB','OBP','OPS','SO','SV','HD','ERA','WHP','QS']
for r in rank_headers:
    if r in ['ERA', 'WHP']:
        all_df[r+'_Points'] = all_df[r].rank(ascending=False)
    else:
        all_df[r+'_Points'] = all_df[r].rank()

In [5]:
all_df['Total_Points'] = all_df.iloc[:,-12:].sum(axis=1)
all_df['Overall_Rank'] = all_df.Total_Points.rank(ascending=False)

In [6]:
all_df.head()

Unnamed: 0,Team,Owner,HR,R,RBI,SB,OBP,OPS,SO,SV,...,OBP_Points,OPS_Points,SO_Points,SV_Points,HD_Points,ERA_Points,WHP_Points,QS_Points,Total_Points,Overall_Rank
0,Bang Bang Pop Pop,JohnnyFang,289,929,818,92,0.346,0.841,1547,81,...,125.0,151.5,202.0,166.0,220.5,155.5,203.5,139.5,2145.5,7.0
1,50/50 With Anibal,WMason,287,849,848,88,0.35,0.846,1472,56,...,166.0,174.0,177.0,80.0,178.0,35.5,105.5,144.5,1801.5,49.0
2,Harper Wallbanger,docbrock,291,861,849,84,0.347,0.842,1541,53,...,134.0,155.0,200.0,70.0,182.5,116.5,78.5,125.0,1815.0,45.0
3,Rocky Mountain High,rdknott,226,798,743,93,0.347,0.828,1418,67,...,134.0,110.5,155.0,117.5,159.5,196.0,203.5,197.5,1715.5,59.0
4,Forearm Tightness,CropRhombus,246,837,766,94,0.338,0.821,1347,18,...,57.5,84.5,124.0,13.0,222.0,126.5,191.0,149.5,1539.5,81.0


In [7]:
t_date = str(date.today())
all_df.to_csv('current_rankings_'+t_date+'.csv')

In [8]:
def make_ranks(df):
    rank_headers = ['HR', 'R','RBI','SB','OBP','OPS','SO','SV','HD','ERA','WHP','QS']
    for r in rank_headers:
        if r in ['ERA', 'WHP']:
            df[r+'_Points'] = df[r].rank(ascending=False)
        else:
            df[r+'_Points'] = df[r].rank()
    df['Total_Points'] = df.iloc[:,-12:].sum(axis=1)
    df['Overall_Rank'] = df.Total_Points.rank(ascending=False)
    return df

In [9]:
leagues = ['D2', 'D3', 'D4']

league_dfs = []
for d in leagues:
    l = all_df[all_df.league_name.str.contains(d)].reset_index(drop=True)
    league_dfs.append(l)

final_dfs = []
for l_d in league_dfs:
    d = make_ranks(l_d)
    final_dfs.append(d)

In [10]:
final_dfs[2]

Unnamed: 0,Team,Owner,HR,R,RBI,SB,OBP,OPS,SO,SV,...,OBP_Points,OPS_Points,SO_Points,SV_Points,HD_Points,ERA_Points,WHP_Points,QS_Points,Total_Points,Overall_Rank
0,Terrence Young,Terrence8026,307,895,852,67,0.357,0.874,1546,102,...,73.5,76.0,81.0,78.0,80.0,68.5,74.5,74.0,3053.5,1.0
1,HearTheBeard,hearthebeard,300,831,840,60,0.354,0.853,1130,98,...,69.5,66.5,32.0,75.5,77.5,82.0,79.0,12.0,2536.5,14.0
2,Let The Mookie Win,Tcryden,246,837,797,105,0.351,0.835,1321,66,...,62.5,43.0,58.0,39.0,77.5,42.0,64.0,40.0,2343.5,18.0
3,The Mensches,MasonO,276,761,739,76,0.338,0.842,1088,78,...,19.0,55.5,23.0,56.0,51.0,65.0,53.5,25.0,1884.5,37.0
4,Bowie Baysox,M5rich5,201,688,634,90,0.349,0.836,1352,19,...,53.5,44.0,63.0,9.0,3.5,71.5,77.0,83.0,1909.0,35.0
5,Norwood Newt,nrwdnewt,222,777,751,69,0.345,0.829,1111,47,...,37.5,39.0,29.0,22.5,19.0,75.0,41.0,42.5,1661.0,52.0
6,cancer_sticks,cancer_sticks,243,783,724,66,0.340,0.813,1376,97,...,23.0,21.0,65.0,74.0,66.5,23.5,27.0,25.0,1671.5,49.0
7,Lords of Lumber,Clarkbar36,197,669,588,92,0.360,0.853,998,72,...,79.0,66.5,15.0,47.5,8.0,7.0,9.5,28.0,1426.5,61.0
8,Ole,MatrixDream,250,774,743,73,0.350,0.870,792,5,...,59.5,75.0,1.0,2.0,30.5,1.0,3.5,16.5,1328.0,69.0
9,Black Love Soul Food,ceashley32,183,695,661,84,0.346,0.812,1068,48,...,41.0,19.5,20.0,24.0,21.0,21.0,16.0,12.0,1143.0,77.0


In [11]:
for div, name in zip(final_dfs, leagues):
    div.to_csv('current_rankings_'+name+'_'+t_date+'.csv')