In [1]:
import pandas as pd
from requests import get
from bs4 import BeautifulSoup
from datetime import date

In [2]:
def get_page(url, timeout=30):
    """Download ``url`` and return its HTML parsed into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``; seconds to wait for the server
        before giving up. Defaults to 30 so a stalled connection cannot hang
        the notebook forever.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the built-in ``html.parser``.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.RequestException
        For connection problems, including timeouts.
    """
    response = get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as data.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")

base_ff_url = 'https://www.fleaflicker.com/mlb/leagues/'
league_ids = ['21579', '21581', '21580', '21582', '21583', '21584', '21585', '21586', '21587', '21588', '21589', 
              '21590', '21591', '21592', '21593', '21594', '21595', '21596']

# Stat columns, in the order the stat spans are paired with them below.
# Defined once here because the list never changes between rows or leagues.
heads = ['HR', 'R','RBI','SB','OBP','OPS','SO','SV','HD','ERA','WHP','QS']

# One dict per team row, accumulated across every league page.
all_teams = []
for league_id in league_ids:
    soup = get_page(base_ff_url + league_id)
    trs = soup.find_all('tr')

    # The second <tr> supplies the column headers (blank header cells are
    # dropped); every <tr> after it is treated as one team's row.
    headers = [th.text for th in trs[1].find_all('th') if th.text]
    # NOTE: exp_headers is reused after the loop to order the DataFrame
    # columns, so the value from the last league processed is the one kept.
    exp_headers = headers + ['league_id', 'league_name', 'team_id']
    league_name = soup.find_all('li', {'class': 'active'})[1].text.strip()

    for row in trs[2:]:
        d_dict = dict.fromkeys(exp_headers)
        d_dict['league_id'] = league_id
        d_dict['league_name'] = league_name
        d_dict['Team'] = row.find('td', {'class': 'left'}).text
        d_dict['Owner'] = row.find('td', {'class': 'right'}).text

        # Take the last path segment of the row's first link as the team id
        # rather than a fixed 6-character tail, so ids of any length survive.
        # Presumably the first link in a row is the team page — TODO confirm.
        href = row.find('a', href=True).get('href')
        d_dict['team_id'] = href.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]

        # Some rows carry the rank cell under a different class string
        # ('bottom right text-center'); fall back to it when the usual
        # lookup finds nothing. An IndexError still surfaces if both are empty.
        rank_cells = (row.find_all('td', {'class': 'right text-center'})
                      or row.find_all('td', {'class': 'bottom right text-center'}))
        d_dict['Rank'] = rank_cells[-1].text

        # Rows whose owner reads 'Take Over' use every nowrap span as a stat;
        # all other rows skip the first span (presumably it belongs to the
        # owner cell rather than a stat — TODO confirm against the page).
        spans = row.find_all('span', {'class': 'nowrap'})
        stats = spans if d_dict['Owner'] == 'Take Over' else spans[1:]
        for h, s in zip(heads, stats):
            d_dict[h] = s.text
        all_teams.append(d_dict)

In [4]:
# Build the frame from the scraped rows; every stat is still text at this point.
all_df = pd.DataFrame(all_teams, columns=exp_headers)

# Counting stats may carry a thousands separator ("1,024"), so commas are
# stripped from every integer column before casting, not just a subset.
int_cols = ['HR', 'R', 'RBI', 'SB', 'SO', 'SV', 'HD', 'QS']
# Rate stats are plain decimals and can be cast directly.
float_cols = ['OBP', 'OPS', 'ERA', 'WHP']

for col in int_cols:
    all_df[col] = all_df[col].str.replace(",", "").astype(int)
for col in float_cols:
    all_df[col] = all_df[col].astype(float)

In [5]:
rank_headers = ['HR', 'R','RBI','SB','OBP','OPS','SO','SV','HD','ERA','WHP','QS']

# ERA and WHP are ranked in descending order, so the smallest value receives
# the most points; every other stat is ranked ascending (largest value wins).
descending_stats = ('ERA', 'WHP')
for stat in rank_headers:
    all_df[stat + '_Points'] = all_df[stat].rank(ascending=stat not in descending_stats)

In [6]:
# Sum the per-stat point columns by name rather than by position. A positional
# slice of the last 12 columns would pick up Total_Points and Overall_Rank if
# this cell were run a second time, silently corrupting the totals.
points_cols = [stat + '_Points' for stat in rank_headers]
all_df['Total_Points'] = all_df[points_cols].sum(axis=1)
# Highest total gets rank 1; ties share the average rank.
all_df['Overall_Rank'] = all_df.Total_Points.rank(ascending=False)

In [7]:
# Preview the first five rows to sanity-check the computed point columns.
all_df.head(5)

Unnamed: 0,Team,Owner,HR,R,RBI,SB,OBP,OPS,SO,SV,...,OBP_Points,OPS_Points,SO_Points,SV_Points,HD_Points,ERA_Points,WHP_Points,QS_Points,Total_Points,Overall_Rank
0,Bang Bang Pop Pop,JohnnyFang,148,509,436,47,0.342,0.82,886,51,...,98.5,117.0,198.5,164.0,211.0,178.0,187.5,148.5,1892.5,24.0
1,Swinger Bell,WMason,152,509,471,48,0.355,0.845,852,27,...,200.5,178.5,175.5,60.0,190.5,66.0,112.5,114.0,1778.0,43.0
2,Harper Wallbanger,Prime416,181,494,524,47,0.344,0.852,867,27,...,126.5,196.5,188.0,60.0,161.0,74.0,62.5,93.0,1718.0,55.5
3,Ten Points for Griffeyndor,coreyjro,139,508,442,33,0.344,0.815,818,40,...,126.5,104.5,144.0,113.0,214.0,142.0,112.5,135.5,1563.5,84.0
4,Forearm Tightness,CropRhombus,145,478,449,52,0.341,0.827,788,11,...,89.0,137.0,126.5,12.0,211.0,131.5,187.5,122.5,1595.0,79.0


In [8]:
# Tag the export with today's ISO date (YYYY-MM-DD): one file per calendar day,
# and a second run on the same day overwrites the earlier file.
t_date = date.today().isoformat()
csv_name = 'current_rankings_' + t_date + '.csv'
all_df.to_csv(csv_name)