In [17]:
import requests
from bs4 import BeautifulSoup
from lxml import html
from lxml import etree
import pandas as pd

import sqlite3
import datetime
# Local calendar date (YYYY-MM-DD) of this run; later stamped on the scraped rows.
today_date = datetime.date.today().isoformat()

import config

# Scraping
___
## Get lineup boxes

In [18]:
class daily_lineups(object):
    """Scrape the RotoWire MLB daily-lineups page into flat player records.

    Constructing an instance downloads and parses the page. ``main`` then
    walks every game box on the page and returns one dict per listed player
    (plus one pitcher row per team).
    """

    def __init__(self):
        """Download the lineup page and prepare the XPath templates.

        Raises:
            requests.RequestException: if the request times out or the
                server answers with an HTTP error status.
        """
        self.lineup_url = 'https://www.rotowire.com/baseball/daily_lineups.htm'
        # Bound the request so a stalled connection cannot hang the notebook,
        # and fail loudly instead of parsing an HTTP error page as a lineup.
        self.page = requests.get(self.lineup_url, timeout=30)
        self.page.raise_for_status()
        self.tree = html.fromstring(self.page.content)
        # XPath predicate selecting MLB lineup boxes while skipping "is-tools" boxes.
        self.div_selection = "contains(@class,'lineup is-mlb') and not(contains(@class,'is-tools'))"
        # Each template takes three positional slots, in order:
        # the div predicate above, the 1-based game index, and the team index (1 or 2).
        self.info_locations = {'team':"//div[{}][{}]/div[2]/div/div/div[{}]/div[1]/text()",
                               'pitcher':"//div[{}][{}]/div[2]/div[2]/ul[{}]/li[1]/div/a",
                               'players':"//div[{}][{}]/div[2]/div[2]/ul[{}]/li/a",
                               'positions':"//div[{}][{}]/div[2]/div[2]/ul[{}]/li/div[@class='lineup__pos']"}

    def main(self):
        """Return a flat list of player dicts for every game box on the page.

        Each dict has the keys ``fixture``, ``name``, ``position`` and ``team``.
        """
        games = []
        games_today = len(self.tree.xpath("//div[{}]".format(self.div_selection)))
        # XPath positions are 1-based, so iterate 1..games_today directly.
        for game_num in range(1, games_today + 1):
            for team in (1, 2):
                scraped = {
                    field: self.tree.xpath(template.format(self.div_selection, game_num, team))
                    for field, template in self.info_locations.items()
                }
                games += self.get_players(scraped['players'], scraped['positions'],
                                          scraped['team'], scraped['pitcher'], game_num)
        return games

    def get_team(self, team_scrape):
        """Return the first scraped team value, or None when nothing matched."""
        # Explicit emptiness check instead of a bare ``except`` that would
        # also hide unrelated errors.
        if not team_scrape:
            return None
        return team_scrape[0]

    def get_pitcher(self, pitcher_scrape):
        """Return the text of the first pitcher element, or None when none matched."""
        # A box without a matching pitcher link yields an empty list; return
        # None rather than raising IndexError and aborting the whole scrape.
        if not pitcher_scrape:
            return None
        return pitcher_scrape[0].text

    def get_players(self, players_scrape, position_scrape, team_scrape, pitcher_scrape, game_num):
        """Build the record list for one team in one game.

        The first record is always the pitcher row (position ``'P'``; its
        ``name`` is None when no pitcher element was found), followed by one
        record per (player, position) pair.

        Args:
            players_scrape: elements whose ``title`` attribute holds the player name.
            position_scrape: elements whose text holds the fielding position.
            team_scrape: list whose first item is the team value (may be empty).
            pitcher_scrape: list whose first element's text is the pitcher name (may be empty).
            game_num: 1-based index of the game box, stored as ``fixture``.

        Returns:
            list of dicts with keys ``fixture``, ``name``, ``position``, ``team``.
        """
        team = self.get_team(team_scrape)
        player_list = [{'fixture': game_num,
                        'name': self.get_pitcher(pitcher_scrape),
                        'position': 'P',
                        'team': team}]
        # zip stops at the shorter sequence, so an unmatched trailing player
        # or position is dropped.
        for player_, position_ in zip(players_scrape, position_scrape):
            player_list.append({'fixture': game_num,
                                # Prefer the ``title`` attribute; fall back to the
                                # link text instead of raising KeyError when it is absent.
                                'name': player_.attrib.get('title', player_.text),
                                'position': position_.text,
                                'team': team})
        return player_list

In [19]:
# Constructing the scraper downloads the page; the timestamp is taken right after.
gl = daily_lineups()
pull_time = datetime.datetime.now().isoformat(sep=' ', timespec='seconds')

In [20]:
# One row per scraped player record; exact duplicate rows are collapsed.
lineups = pd.DataFrame(gl.main()).drop_duplicates()

In [21]:
# Stamp every row with the run date and the time the page was pulled.
lineups = lineups.assign(game_date=today_date, pull_time=pull_time)

In [6]:
# Open a connection to the SQLite database at the path set in config.lineups_db_path.
con = sqlite3.connect(config.lineups_db_path)

In [7]:
# Append this pull to the 'daily_lineups' table (the DataFrame index is not written).
# Because the mode is append, re-running this cell inserts the same rows again.
lineups.to_sql('daily_lineups', con, if_exists='append', index=False)

In [29]:
# Release the database connection, then report completion.
con.close()
print('Daily Lineup Table Updated')

Daily Lineup Table Updated


# Name Mapping

In [5]:
import pandas as pd
import requests
from io import StringIO
import config
import sqlite3

In [2]:
# Download the master player CSV and load it into a DataFrame.
name_map_url = "http://crunchtimebaseball.com/master.csv"
# Bound the request and fail on an HTTP error status, so an error page is
# never silently parsed as CSV.
name_map_response = requests.get(name_map_url, timeout=30)
name_map_response.raise_for_status()
# Bytes that are not valid UTF-8 are dropped rather than raising.
name_map_content = name_map_response.content.decode('utf-8','ignore')
name_map = pd.read_csv(StringIO(name_map_content))

In [None]:
# Merge the freshly downloaded name map into the rows already stored in the
# player-mapping table, dropping rows that are exact duplicates.
con = sqlite3.connect(config.player_mapping_db)
existing_players = pd.read_sql('select * from {}'.format(config.player_map_table), con=con)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent (original index labels are kept, as append did).
final_map = pd.concat([existing_players, name_map]).drop_duplicates()

In [6]:
# Overwrite the stored mapping table with the combined map. The connection is
# closed even if the write fails, so it is never leaked.
try:
    final_map.to_sql(con=con, name=config.player_map_table, if_exists='replace', index=False)
    print("Player ID's updated")
finally:
    con.close()

In [29]:
# Look up retro_id for each lineup row by matching its name against rotowire_name.
# Inner join: lineup rows with no matching rotowire_name are not shown.
pd.merge(
    lineups,
    final_map[['rotowire_name', 'retro_id']],
    how='inner',
    left_on='name',
    right_on='rotowire_name',
)

Unnamed: 0,fixture,name,position,team,game_date,pull_time,rotowire_name,retro_id
0,1,Wei-Yin Chen,P,MIA,2019-03-15,2019-03-15 10:43:45,Wei-Yin Chen,chenw001
1,1,Lewis Brinson,CF,MIA,2019-03-15,2019-03-15 10:43:45,Lewis Brinson,brinl001
2,1,Brian Anderson,3B,MIA,2019-03-15,2019-03-15 10:43:45,Brian Anderson,andeb006
3,1,Martin Prado,1B,MIA,2019-03-15,2019-03-15 10:43:45,Martin Prado,pradm001
4,1,Peter O'Brien,RF,MIA,2019-03-15,2019-03-15 10:43:45,Peter O'Brien,obrip002
5,1,Bryan Holaday,C,MIA,2019-03-15,2019-03-15 10:43:45,Bryan Holaday,holab001
6,1,Yadiel Rivera,SS,MIA,2019-03-15,2019-03-15 10:43:45,Yadiel Rivera,rivey001
7,1,Deven Marrero,2B,MIA,2019-03-15,2019-03-15 10:43:45,Deven Marrero,marrd001
8,1,Sean Newcomb,P,ATL,2019-03-15,2019-03-15 10:43:45,Sean Newcomb,newcs001
9,1,Ozzie Albies,2B,ATL,2019-03-15,2019-03-15 10:43:45,Ozzie Albies,albio001
