In [2]:
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from selenium import webdriver
from SerialParserForDaumKBO import SerialParserForDaumKBO

import sys
import re
import urllib2
import os

In [23]:
class Parser_DaumKBO:
    '''
    stadium : 경기장

    seasonStat : 선발투수 시즌성적
    ------- key list -------
    (away/home)StarterPitcher : 선발투수 이름
    (away/home)StarterPitcherWinCount : 선발투수 승수
    (away/home)StarterPitcherLoseCount : 선발투수 패수
    (away/home)StarterPitcherERA : 선발투수 평균자책
    (away/home)StarterPitcherWHIP : 이닝당 안타 볼넷 허용률

    vsStat : 선발투수 상대전적
    ------- key list -------
    (away/home)StarterPitcher : 선발투수 이름
    (away/home)StarterPitcherWinCount : 선발투수 승수
    (away/home)StarterPitcherLoseCount : 선발투수 패수
    (away/home)StarterPitcherERA : 선발투수 평균자책
    (away/home)StarterPitcherWHIP : 이닝당 안타 볼넷 허용률
    
    startingLineUp : 선발타자
    ------- key list -------
    (away/home) : (포지션,이름,평균타율)

    keyPlayer : 키플레이어(타자)
    ------- key list -------
    (away/home) : (포지션,이름,평균타율)

    rank : 팀의 순위(변동)
    ------- key list -------
    (away/home) : (순위,(1:상승/-1:하강/0:유지))

    win_lose : 팀의 전적
    ------- key list -------
    (away/home) : (승,무,패)

    accumulation : 팀의 연승(패)
    ------- key list -------
    (away/home) : (1/-1)

    '''
    def __init__(self,date,awayTeam):
        s=SerialParserForDaumKBO(date,awayTeam)
        serial=s.getSerial()
        url='http://m.sports.media.daum.net/m/sports/pack/3min/%s'%(serial)
        
#         window=nt
        if os.name=='nt':
            driver=webdriver.PhantomJS(executable_path='./phantomjs.exe')
#       ubuntu=posix
        else:
            driver=webdriver.PhantomJS(executable_path='./phantomjs')
        driver.get(url)
        
        data=driver.page_source
        html=BeautifulSoup(data)
        
        self.stadium=html.select_one('span.location').text
        self.seasonStat={}
        self.seasonStat['awayStarterPitcher']=html.select_one('div.pitcher_comm.pitcher_away strong.name').text
        self.seasonStat['homeStarterPitcher']=html.select_one('div.pitcher_comm.pitcher_home strong.name').text
        self.seasonStat['awayStarterPitcherWinCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(1) span.bg_graph.graph_away span').text)
        self.seasonStat['homeStarterPitcherWinCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(1) span.bg_graph.graph_home span').text)
        self.seasonStat['awayStarterPitcherLoseCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(2) span.bg_graph.graph_away span').text)
        self.seasonStat['homeStarterPitcherLoseCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(2) span.bg_graph.graph_home span').text)
        self.seasonStat['awayStarterPitcherERA']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(3) span.bg_graph.graph_away span').text)
        self.seasonStat['homeStarterPitcherERA']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(3) span.bg_graph.graph_home span').text)
        self.seasonStat['awayStarterPitcherWHIP']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(4) span.bg_graph.graph_away span').text)
        self.seasonStat['homeStarterPitcherWHIP']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(4) span.bg_graph.graph_home span').text)
        
        self.vsStat={}
        self.vsStat['awayStarterPitcher']=html.select_one('div.pitcher_comm.pitcher_away strong.name').text
        self.vsStat['homeStarterPitcher']=html.select_one('div.pitcher_comm.pitcher_home strong.name').text
        self.vsStat['awayStarterPitcherWinCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(1) span.bg_graph.graph_away span').text)
        self.vsStat['homeStarterPitcherWinCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(1) span.bg_graph.graph_home span').text)
        self.vsStat['awayStarterPitcherLoseCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(2) span.bg_graph.graph_away span').text)
        self.vsStat['homeStarterPitcherLoseCount']=int(html.select_one('div#season_stat ul.list_record li:nth-of-type(2) span.bg_graph.graph_home span').text)
        self.vsStat['awayStarterPitcherERA']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(3) span.bg_graph.graph_away span').text)
        self.vsStat['homeStarterPitcherERA']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(3) span.bg_graph.graph_home span').text)
        self.vsStat['awayStarterPitcherWHIP']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(4) span.bg_graph.graph_away span').text)
        self.vsStat['homeStarterPitcherWHIP']=float(html.select_one('div#season_stat ul.list_record li:nth-of-type(4) span.bg_graph.graph_home span').text)
        
#         ---------------------------------------------------------------------------------------------------------------------------------------------------------------
#         lineup부분을 새로 가져와야함
        url='http://m.sports.media.daum.net/m/sports/pack/3min/%s?lineup'%(serial)
#         window=nt
        if os.name=='nt':
            driver=webdriver.PhantomJS(executable_path='./phantomjs.exe')
#       ubuntu=posix
        else:
            driver=webdriver.PhantomJS(executable_path='./phantomjs')
        driver.get(url)
        data=driver.page_source
        html=BeautifulSoup(data)
#         ---------------------------------------------------------------------------------------------------------------------------------------------------------------
        self.startingLineUp={}
        for line in html.select('div.wrap tbody tr'):
            self.startingLineUp['away']=(line.select_one('td.position.away').text,line.select_one('td:nth-of-type(2)').text,float(line.select_one('td.batting_average').text))
            self.startingLineUp['home']=(line.select_one('td.position.home').text,line.select_one('td:nth-of-type(2)').text,float(line.select_one('td.batting_average').text))
        
        self.keyPlayer={}
        try:
            keyPlayerParentNode=html.select_one('position.away.key-player').parent
            self.keyPlayer['away']=(keyPlayerParentNode.select_one('td.position.away').text,keyPlayerParentNode.select_one.select_one('td:nth-of-type(2)').text,float(keyPlayerParentNode.select_one.select_one('td.batting_average').text))
        except AttributeError:
            sys.stderr.write('html.select_one(\'position.away.key-player\') == None\n')
        try:
            keyPlayerParentNode=html.select_one('position.home.key-player').parent
            self.keyPlayer['home']=(keyPlayerParentNode.select_one('td.position.home').text,keyPlayerParentNode.select_one.select_one('td:nth-of-type(2)').text,float(keyPlayerParentNode.select_one.select_one('td.batting_average').text))
        except AttributeError:
            sys.stderr.write('html.select_one(\'position.home.key-player\') == None\n')
        
        self.criticalInning=[]
        try:
            for x in html.select('table.tbl_score strong.img_highlight.ico_decisive'):
                node=x.parent.parent.parent
                if node.attr('data-half')=='first':
                    self.criticalInning.append(str.format('%s%d'%('A',int(node.attr('data-inning').text))))
                else:
                    self.criticalInning.append(str.format('%s%d'%('B',int(node.attr('data-inning').text))))
        except TypeError:
            sys.stderr.write('html.select(\'table.tbl_score strong.img_highlight.ico_decisive\') == None\n')
        
#         ---------------------------------------------------------------------------------------------------------------------------------------------------------------
#         result부분을 새로 가져와야함
        url='http://m.sports.media.daum.net/m/sports/pack/3min/%s?result'%(serial)
#         window=nt
        if os.name=='nt':
            driver=webdriver.PhantomJS(executable_path='./phantomjs.exe')
#       ubuntu=posix
        else:
            driver=webdriver.PhantomJS(executable_path='./phantomjs')
        driver.get(url)
        data=driver.page_source
        html=BeautifulSoup(data)
#         ---------------------------------------------------------------------------------------------------------------------------------------------------------------
                
        self.rank={}
        sen=html.select_one('div.recent_stats div.away p.change').text
#      ex)   순위 9위 (-)
        rankNumber=int(sen.split()[1][:-1])
#      순위상승
        if u'▲' in sen:
            self.rank['away']=(rankNumber,int(sen[sen.find('▲'):sen.find(')')]))
#      순위하강
        elif u'▽' in sen:
            self.rank['away']=(rankNumber,-1*int(sen[sen.find('▽'):sen.find(')')]))
#      순위유지
        else:
            self.rank['away']=(rankNumber,0)
        sen=html.select_one('div.recent_stats div.home p.change').text
#      ex)   순위 9위 (-)
        rankNumber=int(sen.split()[1][:-1])
#      순위상승
        if u'▲' in sen:
            self.rank['home']=(rankNumber,int(sen[sen.find('▲'):sen.find(')')]))
#      순위하강
        elif u'▽' in sen:
            self.rank['home']=(rankNumber,-1*int(sen[sen.find('▽'):sen.find(')')]))
#      순위유지
        else:
            self.rank['home']=(rankNumber,0)
        
        self.win_lose={}
        sen=html.select_one('div.recent_stats div.away p.win-lose').text
        self.win_lose['away']=map(int,re.findall('[\d]+',sen))
        sen=html.select_one('div.recent_stats div.home p.win-lose').text
        self.win_lose['home']=map(int,re.findall('[\d]+',sen))
        
        self.accumulation={}
        sen=html.select_one('div.recent_stats div.away p.accumulation').text
        self.accumulation['away']= int(re.findall('[\d]+',sen)[0]) if sen[-1]==u'승' else (-1)*int(re.findall('[\d]+',sen)[0])
        sen=html.select_one('div.recent_stats div.home p.accumulation').text
        self.accumulation['home']=int(re.findall('[\d]+',sen)[0]) if sen[-1]==u'승' else (-1)*int(re.findall('[\d]+',sen)[0])
        
#         ---------------------------------------------------------------------------------------------------------------------------------------------------------------
#         stats부분을 새로 가져와야함
        url='http://m.sports.media.daum.net/m/sports/pack/3min/%s?stats'%(serial)
#         window=nt
        if os.name=='nt':
            driver=webdriver.PhantomJS(executable_path='./phantomjs.exe')
#       ubuntu=posix
        else:
            driver=webdriver.PhantomJS(executable_path='./phantomjs')
        driver.get(url)
        data=driver.page_source
        html=BeautifulSoup(data)
#         ---------------------------------------------------------------------------------------------------------------------------------------------------------------
        
    self.Record
    tmpList=[]
    for element in html.select('li#page-stats div.vs_graph ul.list_record'):
        tmpList.extend(int(re.findall('[\d]+',element.text))
    for element in map(int,tmpList):
        

SyntaxError: invalid syntax (<ipython-input-23-1dee11d02421>, line 194)

In [16]:
parser=Parser_DaumKBO('20160802','LG')
parser.rank

html.select_one('position.away.key-player') == None
html.select_one('position.home.key-player') == None
html.select('table.tbl_score strong.img_highlight.ico_decisive') == None


{'away': (8, 0), 'home': (1, 0)}

In [22]:
parser.startingLineUp

{}

In [12]:
url='http://m.sports.media.daum.net/m/sports/pack/3min/71041700?stats'

if os.name=='nt':
    driver=webdriver.PhantomJS(executable_path='./phantomjs.exe')
else:
    driver=webdriver.PhantomJS(executable_path='./phantomjs')
driver.get(url)

data=driver.page_source
html=BeautifulSoup(data)

In [15]:
l

[u'7',
 u'4',
 u'0',
 u'0',
 u'1',
 u'0',
 u'1',
 u'2',
 u'12',
 u'7',
 u'0',
 u'0',
 u'1',
 u'1',
 u'4',
 u'4']