In [3]:
import re
import time
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver import ActionChains

# ElementClickInterceptedException - неможна клацнуть по елементу
# NoSuchElementException - такого елементу неіснує

In [147]:
def get_driver():
    path_to_driver = "./chromedriver"
    chrome_service = Service(path_to_driver)
    options = Options()
#     options.add_argument("headless")
    return webdriver.Chrome(
        service=chrome_service,
        options=options
    )


def find_elems(target, attr, attr_data, by_text=False):
    
    if attr == By.XPATH and by_text is False:
        attr_data =  f"//div[starts-with(@class, '{attr_data}')]" 
    if attr == By.XPATH and by_text is True:
        attr_data =  f"//div[text()='{attr_data}']"
    
    count = 0
    while count < 6:
        try:
            elements = target.find_elements(attr, attr_data)
            if len(elements) == 0:
                time.sleep(0.5)
                count += 1
                continue
            break
        except NoSuchElementException:
            time.sleep(0.5)
            count += 1
    
    return elements


def click_elem(driver, element):
    """відкриття нового вікна з потрібним матчем"""
    
    act = ActionChains(driver)
    while 1:
        try:
            element.click()  
            break
        except Exception as e:
            act.key_down(Keys.DOWN).key_down(Keys.DOWN).perform()
            time.sleep(0.5)


def choose_day(driver, days, to_day):
    """
    вчорашні/завтрашні матчі ?! yesterday/tomorrow
    """
    for _ in range(days):
        elements = find_elems(driver, By.CLASS_NAME, f"calendar__navigation--{to_day}")
        click_elem(driver, elements[0])
        
        
def get_source_w_ids(driver, days=1, to_day='yesterday'):
    driver.get("https://www.soccerstand.com/ru/")
    choose_day(driver, days=days, to_day=to_day)
    find_elems(driver, By.CLASS_NAME, 'sportName soccer')
    return driver.page_source

    
def get_ids(data):
    soup = BeautifulSoup(data, 'html.parser')
    
    divs_w_ids = (soup
         .find('div', class_=re.compile("sportName soccer"))
         .find_all('div', class_=re.compile("event__match"))
        )
    
    return list(map(
        lambda x: x.attrs["id"][4:],
        divs_w_ids
    ))

    
def teams_name(soup):
    return (
        (soup
                .find("div", class_=re.compile("duelParticipant__home"))
                .find("a", class_=re.compile("participant__participantName"))
                     ).text,
       (soup
            .find("div", class_=re.compile("duelParticipant__away"))
            .find("a", class_=re.compile("participant__participantName"))
                 ).text
           )

def teams_score(soup):
    return (soup
                .find("div", class_=re.compile("duelParticipant__score"))
                .find("div", class_=re.compile("detailScore__wrapper"))
                     ).text


def get_res_by_half(soup):
    """
    знаходження результатів кожного тайма
    повертає: список результатів, як строчки
    """
    f_half_raw, s_half_raw = soup.find_all('div', class_=re.compile("smv__incidentsHeader"))
    return  f_half_raw.find_all('div')[1].text, s_half_raw.find_all('div')[1].text


def close_cookies_div(driver):
    """
    закриття вікна з підтвердження куків
    """
    global GLOBALS 
    
    elems = find_elems(driver, By.ID, 'onetrust-accept-btn-handler')
    if not elems:
        return
    elems[0].click()
    GLOBALS['close_cookies'] = True
    

def get_res_data(driver, without_score=False):
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    h_team, a_team = teams_name(soup)
    
    
    if without_score is True:
        return {
            "h_team": h_team,
            "a_team": a_team,
            "score": 'without_score',
            "f_half": 'without_score',
            "s_half": 'without_score',
        }
    else:
        elems = find_elems(driver, By.XPATH, "2-й тайм", by_text=True)        
        if not elems:
            raise Exception('нет данних за 1 тайм ')
        score = teams_score(soup)
        f_half, s_half = get_res_by_half(soup)
        return {
            "h_team": h_team,
            "a_team": a_team,
            "score": score,
            "f_half": f_half,
            "s_half": s_half,
        }


def do_job(driver, match_id):
    global GLOBALS 
    
    res_dict = {
        'current_match_data': None,
        'h_match_data': None,
        'a_match_data': None
    }
    
    
    driver.get(f"https://www.soccerstand.com/ru/match/{match_id}/#/match-summary/match-summary")

    if GLOBALS['close_cookies'] is False:
        close_cookies_div(driver)
        
        
    current_match_data = get_res_data(
            driver, 
            without_score=True if GLOBALS['to_day'] == 'tomorrow' else False
        )
        

    res_dict['current_match_data'] = current_match_data

    driver.get(f"https://www.soccerstand.com/ru/match/{match_id}/#/standings/table/overall")

    team_rows = find_elems(driver, By.XPATH, 'ui-table__row table__row--selected' )

    for team in team_rows:
        line_data_row = team.text
        matches_elements = find_elems(team , By.CLASS_NAME, 'tableCellFormIcon' )


        target_match = 2 if '?' in line_data_row else 1
        target_match = target_match - 1 if GLOBALS['to_day'] == 'tomorrow' else target_match
        
        click_elem(driver, matches_elements[target_match])

        # переключення на нове вікно
        w =  driver.window_handles    
        driver.switch_to.window(w[1])
        time.sleep(1)
        try:       
            window_data = get_res_data(driver)
            if current_match_data['h_team'] == line_data_row.split('\n')[1]:
                res_dict['h_match_data'] = window_data
            else:
                res_dict['a_match_data'] = window_data
        finally:
            driver.close()
            driver.switch_to.window(w[0])

    return res_dict 



In [145]:
def main_job(to_day):
    try:
        rez_list = []
        driver = get_driver()
        
        sourse_w_ids = get_source_w_ids(driver, to_day=to_day)
        match_ids = get_ids(sourse_w_ids)
#         match_ids = ['t6h4FgKo', ]
        for match_id in match_ids[:5]:
            try:
                row_data = do_job(driver, match_id)
                if row_data is None:
                    continue
                rez_list.append(row_data)
#                 print(row_data['current_match_data']['h_team'])
            except Exception as e:
                print(e, type(e), match_id)
#                 print(e.with_traceback())
                continue
    finally:
        driver.quit()
    return rez_list
    
    
GLOBALS = {
    'close_cookies': False, 
    'to_day': 'tomorrow',  
#     'to_day': 'yesterday', 
}    

rez = main_job(to_day=GLOBALS['to_day'])


calendar__navigation--tomorrow
sportName soccer
onetrust-accept-btn-handler
//div[starts-with(@class, 'ui-table__row table__row--selected')]
tableCellFormIcon
//div[text()='2-й тайм']
tableCellFormIcon
//div[text()='2-й тайм']
//div[starts-with(@class, 'ui-table__row table__row--selected')]
tableCellFormIcon
//div[text()='2-й тайм']
tableCellFormIcon
//div[text()='2-й тайм']
//div[starts-with(@class, 'ui-table__row table__row--selected')]
tableCellFormIcon
//div[text()='2-й тайм']
tableCellFormIcon
//div[text()='2-й тайм']
//div[starts-with(@class, 'ui-table__row table__row--selected')]
tableCellFormIcon
//div[text()='2-й тайм']
tableCellFormIcon
//div[text()='2-й тайм']
//div[starts-with(@class, 'ui-table__row table__row--selected')]
tableCellFormIcon
//div[text()='2-й тайм']
tableCellFormIcon
//div[text()='2-й тайм']


In [146]:
rez

[{'current_match_data': {'h_team': 'Motz/Silz',
   'a_team': 'Имст',
   'score': 'without_score',
   'f_half': 'without_score',
   's_half': 'without_score'},
  'h_match_data': {'h_team': 'Motz/Silz',
   'a_team': 'Куфштайн',
   'score': '1-1',
   'f_half': '1 - 1',
   's_half': '0 - 0'},
  'a_match_data': {'h_team': 'Имст',
   'a_team': 'СВ Воргль',
   'score': '1-2',
   'f_half': '1 - 0',
   's_half': '0 - 2'}},
 {'current_match_data': {'h_team': 'Аккрингтон',
   'a_team': 'Плимут',
   'score': 'without_score',
   'f_half': 'without_score',
   's_half': 'without_score'},
  'h_match_data': {'h_team': 'Аккрингтон',
   'a_team': 'Милтон Кинс',
   'score': '0-1',
   'f_half': '0 - 1',
   's_half': '0 - 0'},
  'a_match_data': {'h_team': 'Плимут',
   'a_team': 'Форест Грин',
   'score': '2-0',
   'f_half': '1 - 0',
   's_half': '1 - 0'}},
 {'current_match_data': {'h_team': 'Барнсли',
   'a_team': 'Шеффилд Уэнсдей',
   'score': 'without_score',
   'f_half': 'without_score',
   's_half': 'wi

'7.\nТолука U20\n11\n5\n2\n4\n18:11\n17\n?\nН\nП\nB\nB\nB'

In [14]:
headers = [
    'cur_match_h_team',
    'cur_match_a_team',
    'cur_match_score',
    'cur_match_f_half',
    'cur_match_s_half',
    
    'h_match_h_team',
    'h_match_a_team',
    'h_match_score',
    'h_match_f_half',
    'h_match_s_half',
    
    'a_match_h_team',
    'a_match_a_team',
    'a_match_score',
    'a_match_f_half',
    'a_match_s_half',
]

data = [
    rez['current_match_data']['h_team'],
    rez['current_match_data']['a_team'],
    rez['current_match_data']['score'],
    rez['current_match_data']['f_half'],
    rez['current_match_data']['s_half'],
    
    rez['h_match_data']['h_team'],
    rez['h_match_data']['a_team'],
    rez['h_match_data']['score'],
    rez['h_match_data']['f_half'],
    rez['h_match_data']['s_half'],
    
    rez['a_match_data']['h_team'],
    rez['a_match_data']['a_team'],
    rez['a_match_data']['score'],
    rez['a_match_data']['f_half'],
    rez['a_match_data']['s_half'],
]
rez

TypeError: list indices must be integers or slices, not str

In [109]:
a


<html lang="ru"><head>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<meta content="index,follow" name="robots"/>
<meta content="width=device-width, initial-scale=1, minimal-ui" name="viewport"/>
<link href="/res/_fs/image/4_favicons/_ss/favicon.ico?v=8" rel="shortcut icon"/>
<link href="/res/_fs/image/4_favicons/_ss/touch-icon-180x180.png?v=4" rel="apple-touch-icon" sizes="180x180"/>
<link href="/res/_fs/image/4_favicons/_ss/favicon-32x32.png?v=4" rel="icon" sizes="32x32" type="image/png"/>
<link href="/res/_fs/image/4_favicons/_ss/favicon-16x16.png?v=4" rel="icon" sizes="16x16" type="image/png"/>
<link href="/manifest/1/?v=5" rel="manifest"/>
<meta content="#001e28" name="theme-color"/>
<meta content="Брайтон - Кристал Пэлас 1:0" name="og:title"/>
<meta content="АНГЛИЯ: Премьер-лига - Тур 8" name="og:description"/>
<meta content="https://www.soccerstand.com/ru/res/image/data/40juIezB-tMjaWeM9-pzMOQXRe.png" name="og:image"/>
<link href="/res/_fs/build/component