In [1]:
import re
import time
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver import ActionChains

# ElementClickInterceptedException - the element cannot be clicked
# NoSuchElementException - no such element exists

In [2]:
def get_driver(path_to_driver="./chromedriver", headless=False):
    """
    Create a Chrome WebDriver.

    Parameters:
        path_to_driver: path to the chromedriver executable handed to
            ``Service``. Defaults to ``"./chromedriver"``.
        headless: when true, Chrome is started with ``--headless``.
            Defaults to False (a visible browser window).

    Returns:
        The object returned by ``webdriver.Chrome``.
    """
    chrome_service = Service(path_to_driver)
    options = Options()
    if headless:
        options.add_argument("--headless")
    return webdriver.Chrome(
        service=chrome_service,
        options=options
    )


def find_elems(target, attr, attr_data, by_text=False, attempts=6, delay=0.5):
    """
    Poll ``target`` for elements until at least one is found.

    Parameters:
        target: object exposing ``find_elements(by, value)`` (the code in
            this file passes a driver or an element).
        attr: locator strategy, e.g. ``By.XPATH`` or ``By.CLASS_NAME``.
        attr_data: locator value. For ``By.XPATH`` it is NOT a raw XPath:
            it is wrapped into ``//div[starts-with(@class, '...')]``
            (``by_text`` is False) or ``//div[text()='...']``
            (``by_text`` is True).
        by_text: selects the XPath template, see above.
        attempts: maximum number of lookups (default 6).
        delay: seconds slept after each unsuccessful lookup (default 0.5).

    Returns:
        The list of elements found, or an empty list if nothing showed up
        within ``attempts`` lookups.
    """
    if attr == By.XPATH and by_text is False:
        attr_data = f"//div[starts-with(@class, '{attr_data}')]"
    if attr == By.XPATH and by_text is True:
        attr_data = f"//div[text()='{attr_data}']"

    # Start from an empty result so that a run in which every lookup raises
    # still returns a list instead of failing on an unbound local.
    elements = []
    for _ in range(attempts):
        try:
            elements = target.find_elements(attr, attr_data)
        except NoSuchElementException:
            elements = []
        if elements:
            break
        time.sleep(delay)

    return elements


def click_elem(driver, element, max_attempts=60):
    """
    Click ``element``, scrolling the page down between failed attempts.

    After every failed click two DOWN key presses are sent through an
    ``ActionChains`` bound to ``driver`` and the function sleeps 0.5 s
    before trying again.

    Parameters:
        driver: WebDriver used to build the ``ActionChains``.
        element: object with a ``click()`` method.
        max_attempts: maximum number of click attempts (default 60, i.e.
            roughly 30 s). Previously the loop was unbounded and could hang
            forever on an element that can never be clicked.

    Raises:
        ValueError: if ``max_attempts`` is smaller than 1.
        Exception: the error raised by the last ``click()`` attempt when all
            attempts fail.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    act = ActionChains(driver)
    last_error = None
    for _ in range(max_attempts):
        try:
            element.click()
            return
        except Exception as e:
            # Keep the error: the name bound by `except` is cleared on exit.
            last_error = e
            act.key_down(Keys.DOWN).key_down(Keys.DOWN).perform()
            time.sleep(0.5)

    raise last_error

def choose_day(driver, days, to_day):
    """
    Step the page calendar ``days`` times in the ``to_day`` direction.

    ``to_day`` is appended to the ``calendar__navigation--`` class name
    (the surrounding code uses 'yesterday' and 'tomorrow'). On every step
    the first element carrying that class is looked up again and clicked.
    """
    nav_class = f"calendar__navigation--{to_day}"
    remaining = days
    while remaining > 0:
        nav_buttons = find_elems(driver, By.CLASS_NAME, nav_class)
        click_elem(driver, nav_buttons[0])
        remaining -= 1
        
        
def get_source_w_ids(driver, days=1, to_day='yesterday'):
    """
    Open the site's start page, move the calendar and return the page HTML.

    Parameters:
        driver: WebDriver to navigate with.
        days: number of calendar steps, forwarded to ``choose_day``.
        to_day: calendar direction, forwarded to ``choose_day``.

    Returns:
        ``driver.page_source`` read after the steps above.
    """
    driver.get("https://www.soccerstand.com/ru/")
    choose_day(driver, days, to_day)

    # The result is discarded: this lookup is only used to give the page
    # time to render.
    # NOTE(review): 'sportName soccer' contains a space, while By.CLASS_NAME
    # takes a single class name, so this lookup probably never matches and
    # the call behaves like a fixed delay (all find_elems retries are spent).
    # Confirm before changing the locator - a locator that matches at once
    # could return before the newly selected day has been rendered.
    find_elems(driver, By.CLASS_NAME, 'sportName soccer')

    page_html = driver.page_source
    return page_html

    
def get_ids(data):
    """
    Extract match ids from the HTML of the match list page.

    Finds the first ``div`` whose class matches "sportName soccer", collects
    every ``div`` inside it whose class matches "event__match", and returns
    each one's ``id`` attribute with its first four characters removed.

    Parameters:
        data: HTML text of the page.

    Returns:
        List of id strings, in document order.
    """
    soup = BeautifulSoup(data, 'html.parser')
    soccer_section = soup.find('div', class_=re.compile("sportName soccer"))
    match_divs = soccer_section.find_all('div', class_=re.compile("event__match"))

    # presumably the dropped 4 characters are a fixed id prefix - TODO confirm
    return [div.attrs["id"][4:] for div in match_divs]

    
def teams_name(soup):
    """
    Return the ``(home, away)`` team names of a parsed match page.

    Each name is the text of the first ``a`` whose class matches
    "participant__participantName" inside the ``div`` whose class matches
    "duelParticipant__home" / "duelParticipant__away".
    """
    name_link = re.compile("participant__participantName")

    home_box = soup.find("div", class_=re.compile("duelParticipant__home"))
    home_name = home_box.find("a", class_=name_link).text

    away_box = soup.find("div", class_=re.compile("duelParticipant__away"))
    away_name = away_box.find("a", class_=name_link).text

    return home_name, away_name

def teams_score(soup):
    """
    Return the score text of a parsed match page.

    The text comes from the first ``div`` whose class matches
    "detailScore__wrapper" inside the ``div`` whose class matches
    "duelParticipant__score".
    """
    score_box = soup.find("div", class_=re.compile("duelParticipant__score"))
    score_wrapper = score_box.find("div", class_=re.compile("detailScore__wrapper"))
    return score_wrapper.text


def get_res_by_half(soup):
    """
    Return the first-half and second-half results of a parsed match page.

    The results are read from the ``div`` elements whose class matches
    "smv__incidentsHeader": for each of the first two such headers the text
    of its second nested ``div`` is taken.

    Only the first two headers are used, so a page with additional header
    sections no longer fails on unpacking (presumably those are extra time /
    penalties and follow the two halves - TODO confirm against a real page).

    Returns:
        Tuple ``(first_half, second_half)`` of strings.

    Raises:
        ValueError: if fewer than two headers are present.
    """
    headers = soup.find_all('div', class_=re.compile("smv__incidentsHeader"))
    if len(headers) < 2:
        raise ValueError(
            f"expected at least 2 'smv__incidentsHeader' sections, found {len(headers)}"
        )

    f_half_raw, s_half_raw = headers[0], headers[1]
    return f_half_raw.find_all('div')[1].text, s_half_raw.find_all('div')[1].text


def close_cookies_div(driver):
    """
    Dismiss the cookie-consent dialog if its accept button is present.

    Looks up the element with id 'onetrust-accept-btn-handler'; when found,
    clicks it and records that in ``GLOBALS['close_cookies']``. When the
    button is not found nothing is clicked and the flag is left untouched.
    """
    accept_buttons = find_elems(driver, By.ID, 'onetrust-accept-btn-handler')
    if accept_buttons:
        accept_buttons[0].click()
        # GLOBALS is mutated, not rebound, so no `global` statement is needed.
        GLOBALS['close_cookies'] = True
    

def get_res_data(driver, without_score=False):
    """
    Collect team names and results from the match page open in ``driver``.

    Parameters:
        driver: WebDriver showing a match page.
        without_score: when exactly ``True``, only the team names are read
            and the three result fields hold the placeholder string
            'without_score'.

    Returns:
        Dict with keys ``h_team``, ``a_team``, ``score``, ``f_half`` and
        ``s_half``.

    Raises:
        Exception: if no ``div`` with the text "2-й тайм" shows up on the
            page (results by half are then unavailable).
    """
    if without_score is True:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        h_team, a_team = teams_name(soup)
        return {
            "h_team": h_team,
            "a_team": a_team,
            "score": 'without_score',
            "f_half": 'without_score',
            "s_half": 'without_score',
        }

    # Wait for the second-half header first and only then take the HTML
    # snapshot. Parsing page_source before the wait could read the score and
    # the half results from a page that had not finished rendering.
    elems = find_elems(driver, By.XPATH, "2-й тайм", by_text=True)
    if not elems:
        raise Exception('нет данних за 1 тайм ')

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    h_team, a_team = teams_name(soup)
    score = teams_score(soup)
    f_half, s_half = get_res_by_half(soup)
    return {
        "h_team": h_team,
        "a_team": a_team,
        "score": score,
        "f_half": f_half,
        "s_half": s_half,
    }


def do_job(driver, match_id):
    """
    Scrape one match plus one earlier match for each of its two teams.

    Steps:
      1. Open the match summary page, dismiss the cookie dialog unless
         ``GLOBALS['close_cookies']`` says it was already done, and read the
         current match (without scores when ``GLOBALS['to_day']`` is
         'tomorrow').
      2. Open the standings page and take the highlighted rows (class
         starting with 'ui-table__row table__row--selected').
      3. For every row click one of its 'tableCellFormIcon' elements, switch
         to the window that opens, read that match, then close the window
         and switch back.

    Returns:
        Dict with ``current_match_data``, ``h_match_data`` and
        ``a_match_data``; the last two stay ``None`` when no row supplied
        them.

    Raises:
        IndexError: when a row has too few form icons or no second window
            is available (among other errors raised by the helpers).
    """
    res_dict = dict.fromkeys(('current_match_data', 'h_match_data', 'a_match_data'))

    driver.get(f"https://www.soccerstand.com/ru/match/{match_id}/#/match-summary/match-summary")

    if GLOBALS['close_cookies'] is False:
        close_cookies_div(driver)

    is_upcoming = GLOBALS['to_day'] == 'tomorrow'
    current_match_data = get_res_data(driver, without_score=is_upcoming)
    res_dict['current_match_data'] = current_match_data

    driver.get(f"https://www.soccerstand.com/ru/match/{match_id}/#/standings/table/overall")

    selected_rows = find_elems(driver, By.XPATH, 'ui-table__row table__row--selected')

    for row in selected_rows:
        row_text = row.text
        form_icons = find_elems(row, By.CLASS_NAME, 'tableCellFormIcon')

        # Index of the form icon to open: 2 when the row text contains '?',
        # otherwise 1; one position earlier when scraping tomorrow's matches.
        # NOTE(review): presumably '?' marks a not-yet-played fixture shown
        # in the form column - confirm against the page.
        icon_index = 2 if '?' in row_text else 1
        if is_upcoming:
            icon_index -= 1

        click_elem(driver, form_icons[icon_index])

        # switch to the newly opened window
        handles = driver.window_handles
        driver.switch_to.window(handles[1])
        time.sleep(1)
        try:
            window_data = get_res_data(driver)
            # The second line of the row text is compared with the current
            # match's home team to decide which slot the data belongs to.
            is_home_row = current_match_data['h_team'] == row_text.split('\n')[1]
            slot = 'h_match_data' if is_home_row else 'a_match_data'
            res_dict[slot] = window_data
        finally:
            driver.close()
            driver.switch_to.window(handles[0])

    return res_dict



In [3]:
def main_job(to_day):
    """
    Scrape every match listed for the chosen day.

    Parameters:
        to_day: calendar direction passed to ``get_source_w_ids``
            ('yesterday' or 'tomorrow' in this notebook).
            NOTE(review): ``do_job`` reads ``GLOBALS['to_day']`` instead of
            this argument, so the two must be kept equal by the caller.

    Returns:
        List of the dicts returned by ``do_job``, one per match that was
        processed without an error. Matches that raise are reported with
        ``print`` and skipped.
    """
    rez_list = []

    # Create the driver BEFORE entering the try block: if creation fails
    # there is nothing to quit, and the original error must not be masked by
    # a failure on an unbound `driver` in the finally clause.
    driver = get_driver()
    try:
        sourse_w_ids = get_source_w_ids(driver, to_day=to_day)
        match_ids = get_ids(sourse_w_ids)
        for match_id in match_ids:
            try:
                row_data = do_job(driver, match_id)
                if row_data is None:
                    continue
                rez_list.append(row_data)
                print(row_data['current_match_data']['h_team'])
            except Exception as e:
                # Best effort: one broken match must not stop the whole run.
                print(e, type(e), match_id)
                continue
    finally:
        driver.quit()
    return rez_list
    
    
# Run-wide state shared by the scraping helpers.
GLOBALS = dict(
    # set to True by close_cookies_div once the consent button was clicked
    close_cookies=False,
    # calendar direction for this run; switch to 'yesterday' to scrape the
    # previous day instead
    to_day='tomorrow',
)

rez = main_job(to_day=GLOBALS['to_day'])


Мадагаскар
Гана
Замбия
Конго
Габон
Алжир
list index out of range <class 'IndexError'> rBMLV0dn
list index out of range <class 'IndexError'> xUTX4DAJ
list index out of range <class 'IndexError'> dfx93rDg
list index out of range <class 'IndexError'> CY8t1zZ4
list index out of range <class 'IndexError'> MLoosAIC
list index out of range <class 'IndexError'> x8G3tIA4
list index out of range <class 'IndexError'> S8s9h3n2
list index out of range <class 'IndexError'> QVF7fKhT
Бирмингем U21
Южная Африка U23
Аль-Хадд
Манама Клуб
Porto Vitoria
Эрсилиу Луж
Мото Клуб
Кальденсе
Пирасикаба
Ремо
Куяба
АБС
Баия
Кампиненсе
Наутико
Санта Круз
Сеара
Сержипи
ЦСА
Атлетико Минейро U20
Атлетико Паранаэнсе U20
Сан-Паулу U20
Шелаху
Португалия U20
Норвегия U20
Ирландия U17 (Ж)
Германия U17 (Ж)
Словакия U17 (Ж)
Словения U17 (Ж)
Италия U17 (Ж)
Венгрия U17 (Ж)
Шимшон Тель-Авив
Маккаби Герцлия
Хапоэль Кфар-Шалем
Бангалор 2
Джана Эрминио
Яссы
Туран
Аль-Духаиль
Умм-Салаль
Мильонариос
Ла Экидад (Ж)
Десампарадос
нет дан

In [4]:
# Number of match records returned by main_job.
len(rez)

124

In [5]:
# (column prefix, key inside a `rez` record) for the three matches that each
# record holds: the current match, and one earlier match per team.
match_sources = [
    ('cur_match', 'current_match_data'),
    ('h_match', 'h_match_data'),
    ('a_match', 'a_match_data'),
]
match_fields = ['h_team', 'a_team', 'score', 'f_half', 's_half']

# Column names and row values are generated from the same two lists, so they
# cannot drift apart: cur_match_h_team, ..., a_match_s_half.
headers = [
    f"{prefix}_{field}"
    for prefix, key in match_sources
    for field in match_fields
]

data = []
for m in rez:
    parts = [m[key] for prefix, key in match_sources]
    # do_job leaves h_match_data / a_match_data as None when the standings
    # page gave no row for that team; such records cannot fill a full row.
    # Checking this explicitly replaces a bare `except: continue`, which
    # also hid unrelated errors (and KeyboardInterrupt).
    if any(part is None for part in parts):
        continue
    data.append([part[field] for part in parts for field in match_fields])
data

[['Мадагаскар',
  'ЦАР',
  'without_score',
  'without_score',
  'without_score',
  'Мадагаскар',
  'Ангола',
  '1-1',
  '1 - 1',
  '0 - 0',
  'ЦАР',
  'Гана',
  '1-1',
  '1 - 1',
  '0 - 0'],
 ['Гана',
  'Ангола',
  'without_score',
  'without_score',
  'without_score',
  'ЦАР',
  'Гана',
  '1-1',
  '1 - 1',
  '0 - 0',
  'Мадагаскар',
  'Ангола',
  '1-1',
  '1 - 1',
  '0 - 0'],
 ['Замбия',
  'Лесото',
  'without_score',
  'without_score',
  'without_score',
  'Замбия',
  'Коморские острова',
  '2-1',
  '1 - 1',
  '1 - 0',
  'Лесото',
  "Кот-д'Ивуар",
  '0-0',
  '0 - 0',
  '0 - 0'],
 ['Конго',
  'Южный Судан',
  'without_score',
  'without_score',
  'without_score',
  'Конго',
  'Гамбия',
  '1-0',
  '0 - 0',
  '1 - 0',
  'Южный Судан',
  'Мали',
  '1-3',
  '1 - 0',
  '0 - 3'],
 ['Габон',
  'Судан',
  'without_score',
  'without_score',
  'without_score',
  'Габон',
  'Мавритания',
  '0-0',
  '0 - 0',
  '0 - 0',
  'Судан',
  'ДР Конго',
  '2-1',
  '1 - 0',
  '1 - 1'],
 ['Алжир',
  'Нигер

In [6]:
# One row per fully scraped match, one column per entry of `headers`.
df = pd.DataFrame(data, columns=headers)
df

Unnamed: 0,cur_match_h_team,cur_match_a_team,cur_match_score,cur_match_f_half,cur_match_s_half,h_match_h_team,h_match_a_team,h_match_score,h_match_f_half,h_match_s_half,a_match_h_team,a_match_a_team,a_match_score,a_match_f_half,a_match_s_half
0,Мадагаскар,ЦАР,without_score,without_score,without_score,Мадагаскар,Ангола,1-1,1 - 1,0 - 0,ЦАР,Гана,1-1,1 - 1,0 - 0
1,Гана,Ангола,without_score,without_score,without_score,ЦАР,Гана,1-1,1 - 1,0 - 0,Мадагаскар,Ангола,1-1,1 - 1,0 - 0
2,Замбия,Лесото,without_score,without_score,without_score,Замбия,Коморские острова,2-1,1 - 1,1 - 0,Лесото,Кот-д'Ивуар,0-0,0 - 0,0 - 0
3,Конго,Южный Судан,without_score,without_score,without_score,Конго,Гамбия,1-0,0 - 0,1 - 0,Южный Судан,Мали,1-3,1 - 0,0 - 3
4,Габон,Судан,without_score,without_score,without_score,Габон,Мавритания,0-0,0 - 0,0 - 0,Судан,ДР Конго,2-1,1 - 0,1 - 1
5,Алжир,Нигер,without_score,without_score,without_score,Танзания,Алжир,0-2,0 - 1,0 - 1,Уганда,Нигер,1-1,1 - 0,0 - 1
6,Бирмингем U21,Крю U21,without_score,without_score,without_score,Шеффилд Уэнсдей U21,Бирмингем U21,1-2,0 - 2,1 - 0,Колчестер Юнайтед U21,Крю U21,1-1,1 - 0,0 - 1
7,Кальденсе,Патросиненсе,without_score,without_score,without_score,Демократа СЛ,Кальденсе,1-0,1 - 0,0 - 0,Патросиненсе,Демократа СЛ,2-0,1 - 0,1 - 0
8,АБС,Флуминенсе ПИ,without_score,without_score,without_score,Регатас,АБС,1-0,0 - 0,1 - 0,Флуминенсе ПИ,Баия,1-1,0 - 0,1 - 1
9,Баия,Регатас,without_score,without_score,without_score,Флуминенсе ПИ,Баия,1-1,0 - 0,1 - 1,Регатас,АБС,1-0,0 - 0,1 - 0


In [7]:
# df.to_csv('csv/future/23_03_23.csv', index=False)
# df.to_csv('csv/past/23_03_23.csv', index=False)