In [125]:
import re
import time
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException
from selenium.webdriver import ActionChains

# ElementClickInterceptedException - the element cannot be clicked (the click is intercepted)
# NoSuchElementException - no such element exists

In [49]:
def handle_error(func, *args, **kwargs):
    """Decorator that retries ``func`` while it raises ElementNotInteractableException.

    The wrapped callable is invoked with the arguments given to the wrapper.
    Whenever it raises ``ElementNotInteractableException`` the wrapper sleeps
    for 0.5 s and tries again; any other exception propagates unchanged.
    There is no upper bound on the number of retries.

    Args:
        func: the callable to wrap.
        *args, **kwargs: accepted for backward compatibility only; they are
            not used.

    Returns:
        A wrapper that returns whatever ``func`` returns on its first
        successful call.
    """
    import functools  # local import so the block does not depend on the imports cell

    @functools.wraps(func)
    def wrap(*call_args, **call_kwargs):
        while True:
            try:
                # Return the result instead of dropping it.
                return func(*call_args, **call_kwargs)
            except ElementNotInteractableException:
                time.sleep(0.5)
    return wrap


def filter_matches(data, team_name, count_days=3):
    """
    Pick the leading form entries that do not mention ``team_name``.

    Steps, in order:
      1. drop every entry whose ``title`` attribute contains "Следующий матч";
      2. keep only the first ``count_days`` of the remaining entries;
      3. from those, drop entries whose ``title`` contains ``team_name``.

    Original author's note: use count_days=4 for past matches and
    count_days=3 for upcoming ones.
    """
    played = [entry for entry in data if "Следующий матч" not in entry.attrs["title"]]
    return [
        entry
        for entry in played[:count_days]
        if team_name not in entry.attrs["title"]
    ]
    

def proccess_matches(data, team_name):
    """Aggregate goals per opponent and form points for ``team_name``.

    Each item of ``data`` must expose ``attrs["title"]`` containing a score
    written as ``<home>:<away>`` and the pairing written as
    ``(<home team> - <away team>)``, plus a ``find`` method that locates the
    form-icon ``div`` whose text is the result symbol.

    Args:
        data: iterable of match elements.
        team_name: the team whose perspective is used; it is compared with
            the home-team name parsed from the title.

    Returns:
        ``(rez, points)`` where ``rez`` maps each opponent name to
        ``{"z": goals scored by team_name, "p": goals scored by the opponent}``
        summed over all matches against that opponent, and ``points`` adds
        1 for every "Н" symbol and 3 for every "B" symbol.

    Raises:
        IndexError: if a title contains no score or no "(" / " - " separator.
    """
    rez = {}
    points = 0
    for match in data:
        title = match.attrs["title"]

        # Raw string and ``\d+`` so that multi-digit scores such as "10:1"
        # are read in full rather than as "0:1". ``[0]`` keeps the original
        # IndexError when no score is present.
        h_score, a_score = map(int, re.findall(r"(\d+):(\d+)", title)[0])

        # Text after the first "(" looks like "Home - Away)...".
        sides = title.split("(", maxsplit=1)[1].split(" - ")
        h_team = sides[0]
        a_team = sides[1].rsplit(")", maxsplit=1)[0]

#         h_team, a_team = re.findall("([\w\s]*) - ([\w\s]*)", match.attrs["title"])[0]

        if h_team == team_name:
            target_score, enemy_score, enemy_team = h_score, a_score, a_team
        else:
            target_score, enemy_score, enemy_team = a_score, h_score, h_team

        point = match.find("div", class_=re.compile("formIcon formIcon")).text

        if point == "Н":
            points += 1
        elif point == "B":
            points += 3

        if enemy_team not in rez:
            rez[enemy_team] = {"z": target_score, "p": enemy_score}
        else:
            rez[enemy_team]["z"] += target_score
            rez[enemy_team]["p"] += enemy_score
    return rez, points


def count_rez(data, enemy_list):
    """Sum the "z" and "p" values of ``data`` over the names in ``enemy_list``.

    Returns a new dict ``{"z": total_z, "p": total_p}``; both totals are 0
    when ``enemy_list`` is empty.
    """
    totals = dict.fromkeys(("z", "p"), 0)
    for name in enemy_list:
        stats = data[name]
        for key in totals:
            totals[key] += stats[key]
    return totals


def get_driver(path_to_driver="./chromedriver", headless=False):
    """Create a Chrome WebDriver.

    Args:
        path_to_driver: path to the chromedriver executable. Defaults to the
            previously hard-coded ``"./chromedriver"``.
        headless: when true, start Chrome with the ``--headless`` argument.
            Defaults to False, matching the previous behaviour.

    Returns:
        A ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    options = Options()
    if headless:
        options.add_argument("--headless")
    return webdriver.Chrome(
        service=Service(path_to_driver),
        options=options,
    )


def get_ids(data):
    """Extract match ids from the HTML of the listing page.

    Parses ``data``, takes the first ``div`` matching "sportName soccer",
    collects every ``div`` inside it matching "event__match", and returns
    each element's ``id`` attribute with its first four characters removed
    (presumably a fixed prefix - confirm against the page markup).
    """
    page = BeautifulSoup(data, 'html.parser')
    soccer_section = page.find('div', class_=re.compile("sportName soccer"))
    match_divs = soccer_section.find_all('div', class_=re.compile("event__match"))
    return [div.attrs["id"][4:] for div in match_divs]


def choose_day(driver, days):
    """Click the calendar's "yesterday" control ``days`` times.

    The control is looked up again before every click and the function
    sleeps 1.5 s after each click. With ``days == 0`` nothing happens.
    """
    # To move forward instead, use the class "calendar__navigation--tomorrow".
    for _ in range(days):
        driver.find_element(By.CLASS_NAME, "calendar__navigation--yesterday").click()
        time.sleep(1.5)
        
        
def get_source_w_ids(driver, days=0):
    """Open the site's start page, step the calendar by ``days`` via
    ``choose_day`` and return the resulting page source."""
    start_url = "https://www.soccerstand.com/ru/"
    driver.get(start_url)
    choose_day(driver, days)
    html = driver.page_source
    return html



def get_data_from_page(driver, id_team, elem='Таблица'):
    """Load the overall standings view of a match and return its HTML.

    Args:
        driver: object exposing ``get(url)`` and ``page_source``.
        id_team: identifier inserted into the match URL.
        elem: currently unused; kept so existing callers keep working.

    Returns:
        ``driver.page_source`` read one second after the navigation.
    """
    standings_url = f"https://www.soccerstand.com/ru/match/{id_team}/#/standings/table/overall"
    driver.get(standings_url)

    # Fixed pause before reading the source.
    time.sleep(1)

    html = driver.page_source
    return html

 

 
    

In [3]:
def teams_name(soup):
    """Return ``(home_name, away_name)`` read from the match header.

    For each side the first ``div`` matching the side's class pattern is
    located, then the text of the first ``a`` inside it matching
    "participant__participantName" is taken.
    """
    names = []
    for side_pattern in ("duelParticipant__home", "duelParticipant__away"):
        side_block = soup.find("div", class_=re.compile(side_pattern))
        name_link = side_block.find("a", class_=re.compile("participant__participantName"))
        names.append(name_link.text)
    return tuple(names)

def teams_score(soup):
    """Return the score text of the match header split on "-".

    The text is read from the first "detailScore__wrapper" ``div`` inside the
    first "duelParticipant__score" ``div``; the pieces are returned as a list
    of strings.
    """
    score_block = soup.find("div", class_=re.compile("duelParticipant__score"))
    wrapper = score_block.find("div", class_=re.compile("detailScore__wrapper"))
    return wrapper.text.split("-")

def teams_matches(soup, h_team, a_team):
    """Return ``(home_icons, away_icons)`` from the two selected table rows.

    The selected rows are the ``div`` elements matching "table__row--selected".
    If the participant name in the first selected row equals ``h_team`` the
    rows are taken as (home, away); otherwise their order is reversed.
    ``a_team`` is accepted but not used.

    Each returned item is the result of ``find_all`` for "tableCellFormIcon"
    ``div`` elements inside the row's form cell.
    """
    def form_icons(row):
        form_cell = row.find("div", class_="table__cell table__cell--form")
        return form_cell.find_all("div", class_=re.compile("tableCellFormIcon"))

    selected_rows = soup.find_all("div", class_=re.compile("table__row--selected"))
    first_row_name = selected_rows[0].find(
        "div", class_=re.compile("tableCellParticipant__block")
    ).text

    if first_row_name == h_team:
        home_row, away_row = selected_rows
    else:
        home_row, away_row = selected_rows[::-1]

    return form_icons(home_row), form_icons(away_row)


def matches_rez(soup):
    """Return the ``text`` of every item in ``soup``, in iteration order."""
    texts = []
    for item in soup:
        texts.append(item.text)
    return texts


def do_work(data, tot):
    """Parse one match page and append a summary row to ``tot``.

    Args:
        data: HTML source of the match standings page.
        tot: list that receives one row per processed match, laid out as
            ``[home team, away team, home score, away score,
            home form symbols, away form symbols]``.

    The per-opponent aggregates and points are still computed (and may raise)
    even though they are not part of the appended row.
    """
    page = BeautifulSoup(data, 'html.parser')

    h_team, a_team = teams_name(page)
    h_score, a_score = teams_score(page)
    h_form, a_form = teams_matches(page, h_team, a_team)

    # Each side's form entries, excluding matches against the other side.
    h_recent = filter_matches(h_form, a_team)
    a_recent = filter_matches(a_form, h_team)

    h_symbols = matches_rez(h_recent)
    a_symbols = matches_rez(a_recent)

    h_by_enemy, h_points = proccess_matches(h_recent, h_team)
    a_by_enemy, a_points = proccess_matches(a_recent, a_team)

    # Opponents that both sides have faced.
    inters_teams = list(set(a_by_enemy).intersection(h_by_enemy))

    h_team_rez = count_rez(h_by_enemy, inters_teams)
    a_team_rez = count_rez(a_by_enemy, inters_teams)

    row = [h_team, a_team, h_score, a_score, h_symbols, a_symbols]
    tot.append(row)
 
#     if inters_teams:
#         print(
#             f"{h_team[:4]} - {a_team[:4]} || "
#             f" {h_team_rez['z']} - {h_team_rez['p']} || "
#             f" {a_team_rez['z']} - {a_team_rez['p']} || "
#             f" {score} || "
#             f"{inters_teams} || "
#             f"{h_points} - {a_points}"

#         )
#         tot.append([
#             h_team,
#             a_team,
#             h_team_rez['z'],
#             h_team_rez['p'],
#             a_team_rez['z'],
#             a_team_rez['p'],
#             h_score,
#             a_score,
#             h_points,
#             a_points
#         ])

In [8]:
def smt(ids=None):
    """Scrape the given matches and return the collected rows.

    Args:
        ids: iterable of match ids to process. When omitted, the single
            previously hard-coded id ``"zgsX893T"`` is used.

    Returns:
        The list filled by ``do_work``, one row per successfully processed
        match.

    Matches raising ``NoSuchElementException`` are skipped silently; for
    ``ValueError`` / ``IndexError`` the match id is printed and processing
    continues. The driver is always quit before returning.
    """
    if ids is None:
        # To process a whole day instead, fetch the ids from the listing page:
        #     sourse_w_ids = get_source_w_ids(driver, 1)
        #     ids = get_ids(sourse_w_ids)
        ids = ["zgsX893T"]

    tot = []
    # Create the driver before entering ``try``: if creation fails there is
    # nothing to quit, and ``finally`` must not fail on an unbound name and
    # hide the real error.
    driver = get_driver()
    try:
        for id_team in ids:
            try:
                data = get_data_from_page(driver, id_team)
                do_work(data, tot)
            except NoSuchElementException:
                continue
            except (ValueError, IndexError):
                print(id_team)
    finally:
        driver.quit()
    return tot
    
        

def main():
    """Run ``smt`` once; its result is not returned to the caller."""
    smt()

In [14]:
# Run the scraper and keep the rows it returns in the notebook-level name `a`.
a = smt()

['zgsX893T', 'EZMOdC6j', 'GzXvzh7f', 'Wzq7dqWF', 'z9Dxs4WS', 'WIw6RnS1', '0hSs0v8R', 'KWy6YXOj', 'WnkdmQwn', 'Ec9PCbaA', 'hl8TBIEG', 'lfIfKBxO', 'WCO5Sn7L', 'QZN9R6MR', 'UZEM3zGh', 'nTSPex2f', 'ENEknFui', 'MmIcpgA3', '4jMydViI', 'Wbk5v3Mp', 'Uc3yOHd5', 'KndlLFtO', 'YV9SPwSh', 'OK2uNyCB', 'bwehKZeU', 'xO6qMeRH', 'l2WcuJRu', '61L7nCU0', '0OKBoWq7', 'hbH3mhFf', 'AZRblYal', 'GW2b9ZyE', 'fN128giK', 'lxwFP0cT', 'KdobTrb4', 'rLozbsjk', '84QYIrSE', 'Aovwaaa9', 'hC2RfyTn', 'ht4o1VS0', '8dxVfudU', 'S05iCtR4', 'Kz6mDMBb', 'CUrMd1RH', 'tp7qE2dh', 'U5wReLtO', 'fc41s6Tu', 'Gl6ew5Jq', 'nXKxLUPT', 'SAQLVUWI', 'lt74GlYp', 'hALYLAAN', 'rH1Zbnti', 'M70wb6ec', 'QueYzAx8', '4jYxaPP2', 'rsXtbqu9', 'd6nnesPR', '2qmrdN9L', 'Ohlvc3fF', '4dTTxmFq', 'YTQLcli9', 'rJEWKDK0', 'xtEzKXZ6', 'v7gVDr1t', 'MVrzD2Gn', 'AJL4bRKa', 'jeSv0vd4', 'hrB9coz6', 'd2WWr4Gt', 'dvll6mzl', 'IN1dqEUr', 'jg9w4oO0', '0tjPxFW8', 'r5uUyZoF', 'f5tdd0Tb', 'vsOF8lei', 't6KW46fG', 'zVnNnQpo', 'IRsL7RMA', 'p0UD9mhb', 'WMR2yog2', 'INZKyGz2', 'GQ

In [217]:
def find_elems(target, class_, matches=False, timeout=None, poll=0.5):
    """Poll ``target`` until at least one matching element is found.

    Args:
        target: object exposing ``find_elements(by, value)`` (a driver or an
            element).
        class_: class string to look for.
        matches: when true, search by ``By.CLASS_NAME`` with ``class_``;
            otherwise search for ``div`` elements whose ``class`` attribute
            starts with ``class_``.
        timeout: maximum number of seconds to keep polling. ``None`` (the
            default) polls forever, as before.
        poll: seconds to sleep between attempts (default 0.5, as before).

    Returns:
        The non-empty list returned by ``find_elements``.

    Raises:
        TimeoutError: if ``timeout`` is given and nothing was found in time.
    """
    if matches:
        locator = (By.CLASS_NAME, class_)
    else:
        # Leading "." keeps the search relative to ``target``; a bare "//"
        # searches the whole document even when ``target`` is an element.
        locator = (By.XPATH, f".//div[starts-with(@class, '{class_}')]")

    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        try:
            elements = target.find_elements(*locator)
        except NoSuchElementException:
            elements = []

        if elements:
            return elements

        if deadline is None:
            time.sleep(poll)
            continue

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(
                f"no elements found for {locator[1]!r} within {timeout} s"
            )
        time.sleep(min(poll, remaining))


# Scratch run: open one match's standings page, click a form icon of the second
# selected row and capture the source of the match window it opens into `a`.

# Create the driver before ``try`` so that ``finally`` never runs against an
# undefined name when driver creation itself fails.
driver = get_driver()
try:
    driver.get("https://www.soccerstand.com/ru/match/zgsX893T/#/standings/table/overall")
    act = ActionChains(driver)

    team_rows = find_elems(driver, 'ui-table__row table__row--selected')

    matches_elements = find_elems(team_rows[1], 'tableCellFormIcon', matches=True)

    # Index outside the retry loop: a missing third icon must fail loudly
    # instead of being retried forever.
    target_icon = matches_elements[2]

    while True:
        try:
            target_icon.click()  # opens a new window with the wanted match
            break
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) can still stop the loop. Press the DOWN
            # key and try the click again.
            act.key_down(Keys.DOWN).key_down(Keys.DOWN).perform()
            time.sleep(0.5)

    time.sleep(1)

    # Switch to the new window.
    w = driver.window_handles
    driver.switch_to.window(w[1])
    # Wait until the incidents header is present before reading the source.
    find_elems(driver, "smv__incidentsHeader section__title")
    a = driver.page_source
    driver.close()

    time.sleep(1)
finally:
    driver.quit()



In [216]:
# Parse the HTML held in `a` and display the text of the first
# "detailScore__wrapper" div as the cell output.
soup = BeautifulSoup(a, 'html.parser')
# rez = soup.find("div", class_=re.compile("table__cell--form"))
soup.find('div', class_="detailScore__wrapper").text
# table__cell table__cell--form
# soup

'3-1'

In [7]:
# Build a DataFrame from `a` and display it.
# NOTE(review): this expects `a` to be the list of rows returned by smt()
# (home team, away team, home score, away score, home form, away form);
# another cell in this notebook rebinds `a` to a page-source string, so make
# sure the smt() cell was the last one to assign it before running this.
df = pd.DataFrame(data=a)
df

Unnamed: 0,0,1,2,3,4,5
0,Централ Кост Маринерс,Ньюкасл Джетс,,,"[B, П]","[П, П]"
1,Мьянма,Малайзия,,,[],[]
2,Лаос,Вьетнам,,,[],[]
3,Кукеси,Лячи,,,"[П, П, Н]","[B, B, П]"
4,Тирана,Партизани,,,"[П, B, Н]","[B, Н, B]"
...,...,...,...,...,...,...
260,Велайта Дича,Хавасса,,,"[B, B, П]","[B, П, Н]"
261,Адама Сити,Дире Дава,,,"[Н, B, П]","[П, B, Н]"
262,Кавальер,Faulkland,,,"[П, B, B]","[П, Н, П]"
263,Маунт Плезант,Уотерхаус,,,"[B, B, Н]","[B, П, П]"


In [8]:
# df.to_csv("./csv/21_12_22_t.csv",index=False)