In [65]:
import sys
import time
import os
import asyncio
import numpy as np
import pandas as pd
import json
import traceback
from typing import List, Dict

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

from IPython.display import clear_output

from parsers import DataParser
from functions import extractText

In [3]:
def getDriver(url):
    chrome_options = Options()

    chrome_prefs = {}
    chrome_prefs["profile.default_content_settings"] = { "popups": 1 }

    driver = webdriver.Chrome(options=chrome_options, executable_path="./chromedriver")
    driver.get(url)
    driver.maximize_window()
    
    return driver

In [4]:
fonbet = getDriver('https://www.fonbet.ru/live/')
xstavka = getDriver('https://1xstavka.ru/en/live/')

In [72]:
def extractAttr(dct: Dict, attr: str) -> List:
    res = []    
    if attr in dct:
        res.append(dct[attr])
        
    if dct['children']:
        for x in dct['children']:
            res.append(extractAttr(x, attr))
            
    return res


def extractText(dct: Dict) -> List:
    res = []    
    if 'text' in dct:
        res.append(dct['text'].strip())
        
    if dct['children']:
        for x in dct['children']:
            res.append(extractText(x))
    
    return res


def simplifyList(lst):
    if type(lst) != type([]):
        return lst
    if not lst:
        return None
    if not lst[0]:
        return None
    if len(lst) == 1:
        return simplifyList(lst[0])
    
    res = []
    
    for el in lst:        
        res.append(simplifyList(el))
        
    return res


def toDict(web_elem):
    parser = DataParser()
    parser.feed(web_elem.get_attribute('innerHTML'))
    return parser.data

In [73]:
fonbet_root = WebDriverWait(fonbet, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div[@class="table__flex-container"]'))
    ).find_elements_by_xpath('./*')[0]

data = toDict(fonbet_root)

ligas = data['children'][0]['children']

features_cnt = 14
df_fonbet = pd.DataFrame()
columns_added_flg = False

for liga in ligas:
    text_tmp = extractText(liga)
    text = simplifyList(text_tmp)
    
    headers = []
    for el in text[0][-features_cnt:]:
        if el not in headers:
            headers.append(el)
        else:
            headers.append(el + ('*'))
            
    rows = text[1:]
    
    if not columns_added_flg:
        df_fonbet = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True
    
    for row in rows:
        try:
            players, coeffs = row[1][0][1].split('—'), row[-features_cnt:] #костыль для игроков
            
            if len(players) == 2: #только строки, в которых ставки на основные исходы матча
                player_1_tmp, player_2_tmp = players

                player_1, player_2 = player_1_tmp.strip(), player_2_tmp.strip()

                if len(headers) == len(coeffs):
                    columns = ['Player 1', 'Player 2'] + headers
                    data = [player_1, player_2] + coeffs

                    df_fonbet = df_fonbet.append(dict(zip(columns, data)), ignore_index=True)
            
        except Exception as exc:
            print(exc)

df_fonbet

Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,X2,Фора,1*,Фора*,2*,Тотал,Б,М,Доп
0,Италия (Chellovekk),Франция (BlackStar98),38.0,5.2,1.16,4.6,1.13,,1.5,1.3,-1.5,3.25,1.5,1.82,1.88,32.0
1,Бразилия (TAKA),Англия (mooneycb),,,,,,,-4.5,1.42,4.5,2.65,5.5,1.4,2.7,28.0
2,Арсенал Л (labotryas),Ливерпуль (KRaftVK),35.0,9.0,1.06,7.3,1.03,,1.5,2.7,-1.5,1.4,2.5,1.42,2.65,37.0
3,Тоттенхэм (MelToSiK),Ман. Сити (Kray),1.03,13.0,50.0,,,9.0,-2.5,3.35,2.5,1.28,2.5,1.6,2.2,32.0
4,Португалия (Gernaut),Англия (Iamdevilwalk),,,,,,,,,,,,,,
5,Черные Грифоны,Реактивные Бизоны,2.55,3.95,2.4,1.55,1.23,1.48,0.0,1.93,0.0,1.82,5.5,2.25,1.6,54.0
6,Оттава Сенаторз (barkes_15),Торонто (Kumaa1998),,,,,,,,,,,,,,
7,Хьюстон Рокетс (Galka_khv),Филадельфия 76 (Squonck),2.31,,1.54,,,,3.5,1.8,-3.5,1.9,118.5,1.85,1.85,6.0
8,Бостон Селтикс (uskov9713),ЛА Клипперс (daniil_1509),1.19,,4.15,,,,-6.5,1.87,6.5,1.83,121.5,1.83,1.87,6.0
9,Нью-Йорк Никс (Miller),Бруклин Нетс (iamdevilwalk),1.98,,1.73,,,,1.5,1.85,-1.5,1.85,115.5,1.85,1.85,6.0


In [20]:
xstavka_root = xstavka.find_element_by_xpath('//div[@class="game_content_line on_main live-content "]/div/div/div')
headers_html = xstavka_root.find_element_by_xpath('.//div[@class="c-events__item c-events__item_head greenBack"]')

data = headers_html.get_attribute('innerHTML')

parser = DataParser()

parser.feed(data)

In [56]:
match = parser.data['children'][0]['children'][2]

In [61]:
text_tmp = extractAttr(match, 'title')
columns = simplifyList(text_tmp)
columns

['1',
 'Draw',
 '2',
 'W1 or Draw',
 'W1 or W2',
 'Draw or W2',
 'Total over',
 'Total parameter',
 'Total under',
 'Team 1 Handiсap',
 'Handicap parameter',
 'Team 2 Handicap',
 'Team 1 Total Over',
 'Team Total value',
 'Team 1 Total Under',
 'Team 2 Total Over',
 'Team Total value',
 'Team 2 Total Under']

In [70]:
async def parseMatch(match, columns):
    res = {}
    df = pd.DataFrame(columns=['Player 1', 'Player 2'] + columns)
    
    try:
        elements = toDict(match)
        text_tmp = extractText(elements)
        text = simplifyList(text_tmp)
        print(text)
        
        features = text[0][0][1]

        player_1, player_2  = None, None
    
        player_1_tmp, player_2_tmp = text[0][0][0][0][1][0]
        player_1, player_2 = player_1_tmp[0], player_2_tmp[0]
        
        df = df.append(dict(zip(['Player 1', 'Player 2'] + columns, [player_1, player_2] + features)), ignore_index=True)
        res['success'] = True
        
    except Exception as exc:
        res['success'] = False
        res['error'] = exc
        traceback.print_exc()
        
    
    res['result'] = df
    
    return res


In [71]:
df = pd.DataFrame(columns=['Player 1', 'Player 2'] + columns)

start = time.time()

matches = xstavka_root.find_elements_by_xpath('.//div[@class="c-events__item c-events__item_col"]')

df_xstavka = pd.concat([x['result'] for x in await asyncio.gather(*[parseMatch(match, columns) for match in matches])])
display(df_xstavka)

print(time.time() - start)
clear_output(wait=True)

None
None
None
None
None
None
None
None
None
None
None
None
None
None


Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    f

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None


Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    f

None
None
None
None
None
None
None
None
None


Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    features = text[0][0][1]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
  File "<ipython-input-70-064731aafe67>", line 11, in parseMatch
    f

Unnamed: 0,Player 1,Player 2,1,Draw,2,W1 or Draw,W1 or W2,Draw or W2,Total over,Total parameter,Total under,Team 1 Handiсap,Handicap parameter,Team 2 Handicap,Team 1 Total Over,Team Total value,Team 1 Total Under,Team 2 Total Over,Team Total value.1,Team 2 Total Under


0.562190055847168


In [14]:
df_fonbet.iloc[:, range(5)].merge(df_xstavka.iloc[:, range(5)], on='Player 1')

Unnamed: 0,Player 1,Player 2_x,1_x,X,2_x,Player 2_y,1_y,Draw,2_y
0,Crystal Palace,Burnley,4.4,3.10,1.2,Burnley,2.048,3.085,4.36
1,Genclerbirligi,Kasimpasa,1.45,4.20,2.7,Kasimpasa,7.2,3.86,1.55
2,Cittadella,Perugia,5.0,3.20,1.17,Perugia Calcio,2.26,3.11,3.41
3,Arda Kardzhali,Botev Plovdiv,6.5,2.70,1.1,Botev Plovdiv,2.99,2.59,2.86
4,Hapoel Petah Tikva,Hapoel Afula,3.8,[],1.22,Hapoel Afula,-,-,-
5,Maccabi Ahi Nazareth,Hapoel Ashkelon,1.95,1.55,1.75,Hapoel Ashkelon,4.75,1.57,4.5
6,Ural Academy,Ural Raiders,1.5,"[[56912, [Ural Academy — Ural Raiders]], [[[[]...",1.85,Ural Riders,1.1,-,6.98


In [15]:
df_fonbet.loc[:, ['Player 1', 'Player 2']].to_csv('fonbet_names.csv')
df_xstavka.loc[:, ['Player 1', 'Player 2']].to_csv('xstavka_names.csv')