In [1]:
import sys
import time
import os
import asyncio
import numpy as np
import pandas as pd
import json
import traceback
from typing import List, Dict

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

from IPython.display import clear_output

from parsers import DataParser
from functions import extractText

In [2]:
def getDriver(url):
    chrome_options = Options()

    chrome_prefs = {}
    chrome_prefs["profile.default_content_settings"] = { "popups": 1 }

    driver = webdriver.Chrome(options=chrome_options, executable_path="./chromedriver")
    driver.get(url)
    driver.maximize_window()
    
    return driver

In [3]:
fonbet = getDriver('https://www.fonbet.ru/live/')
xstavka = getDriver('https://1xstavka.ru/en/live/')

In [71]:
def extractAttr(dct: Dict, attr: str) -> List:
    res = []    
    if attr in dct:
        res.append(dct[attr])
        
    if dct['children']:
        for x in dct['children']:
            res.append(extractAttr(x, attr))
            
    return res


def extractText(dct: Dict) -> List:
    res = []    
    if 'text' in dct:
        res.append(dct['text'].strip())
        
    if dct['children']:
        for x in dct['children']:
            res.append(extractText(x))
    
    return res


def simplifyList(lst):
    if type(lst) == type('s'):
        return lst        
    if lst == []:
        return None
    if len(lst) == 1:
        return simplifyList(lst[0])
    
    res = []
    
    for el in lst:        
        res.append(simplifyList(el))
        
    return res


def toDict(web_elem):
    parser = DataParser()
    parser.feed(web_elem.get_attribute('innerHTML'))
    return parser.data


def getUniqueHeaders(headers_row):
    headers = []
    for el in headers_row:
        if el not in headers:
            headers.append(el)
        else:
            headers.append(el + ('*'))
    return headers


def percFork(coeff_1, coeff_2):
    return 1 - (1 / float(coeff_1) + 1 / float(coeff_2))


def checkPair(first, second):
    res = []

    for x in df_joined.loc[:, [first, second]].values:
        try:
            res.append(percFork(*x))
        except Exception as exc:
            print(exc)
            res.append(None)
            
    return res


async def parseBlock(root, features_cnt):
    text_tmp = extractText(block)
    text = simplifyList(text_tmp)

    headers_row = text[1][3] #unique for every bookmaker

    headers = getUniqueHeaders(headers_row)

    if not columns_added_flg:
        df = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True

    return df

In [72]:
simplifyList(['', 'g', 'd', [[[['b']], 't']]])

['', 'g', 'd', ['b', 't']]

In [73]:
fonbet_root = WebDriverWait(fonbet, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div[@class="table__flex-container"]'))
    ).find_elements_by_xpath('./*')[0]

data = toDict(fonbet_root)

blocks = data['children'][0]['children']

features_cnt = 14
df_fonbet = pd.DataFrame()
columns_added_flg = False

for block in blocks:
    text_tmp = extractText(block)
    text = simplifyList(text_tmp)
    headers_row = text[0][-features_cnt:]
        
    headers = getUniqueHeaders(headers_row)
    
    if not columns_added_flg:
        df_fonbet = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True
        
    rows = text[1:]
    
    for row in rows:
        try:
            players, coeffs = row[1][0][1].split('—'), row[-features_cnt:] #костыль для игроков
            
            if len(players) == 2: #только строки, в которых ставки на основные исходы матча
                player_1_tmp, player_2_tmp = players

                player_1, player_2 = player_1_tmp.strip(), player_2_tmp.strip()

                if len(headers) == len(coeffs):
                    columns = ['Player 1', 'Player 2'] + headers
                    data = [player_1, player_2] + coeffs

                    df_fonbet = df_fonbet.append(dict(zip(columns, data)), ignore_index=True)
            
        except Exception as exc:
            print(exc)

df_fonbet

Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,X2,Hcap.,1*,Hcap.*,2*,Total,O,U,Extras
0,Lokomotiv Moscow,Krylya Sovetov,14.0,4.2,1.35,3.2,1.23,1.02,1.5,1.27,-1.5,3.8,1.5,1.55,2.5,136.0
1,LokoM-Kr.Sov. Miranchuk Al,Glushenkov M,,,,,,,0.0,1.42,0.0,2.65,0.5,5.8,1.1,
2,Famalicao,Portimonense,5.0,3.8,1.7,2.15,1.27,1.17,1.5,1.37,-1.5,3.1,2.5,1.6,2.35,177.0
3,Millwall,Swansea City,1.42,4.2,9.0,1.06,1.23,2.85,-1.5,2.8,1.5,1.45,2.5,1.7,2.15,185.0
4,Imolese,Arzignano,5.0,2.4,2.15,1.62,1.5,1.13,1.5,1.12,-1.5,5.4,1.5,2.55,1.45,130.0
5,Pergolettese,Pianese,11.5,4.15,1.33,3.05,1.19,1.01,1.5,1.35,-1.5,2.85,4.5,2.5,1.45,87.0
6,Picerno,Rende,1.04,11.5,50.0,,1.01,8.5,-2.5,2.12,2.5,1.64,3.5,1.77,1.93,111.0
7,Ravenna,Fano,2.15,3.4,3.25,1.32,1.3,1.65,-1.5,4.4,1.5,1.19,2.5,2.25,1.6,244.0
8,Tabor Sezana,Triestina,2.9,3.25,2.35,1.53,1.3,1.35,1.5,1.19,-1.5,4.1,2.5,2.13,1.64,45.0
9,MAC Taborsko,Viktoria Plzen-2,1.14,5.9,31.0,,1.1,4.9,-1.5,2.3,1.5,1.55,3.5,1.43,2.6,70.0


In [74]:
xstavka_root = xstavka.find_element_by_xpath('//div[@class="game_content_line on_main live-content "]/div/div/div/div')
blocks = list(map(toDict, xstavka_root.find_elements_by_xpath('.//div[@data-name="dashboard-champ-content"]')))

# data = toDict(xstavka_root)
# blocks = data['children'][0]['children']
# display(blocks.keys())
# display(blocks)

features_cnt = 18
df_xstavka = pd.DataFrame()
columns_added_flg = False

for block in blocks:
# display(block)

    text_tmp = extractText(block)
    text = simplifyList(text_tmp)
    # display(text)

    headers_row = text[1][-1]
    # display(headers_row)

    headers = getUniqueHeaders(headers_row)
    # display(headers)

    if not columns_added_flg:
        df_xstavka = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True

    rows = text[2:]

    for row in rows:
        # display(row)
        try:
            players_tmp = row[1][1][1][2]
    #         display(players_tmp)
            player_1_tmp, player_2_tmp = players_tmp[1][1], players_tmp[2][1]

            player_1, player_2 = player_1_tmp.strip(), player_2_tmp.strip()
    #         display(player_1, player_2)

            coeffs = row[1][-1]
    #         display(coeffs)
#             display(len(headers), len(coeffs))

            if len(headers) == len(coeffs):
                columns = ['Player 1', 'Player 2'] + headers
                data = [player_1, player_2] + coeffs

                df_xstavka = df_xstavka.append(dict(zip(columns, data)), ignore_index=True)

        except Exception as exc:
            print(exc)

df_xstavka

cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis


Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,2X,O,Total,U,1*,Handicap,2*,O*,IT1,U*,O*.1,IT2,U*.1
0,Lokomotiv Moscow,Krylia Sovetov,17,4.64,1.3,3.71,1.225,1.025,2.41,2.0,1.595,1.95,+1-,1.89,2.26,0.5,1.64,2.26,1.5,1.64
1,Famalicao,Portimonense,4.74,3.64,1.792,2.09,1.315,1.216,2.09,3.0,1.775,1.52,+1-,2.61,1.66,1,2.23,1.66,1.5,2.23
2,Millwall,Swansea City,1.512,4.02,7.5,1.11,1.275,2.656,1.67,2.5,2.26,1.97,-1+,1.87,2.29,2,1.63,2.29,1,1.63
3,MAS Taborsko,Viktoria Plzen II,1.21,5.56,15.2,-,1.13,4.1,1.87,4.0,1.93,2.53,-1.5+,1.52,2.57,2.5,1.48,2.57,1.5,1.48
4,MND Tabor Sežana,Triestina Calcio,2.89,3.27,2.39,1.54,1.315,1.39,2.17,2.5,1.69,2.09,0,1.74,1.68,1,2.13,1.68,1,2.13
5,HB Koge,Vejle,3.29,2.91,2.43,1.55,1.405,1.33,1.745,1.5,2.13,2.29,0,1.65,2.18,0.5,1.65,2.18,1,1.65
6,Sanat Mes Kerman,Mes Rafsanjan,11.9,1.11,11.9,1.02,5.96,1.02,5.7,0.5,1.14,1.9,0,1.9,7.39,0.5,1.08,7.39,0.5,1.08
7,Gol Reyhan Alborz,Fajr Sepasi,1.02,16.3,39,-,-,11.6,2.79,2.5,1.44,2.22,-2+,1.66,-,2,-,-,-,-
8,Malavan,Baadraan Tehran,3.13,2.34,3.1,1.344,1.565,1.34,1.685,1.0,2.18,2.06,0,1.76,1.89,0.5,1.87,1.89,0.5,1.87
9,Sorkhpooshan Pakdasht,Esteghlal Khuzestan,3.13,2.27,3.23,1.32,1.595,1.34,1.755,1.0,2.07,1.85,0,1.95,2.06,0.5,1.73,2.06,0.5,1.73


In [75]:
df_fonbet_short = df_fonbet.iloc[:, range(8)]
df_xstavka_short = df_xstavka.iloc[:, range(8)]

df_joined = df_fonbet_short.merge(df_xstavka_short,  on='Player 1')
df_joined

Unnamed: 0,Player 1,Player 2_x,1_x,X_x,2_x,1X_x,12_x,X2,Player 2_y,1_y,X_y,2_y,1X_y,12_y,2X
0,Lokomotiv Moscow,Krylya Sovetov,14.0,4.2,1.35,3.2,1.23,1.02,Krylia Sovetov,17,4.64,1.3,3.71,1.225,1.025
1,Famalicao,Portimonense,5.0,3.8,1.7,2.15,1.27,1.17,Portimonense,4.74,3.64,1.792,2.09,1.315,1.216
2,Millwall,Swansea City,1.42,4.2,9.0,1.06,1.23,2.85,Swansea City,1.512,4.02,7.5,1.11,1.275,2.656
3,Ravenna,Fano,2.15,3.4,3.25,1.32,1.3,1.65,Alma Juventus Fano 1906,2.44,3.15,2.92,1.38,1.336,1.525
4,HB Koge,Vejle,3.3,2.8,2.35,1.53,1.38,1.3,Vejle,3.29,2.91,2.43,1.55,1.405,1.33
5,Jelgava,FC Riga,,,,,,,Riga,28,23.0,-,-,-,-
6,Warta Poznan,Chrobry Glogow,2.4,2.25,4.6,1.16,1.57,1.5,Chrobry Glogow,2.4,2.2,5.01,1.155,1.63,1.54
7,Adanaspor,Altinordu,3.65,3.1,2.13,1.67,1.35,1.25,Altınordu,3.59,3.18,2.15,1.69,1.35,1.288
8,SavU,LaPa,,,,,,,LaPa,-,23.0,27,-,-,-
9,Sorkhpooshan Pakdasht,Esteghlal Khuzestan,3.1,2.15,3.4,1.27,1.62,1.32,Esteghlal Khuzestan,3.13,2.27,3.23,1.32,1.595,1.34


In [76]:
first, second = '1_x', '2X'
df_joined.loc[:, f'Fork: {first} - {second}'] = checkPair(first, second)
df_joined

float() argument must be a string or a number, not 'NoneType'
float() argument must be a string or a number, not 'NoneType'


Unnamed: 0,Player 1,Player 2_x,1_x,X_x,2_x,1X_x,12_x,X2,Player 2_y,1_y,X_y,2_y,1X_y,12_y,2X,Fork: 1_x - 2X
0,Lokomotiv Moscow,Krylya Sovetov,14.0,4.2,1.35,3.2,1.23,1.02,Krylia Sovetov,17,4.64,1.3,3.71,1.225,1.025,-0.047038
1,Famalicao,Portimonense,5.0,3.8,1.7,2.15,1.27,1.17,Portimonense,4.74,3.64,1.792,2.09,1.315,1.216,-0.022368
2,Millwall,Swansea City,1.42,4.2,9.0,1.06,1.23,2.85,Swansea City,1.512,4.02,7.5,1.11,1.275,2.656,-0.080731
3,Ravenna,Fano,2.15,3.4,3.25,1.32,1.3,1.65,Alma Juventus Fano 1906,2.44,3.15,2.92,1.38,1.336,1.525,-0.120854
4,HB Koge,Vejle,3.3,2.8,2.35,1.53,1.38,1.3,Vejle,3.29,2.91,2.43,1.55,1.405,1.33,-0.05491
5,Jelgava,FC Riga,,,,,,,Riga,28,23.0,-,-,-,-,
6,Warta Poznan,Chrobry Glogow,2.4,2.25,4.6,1.16,1.57,1.5,Chrobry Glogow,2.4,2.2,5.01,1.155,1.63,1.54,-0.066017
7,Adanaspor,Altinordu,3.65,3.1,2.13,1.67,1.35,1.25,Altınordu,3.59,3.18,2.15,1.69,1.35,1.288,-0.05037
8,SavU,LaPa,,,,,,,LaPa,-,23.0,27,-,-,-,
9,Sorkhpooshan Pakdasht,Esteghlal Khuzestan,3.1,2.15,3.4,1.27,1.62,1.32,Esteghlal Khuzestan,3.13,2.27,3.23,1.32,1.595,1.34,-0.068849


In [70]:
df_fonbet.loc[:, ['Player 1', 'Player 2']].to_csv('fonbet_names.csv')
df_xstavka.loc[:, ['Player 1', 'Player 2']].to_csv('xstavka_names.csv')