In [136]:
import requests
import json
import re
import os
from fake_useragent import UserAgent
import time

import pandas as pd

import csv

from bs4 import BeautifulSoup as bs

from selenium_stealth import stealth

from selenium import webdriver

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains

In [14]:
def get_random_chrome_user_agent(): 
    """  
    Функция для генерации случайного user агента 
    """
    user_agent = UserAgent(browsers='chrome', os='windows', platforms='pc')
    return user_agent.random


def create_driver(user_id): #  Функция для создания driver selenium (Аргументы: id пользователя для конкретного сайта)
    options = Options()
    
    options.add_experimental_option("excludeSwitches", ["enable-automation"]) # Опция, исключающая переключатель "enable-automation"
    options.add_experimental_option('useAutomationExtension', False) # Опция, отключающая расширение автоматизации в браузере
    options.add_argument("start-maximized") # Аргумент для запуска браузера в режиме максимального окна
    
    script_dir = os.getcwd() # Получение пути к корневому репозиторию
    base_directory = os.path.join(script_dir, 'users') # Создание пути к репозиторию users
    user_directory = os.path.join(base_directory, f'user_{user_id}') # Создание пути к репозиторию пользователя по номеру id

    options.add_argument(f'user-data-dir={user_directory}') # Аргумент, указывающий на директорию с данными пользователя
    # options.add_argument('--disable-gpu')
    # options.add_argument('--disable-dev-shm-usage')
    options.add_argument("--disable-notifications") # Отключаются уведомления в браузере
    options.add_argument("--disable-popup-blocking") # Отключается блокировка всплывающих окон
    # options.add_argument('--no-sandbox')

    driver = webdriver.Chrome(options=options) # Создается экземпляр webdriver для Chrome с заданными параметрами options
    ua = get_random_chrome_user_agent() # Вызов функции get_random_chrome_user_agent
    stealth(driver=driver, # Вызывает функцию stealth для маскировки использования webdriver, с установкой различных параметров 
            user_agent=ua,
            languages=["ru-RU", "ru"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
            run_on_insecure_origins=True
            )
    return driver

In [39]:
def data_parse(driver): # Функция для парсинга данных (Аргументы: объект webdriver)
    result_list = [] # Создание пустого списка
    result_dict = {}
    
    action = ActionChains(driver) # Вызывается функция ActionChains для инициализации action

    teams = driver.find_elements("xpath", '//div[contains(@class, "stats-section")]/table[contains(@class, "stats-table player-ratings-table")]/tbody/tr')

    for team in teams:
        team_url_url = team.find_element("xpath", './/td/a')
        team_name = team_url_url.text
        team_url = team_url_url.get_attribute('href')
        result_dict = {'Name': team_name, 'Url': team_url}
        result_list.append(result_dict)
        
    return result_list

In [29]:
user_id = 3 # Инициализация переменной id пользователя
url = "https://www.hltv.org/stats/teams?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50" # Ссылка на сайт
driver = create_driver(user_id) # Вызов функции create_driver
driver.get(url) 
ipynb_dir = os.getcwd() # Получение пути к корневому репозиторию


In [40]:
result_list = data_parse(driver)
with open(f"{ipynb_dir}\HLTV_Teams.json", "w", encoding='utf-8') as outfile:
            outfile.write(json.dumps(result_list, ensure_ascii=False, indent=4)) # Сохранение файла DetMir.json в корневой папке с словарем

In [43]:
# Чтение JSON-файла
with open('HLTV_Teams.json', 'r', encoding='utf-8') as file:
    data_teams = json.load(file)

# Вывод данных
print(data_teams[0]['Url'])

https://www.hltv.org/stats/teams/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50


In [63]:
def str_to_number(s):
    try:
        if '.' in s:
            return float(s)
        return int(s)
    except ValueError:
        raise ValueError(f"Cannot convert '{s}' to a number")

In [64]:
def data_parse1(driver): # Функция для парсинга данных (Аргументы: объект webdriver)
    result_list = [] # Создание пустого списка
    result_dict = {}
    
    action = ActionChains(driver) # Вызывается функция ActionChains для инициализации action

    teams = driver.find_elements("xpath", '//div[contains(@class, "columns")]/div[contains(@class, "col standard-box big-padding")]')

    for team in teams:
        stats_stats_url = team.find_element("xpath", './/div[contains(@class, "large-strong")]')
        stats_stats = stats_stats_url.text
        stats_name_url = team.find_element("xpath", './/div[contains(@class, "small-label-below")]')
        stats_name = stats_name_url.text.lower()
        if stats_name == 'wins / draws / losses':
            stats_name = stats_name.split(' / ')
            stats_stats = stats_stats.split(' / ')
            result_dict[stats_name[0]] =  str_to_number(stats_stats[0])
            result_dict[stats_name[2]] =  str_to_number(stats_stats[2])
        else:
            result_dict[stats_name] =  str_to_number(stats_stats)
        
    return result_dict

In [69]:
slovar = {}
for i in range(len(data_teams)):
    user_id = i # Инициализация переменной id пользователя
    url = data_teams[i]['Url'] # Ссылка на сайт
    driver = create_driver(user_id) # Вызов функции create_driver
    driver.get(url) 
    ipynb_dir = os.getcwd() # Получение пути к корневому репозиторию
    result_dict = data_parse1(driver)
    slovar[data_teams[i]['Name']] = result_dict
    print(slovar)
    driver.quit()

{'Vitality': [{'maps played': 82, 'wins': 55, 'losses': 27, 'total kills': 6420, 'total deaths': 5828, 'rounds played': 1842, 'k/d ratio': 1.1}]}
{'Vitality': [{'maps played': 82, 'wins': 55, 'losses': 27, 'total kills': 6420, 'total deaths': 5828, 'rounds played': 1842, 'k/d ratio': 1.1}], 'Natus Vincere': [{'maps played': 92, 'wins': 63, 'losses': 29, 'total kills': 6881, 'total deaths': 6268, 'rounds played': 1989, 'k/d ratio': 1.1}]}
{'Vitality': [{'maps played': 82, 'wins': 55, 'losses': 27, 'total kills': 6420, 'total deaths': 5828, 'rounds played': 1842, 'k/d ratio': 1.1}], 'Natus Vincere': [{'maps played': 92, 'wins': 63, 'losses': 29, 'total kills': 6881, 'total deaths': 6268, 'rounds played': 1989, 'k/d ratio': 1.1}], 'Spirit': [{'maps played': 79, 'wins': 52, 'losses': 27, 'total kills': 5707, 'total deaths': 5212, 'rounds played': 1659, 'k/d ratio': 1.09}]}
{'Vitality': [{'maps played': 82, 'wins': 55, 'losses': 27, 'total kills': 6420, 'total deaths': 5828, 'rounds played'

In [71]:
with open(f"{ipynb_dir}\Teams_Overview.json", "w", encoding='utf-8') as outfile:
            outfile.write(json.dumps(slovar, ensure_ascii=False, indent=4)) # Сохранение файла DetMir.json в корневой папке с словарем

In [73]:
Maps = {
'Ancient': 47,
'Anubis': 48,
'Dust 2': 31,
'Mirage': 32,
'Inferno': 33,
'Nuke': 34,
'Vertigo': 46
}

In [89]:
data_teams_maps = {}
for j in range(len(data_teams)):
    splt = data_teams[j]['Url'].split('teams/')
    teams_maps = {}
    for Map in Maps:
        teams_maps[Map] = splt[0] + f'teams/map/{Maps[Map]}/' + splt[1]
    data_teams_maps[data_teams[j]['Name']] = teams_maps

In [90]:
data_teams_maps

{'Vitality': {'Ancient': 'https://www.hltv.org/stats/teams/map/47/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
  'Anubis': 'https://www.hltv.org/stats/teams/map/48/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
  'Dust 2': 'https://www.hltv.org/stats/teams/map/31/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
  'Mirage': 'https://www.hltv.org/stats/teams/map/32/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
  'Inferno': 'https://www.hltv.org/stats/teams/map/33/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
  'Nuke': 'https://www.hltv.org/stats/teams/map/34/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
  'Vertigo': 'https://www.hltv.org/stats/teams/map/46/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50'},
 'Natus Vincere': {'Ancient': 'https://www.hltv.org/stats/teams/map/47/4608/natus-v

In [127]:
with open(f"{ipynb_dir}\Maps_URL.json", "w", encoding='utf-8') as outfile:
            outfile.write(json.dumps(data_teams_maps, ensure_ascii=False, indent=4)) # Сохранение файла DetMir.json в корневой папке с словарем

In [121]:
def data_parse2(driver): # Функция для парсинга данных (Аргументы: объект webdriver)
    result_list = [] # Создание пустого списка
    result_dict = {}
    
    action = ActionChains(driver) # Вызывается функция ActionChains для инициализации action

    teams = driver.find_elements("xpath", '//div[contains(@class, "stats-rows standard-box")]/div[contains(@class, "stats-row")]')

    for team in teams:
        stats_stats_url = team.find_element("xpath", './/span[last()]')
        stats_stats = stats_stats_url.text.replace('%', '')
        stats_name_url = team.find_element("xpath", './/span[contains(@class, "strong")]')
        stats_name = stats_name_url.text.lower()
        if stats_name == 'wins / draws / losses':
            stats_name = stats_name.split(' / ')
            stats_stats = stats_stats.split(' / ')
            result_dict[stats_name[0]] =  str_to_number(stats_stats[0])
            result_dict[stats_name[2]] =  str_to_number(stats_stats[2])
        else:
            result_dict[stats_name] =  str_to_number(stats_stats)
    result_list.append(result_dict)
    return result_list

In [104]:
user_id = 36 # Инициализация переменной id пользователя
url = data_teams_maps['Vitality']['Nuke'] # Ссылка на сайт
driver = create_driver(user_id) # Вызов функции create_driver
driver.get(url) 
result_dict = data_parse2(driver)

In [128]:
stats_teams_maps = {}
stats_maps = {}
for i in data_teams_maps:
    user_id = i # Инициализация переменной id пользователя
    for Map in Maps:
        url = data_teams_maps[i][Map] # Ссылка на сайт
        driver = create_driver(user_id) # Вызов функции create_driver
        time.sleep(10)
        driver.get(url) 
        ipynb_dir = os.getcwd() # Получение пути к корневому репозиторию
        result_list = data_parse2(driver)
        stats_maps[Map] = result_list
        help_lst = []
        help_lst.append(stats_maps)
        driver.quit()
    stats_teams_maps[i] = help_lst
    print(stats_teams_maps)

{'Vitality': [{'Ancient': [{'times played': 0, 'wins': 0, 'losses': 0, 'total rounds played': 0, 'rounds won': 0, 'win percent': 0.0, 'pistol rounds': 0, 'pistol rounds won': 0, 'pistol round win percent': 0.0, 'ct round win percent': 0.0, 't round win percent': 0.0}], 'Anubis': [{'times played': 13, 'wins': 6, 'losses': 7, 'total rounds played': 314, 'rounds won': 160, 'win percent': 46.2, 'pistol rounds': 26, 'pistol rounds won': 9, 'pistol round win percent': 34.6, 'ct round win percent': 44.9, 't round win percent': 57.0}], 'Dust 2': [{'times played': 19, 'wins': 13, 'losses': 6, 'total rounds played': 385, 'rounds won': 223, 'win percent': 68.4, 'pistol rounds': 38, 'pistol rounds won': 23, 'pistol round win percent': 60.5, 'ct round win percent': 52.0, 't round win percent': 63.1}], 'Mirage': [{'times played': 14, 'wins': 11, 'losses': 3, 'total rounds played': 356, 'rounds won': 198, 'win percent': 78.6, 'pistol rounds': 28, 'pistol rounds won': 15, 'pistol round win percent': 5

In [129]:
with open(f"{ipynb_dir}\Teams_Maps.json", "w", encoding='utf-8') as outfile:
            outfile.write(json.dumps(stats_teams_maps, ensure_ascii=False, indent=4)) # Сохранение файла DetMir.json в корневой папке с словарем

In [133]:
# Создание CSV файла
with open('team_stats.csv', 'w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['team', 'map', 'times played', 'wins', 'losses', 'total rounds played', 'rounds won', 'win percent', 'pistol rounds', 'pistol rounds won', 'pistol round win percent', 'ct round win percent', 't round win percent']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()

    # Проход по каждой команде и карте
    for team, maps in stats_teams_maps.items():
        for map_stats in maps:
            for map_name, stats_list in map_stats.items():
                for stats in stats_list:
                    row = {
                        'team': team,
                        'map': map_name,
                        'times played': stats.get('times played', 0),
                        'wins': stats.get('wins', 0),
                        'losses': stats.get('losses', 0),
                        'total rounds played': stats.get('total rounds played', 0),
                        'rounds won': stats.get('rounds won', 0),
                        'win percent': stats.get('win percent', 0.0),
                        'pistol rounds': stats.get('pistol rounds', 0),
                        'pistol rounds won': stats.get('pistol rounds won', 0),
                        'pistol round win percent': stats.get('pistol round win percent', 0.0),
                        'ct round win percent': stats.get('ct round win percent', 0.0),
                        't round win percent': stats.get('t round win percent', 0.0)
                    }
                    writer.writerow(row)

print("CSV файл создан успешно.")

CSV файл создан успешно.


In [137]:
team_stats = pd.read_csv('team_stats.csv')

In [138]:
team_stats

Unnamed: 0,team,map,times played,wins,losses,total rounds played,rounds won,win percent,pistol rounds,pistol rounds won,pistol round win percent,ct round win percent,t round win percent
0,Vitality,Ancient,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0
1,Vitality,Anubis,43,25,18,944,486,58.1,86,42,48.8,46.3,57.3
2,Vitality,Dust 2,16,6,10,371,172,37.5,32,18,56.2,48.3,44.6
3,Vitality,Mirage,22,7,15,504,241,31.8,44,22,50.0,46.3,49.6
4,Vitality,Inferno,15,8,7,324,177,53.3,30,18,60.0,53.9,55.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,Zero Tenacity,Dust 2,16,6,10,371,172,37.5,32,18,56.2,48.3,44.6
311,Zero Tenacity,Mirage,22,7,15,504,241,31.8,44,22,50.0,46.3,49.6
312,Zero Tenacity,Inferno,15,8,7,324,177,53.3,30,18,60.0,53.9,55.6
313,Zero Tenacity,Nuke,19,10,9,422,200,52.6,38,20,52.6,54.5,40.4


In [140]:
# Запись в CSV файл
with open('team_overall_stats.csv', 'w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['team', 'maps played', 'wins', 'losses', 'total kills', 'total deaths', 'rounds played', 'k/d ratio']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    
    # Проход по каждой команде и их статистике
    for team, stats_list in slovar.items():
        for stats in stats_list:
            row = {
                'team': team,
                'maps played': stats.get('maps played', 0),
                'wins': stats.get('wins', 0),
                'losses': stats.get('losses', 0),
                'total kills': stats.get('total kills', 0),
                'total deaths': stats.get('total deaths', 0),
                'rounds played': stats.get('rounds played', 0),
                'k/d ratio': stats.get('k/d ratio', 0)
            }
            writer.writerow(row)

print("CSV файл создан успешно.")

CSV файл создан успешно.


In [141]:
# Чтение CSV файла
team_overall_stats = pd.read_csv('team_overall_stats.csv')
team_overall_stats

Unnamed: 0,team,maps played,wins,losses,total kills,total deaths,rounds played,k/d ratio
0,Vitality,82,55,27,6420,5828,1842,1.1
1,Natus Vincere,92,63,29,6881,6268,1989,1.1
2,Spirit,79,52,27,5707,5212,1659,1.09
3,MOUZ,83,50,33,6266,5851,1817,1.07
4,The MongolZ,86,53,33,6551,6111,1902,1.07
5,G2,111,71,40,8098,7656,2399,1.06
6,MIBR,117,66,51,8146,7866,2452,1.04
7,PARIVISION,85,45,40,6284,6063,1839,1.04
8,9 Pandas,92,45,47,6726,6632,2024,1.01
9,Monte,132,65,67,10238,9824,2975,1.04


In [145]:
data_teams_info = {}
for j in range(len(data_teams)):
    splt = data_teams[j]['Url'].split('stats/teams/')
    teams_maps = splt[0] + f'team/' + splt[1]
    data_teams_info[data_teams[j]['Name']] = teams_maps

In [146]:
data_teams_info

{'Vitality': 'https://www.hltv.org/team/9565/vitality?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'Natus Vincere': 'https://www.hltv.org/team/4608/natus-vincere?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'Spirit': 'https://www.hltv.org/team/7020/spirit?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'MOUZ': 'https://www.hltv.org/team/4494/mouz?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'The MongolZ': 'https://www.hltv.org/team/6248/the-mongolz?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'G2': 'https://www.hltv.org/team/5995/g2?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'MIBR': 'https://www.hltv.org/team/9215/mibr?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 'PARIVISION': 'https://www.hltv.org/team/12467/parivision?startDate=2024-03-29&endDate=2024-09-29&rankingFilter=Top50',
 '9 Pandas': 'https://www.hltv.org/team/11883/9-pandas?startDate=2024-03-29&endDate

In [147]:
with open(f"{ipynb_dir}\Teams_Info_URL.json", "w", encoding='utf-8') as outfile:
            outfile.write(json.dumps(data_teams_info, ensure_ascii=False, indent=4)) # Сохранение файла DetMir.json в корневой папке с словарем

In [163]:
def data_parse3(driver): # Функция для парсинга данных (Аргументы: объект webdriver)
    result_list = [] # Создание пустого списка
    result_dict = {}
    
    action = ActionChains(driver) # Вызывается функция ActionChains для инициализации action

    teams = driver.find_elements("xpath", '//div[contains(@class, "profile-team-stats-container")]')

    for team in teams:
        valve_url = team.find_element("xpath", './/div[contains(@class, "profile-team-stat-50-50")]/div[contains(@class, "profile-team-stat")][1]/span/a')
        valve_stats = valve_url.text.replace('#', '')
        # print(valve_stats)
        world_url = team.find_element("xpath", './/div[contains(@class, "profile-team-stat-50-50")]/div[contains(@class, "profile-team-stat")][last()]/span/a')
        world_stats = world_url.text.replace('#', '')
        # print(world_stats)
        top30_url = driver.find_element("xpath", '/html/body/div[4]/div[8]/div[2]/div[1]/div[2]/div[2]/div[2]/div[2]/span[contains(@class, "right")]')
        top30_stats = top30_url.text
        # print(top30_stats)
        result_dict =  {'valve ranking': str_to_number(valve_stats), 'world ranking': str_to_number(world_stats), 'weeks in top30 for core': str_to_number(top30_stats)}
    # result_list.append(result_dict)
    return result_dict

In [161]:
user_id = 44 # Инициализация переменной id пользователя
url = data_teams_info['Vitality'] # Ссылка на сайт
driver = create_driver(user_id) # Вызов функции create_driver
driver.get(url) 
result_dict = data_parse3(driver)
# driver.quit()
result_dict

3
2
111


{'valve ranking': 3, 'world ranking': 2, 'weeks in top30 for core': 111}

In [164]:
stats_teams_maps = {}
teams_info = {}
for i in data_teams_maps:
    user_id = i # Инициализация переменной id пользователя
    url = data_teams_info[i] # Ссылка на сайт
    driver = create_driver(user_id) # Вызов функции create_driver
    # time.sleep(10)
    driver.get(url) 
    ipynb_dir = os.getcwd() # Получение пути к корневому репозиторию
    result_dict = data_parse3(driver)
    teams_info[i] = result_dict
    driver.quit()
    print(teams_info)
teams_info

{'Vitality': {'valve ranking': 3, 'world ranking': 2, 'weeks in top30 for core': 111}}
{'Vitality': {'valve ranking': 3, 'world ranking': 2, 'weeks in top30 for core': 111}, 'Natus Vincere': {'valve ranking': 1, 'world ranking': 1, 'weeks in top30 for core': 63}}
{'Vitality': {'valve ranking': 3, 'world ranking': 2, 'weeks in top30 for core': 111}, 'Natus Vincere': {'valve ranking': 1, 'world ranking': 1, 'weeks in top30 for core': 63}, 'Spirit': {'valve ranking': 4, 'world ranking': 4, 'weeks in top30 for core': 59}}
{'Vitality': {'valve ranking': 3, 'world ranking': 2, 'weeks in top30 for core': 111}, 'Natus Vincere': {'valve ranking': 1, 'world ranking': 1, 'weeks in top30 for core': 63}, 'Spirit': {'valve ranking': 4, 'world ranking': 4, 'weeks in top30 for core': 59}, 'MOUZ': {'valve ranking': 6, 'world ranking': 5, 'weeks in top30 for core': 68}}
{'Vitality': {'valve ranking': 3, 'world ranking': 2, 'weeks in top30 for core': 111}, 'Natus Vincere': {'valve ranking': 1, 'world ran

{'Vitality': {'valve ranking': 3,
  'world ranking': 2,
  'weeks in top30 for core': 111},
 'Natus Vincere': {'valve ranking': 1,
  'world ranking': 1,
  'weeks in top30 for core': 63},
 'Spirit': {'valve ranking': 4,
  'world ranking': 4,
  'weeks in top30 for core': 59},
 'MOUZ': {'valve ranking': 6,
  'world ranking': 5,
  'weeks in top30 for core': 68},
 'The MongolZ': {'valve ranking': 10,
  'world ranking': 13,
  'weeks in top30 for core': 52},
 'G2': {'valve ranking': 2,
  'world ranking': 3,
  'weeks in top30 for core': 140},
 'MIBR': {'valve ranking': 21,
  'world ranking': 14,
  'weeks in top30 for core': 54},
 'PARIVISION': {'valve ranking': 40,
  'world ranking': 43,
  'weeks in top30 for core': 0},
 '9 Pandas': {'valve ranking': 45,
  'world ranking': 36,
  'weeks in top30 for core': 22},
 'Monte': {'valve ranking': 37,
  'world ranking': 40,
  'weeks in top30 for core': 0},
 'Eternal Fire': {'valve ranking': 5,
  'world ranking': 6,
  'weeks in top30 for core': 194},
 'B8

In [165]:
with open(f"{ipynb_dir}\Teams_Ranks.json", "w", encoding='utf-8') as outfile:
            outfile.write(json.dumps(teams_info, ensure_ascii=False, indent=4)) # Сохранение файла DetMir.json в корневой папке с словарем