In [1]:
import cloudscraper
from bs4 import BeautifulSoup
import re

In [3]:
scraper = cloudscraper.create_scraper()  # returns a CloudScraper instance
matches_url = "https://csgostats.gg/match"
content = scraper.get(matches_url).text

In [142]:
soup = BeautifulSoup(content , 'html.parser')

from datetime import datetime , timedelta
import attr
from attr import validators
from collections import defaultdict
from urllib.parse import urljoin
from typing import Dict , Union , List

def calculate_datetime(time_of_logging):
    return (datetime.now() - timedelta(minutes = int(time_of_logging.split(' ')[0]))).strftime('%Y-%m-%d %H-%M-%S')
    

class Chameleon:
        
    def __init__(self , url : str) -> None:
        self.url = str(url)
        self.scraper = cloudscraper.create_scraper()
        self.soup = None
        
    def observe(self , **kwargs) -> bool:
        try:
            self.soup = BeautifulSoup( self.scraper.get( urljoin(self.url , 
                                                                 self.get_endpoint(**kwargs)) ).text , 
                                      'html.parser')
            return True
        except Exception as e:
            print(e)
            return False
        
    def get_endpoint(self) -> str:
        pass
        
    def parse(self):
        pass
    

class CSGOChameleon(Chameleon):
    
    endpoints = {'MatchesChameleon':'matches',
                 'PlayersChameleon':'leaderboards',
                 'MatchesDetailsChameleon':''}
    
    def __init__(self):
        super().__init__('https://csgostats.gg')
        
    def get_endpoint(self) -> str:
        return CSGOChameleon.endpoints.get(type(self).__name__)
        
        
class MatchesChameleon(CSGOChameleon):
    
    def parse(self) -> Dict[str, Union[str , List[str] , int]]:
        all_matches = []
        
        for soupie in self.soup.find_all('tr', {'class':'p-row js-link'}):
            match_id = re.findall( r'match/(\d+)' , soupie.attrs['onclick'])[0]

            csmap = soupie.find('img' , {'title':re.compile(r'de_*')}).attrs['title']

            rank = soupie.find('img', {'src':re.compile(r'ranks')}).attrs['title']

            teams = list(map(lambda x : x.attrs['title'] , 
                             soupie.find_all('img',{'src':re.compile(r'avatars')})
                            )
                        )
            t_team , ct_team = teams[:5] , teams[5:]

            date = calculate_datetime(soupie.find('td',{'class':'nowrap'}).string.strip())

            matches['match_id'] = match_id
            matches['map'] = csmap
            matches['date'] = date
            matches['rank'] = rank
            matches['players'] = {'ct_team': ct_team,
                                   't_team': t_team}
            yield matches
            
            
class PlayersChameleon(CSGOChameleon):
    
    def get_endpoint(self , page : int) -> str:
        return super().get_endpoint() + f'?page={page}'
    
    def parse(self):
        player_data = []
        for soupie in self.soup.find_all('div',{'onclick':re.compile(r'player')}):
            player_id = re.findall( r'player/(\d+)' , soupie.attrs['onclick'])[0]

            player_name = soupie.find('a').text.strip('\n').strip()
            weapons = soupie.find_all('img',{'src': re.compile(r'weapons')})
            primary_weapon = weapons[0].attrs['title']
            secondary_weapon = weapons[1].attrs['title']

            cake = soupie.find_all('div' , {'style':'float:left; width:10%;text-align:center;'})[2:]
            kd = cake[0].find('span').string.strip()
            hs = cake[1].string
            win_rate = cake[2].next_element.strip('\n').strip()
            vx = cake[3].string
            rating = cake[4].string

            player_data.append( {'player_id' : player_id,
                                 'player_name': player_name,
                                 'primary_weapon' : primary_weapon,
                                 'secondary_weapon': secondary_weapon,
                                 'kd': kd,
                                 'hs':hs,
                                 'win_rate':win_rate,
                                 'rating':rating
                                 })
        return player_data
            
    
class MatchesDetailsChameleon(Chameleon):
    
    def parse(self):
        rounds = {'round_id' : ''}

        for round_ , soupie in enumerate(soup.find_all('div', class_ = 'round-info-side')):
            if not round_%2:
                continue
                
            round_stats = defaultdict(list)

            for cake in soupie.find_all('div', class_ = 'tl-inner'):

                ##get time
                time = cake.find('span', {'title': re.compile(r'Tick*')}).string
                round_stats["Times"].append(time)

                ##killing team
                killer_team = cake.find('span' , {'class': re.compile(r'team')})\
                                  .attrs['class'][0].split('-')[-1]

                ##actors
                actors = list(map( lambda x : x.string , 
                            cake.find_all('span' , {'class': re.compile(r'team-')})))

                killers = ', '.join(actors[:-1])
                dead = actors[-1]
                round_stats["Actors"].append(killers)
                round_stats["Target"].append(dead)

                ##weapon
                weapon_headshot = cake.find_all('img')
                round_stats['Weapon'].append(weapon_headshot[0].attrs['title'])

                ##headshot
                headshot = len(weapon_headshot) > 1
                round_stats["Headshot"].append(headshot)

            ##winner
            winner = soupie.find('span',class_ = 'attacker').string.strip('\n').strip()
            round_stats['Winner'].append(winner)


            rounds.update({round_: round_stats})
        return rounds

In [145]:
cham = PlayersChameleon()
cham.observe(page = 2)

True

In [147]:
cham.parse()

[{'player_id': '76561198135668261',
  'player_name': 'BRIVES',
  'primary_weapon': 'ak47',
  'secondary_weapon': 'deagle',
  'kd': '167 / 49',
  'hs': '53%',
  'win_rate': '90%',
  'rating': '2.17'},
 {'player_id': '76561198886280955',
  'player_name': 'Pedo Pan',
  'primary_weapon': 'ak47',
  'secondary_weapon': 'm4a1_silencer',
  'kd': '291 / 104',
  'hs': '61%',
  'win_rate': '90%',
  'rating': '2.16'},
 {'player_id': '76561198173297394',
  'player_name': 'WACHO',
  'primary_weapon': 'ak47',
  'secondary_weapon': 'm4a1_silencer',
  'kd': '312 / 139',
  'hs': '40%',
  'win_rate': '70%',
  'rating': '2.16'},
 {'player_id': '76561198423385704',
  'player_name': 'Aevu',
  'primary_weapon': 'awp',
  'secondary_weapon': 'deagle',
  'kd': '124 / 25',
  'hs': '46%',
  'win_rate': '100%',
  'rating': '2.16'},
 {'player_id': '76561198262126621',
  'player_name': 'h0le',
  'primary_weapon': 'awp',
  'secondary_weapon': 'deagle',
  'kd': '193 / 78',
  'hs': '38%',
  'win_rate': '80%',
  'rating

In [113]:
scraper = cloudscraper.create_scraper()
matches_url = "https://csgostats.gg/leaderboards"
content = scraper.get(matches_url).text

In [137]:
soup = BeautifulSoup( content , 'html.parser')
player_data = []
for soupie in soup.find_all('div',{'onclick':re.compile(r'player')}):
    player_id = re.findall( r'player/(\d+)' , soupie.attrs['onclick'])[0]
    
    player_name = soupie.find('a').text.strip('\n').strip()
    weapons = soupie.find_all('img',{'src': re.compile(r'weapons')})
    primary_weapon = weapons[0].attrs['title']
    secondary_weapon = weapons[1].attrs['title']
    
    cake = soupie.find_all('div' , {'style':'float:left; width:10%;text-align:center;'})[2:]
    kd = cake[0].find('span').string.strip()
    hs = cake[1].string
    win_rate = cake[2].next_element.strip('\n').strip()
    vx = cake[3].string
    rating = cake[4].string

    player_data.append( {'player_id' : player_id,
                         'player_name': player_name,
                         'primary_weapon' : primary_weapon,
                         'secondary_weapon': secondary_weapon,
                         'kd': kd,
                         'hs':hs,
                         'win_rate':win_rate,
                         'rating':rating
                         })
    player_data

[{'player_id': '76561199257950294', 'player_name': 'RETIRED DONT INVITE', 'primary_weapon': 'ssg08', 'secondary_weapon': 'usp_silencer', 'kd': '182 / 10', 'hs': '85%', 'win_rate': '100%', 'rating': '4.53'}]
[{'player_id': '76561199257950294', 'player_name': 'RETIRED DONT INVITE', 'primary_weapon': 'ssg08', 'secondary_weapon': 'usp_silencer', 'kd': '182 / 10', 'hs': '85%', 'win_rate': '100%', 'rating': '4.53'}, {'player_id': '76561198971823213', 'player_name': 'canadian egirl', 'primary_weapon': 'ssg08', 'secondary_weapon': 'awp', 'kd': '265 / 34', 'hs': '65%', 'win_rate': '100%', 'rating': '3.72'}]
[{'player_id': '76561199257950294', 'player_name': 'RETIRED DONT INVITE', 'primary_weapon': 'ssg08', 'secondary_weapon': 'usp_silencer', 'kd': '182 / 10', 'hs': '85%', 'win_rate': '100%', 'rating': '4.53'}, {'player_id': '76561198971823213', 'player_name': 'canadian egirl', 'primary_weapon': 'ssg08', 'secondary_weapon': 'awp', 'kd': '265 / 34', 'hs': '65%', 'win_rate': '100%', 'rating': '3.7

In [84]:
chameleon = MatchesChameleon(matches_url)

In [85]:
for x in chameleon.parse():
    print(x)
    break

['68033712']
{'match_id': [], 'date': '2022-06-02 21-32-44', 'map': 'de_ancient', 'players': {'ct_team': ['207', 'Kudia', 'GG', 'your bunny wrote', 'El Johnny'], 't_team': ['No Flex', 'kAr0', 'eLTac ♥ᶠᶸᶜᵏᵧₒᵤ♥', 'Kronislaw Bomorowski', 'cr1s.']}, 'rank': 'Distinguished Master Guardian'}


In [10]:
soup.find_all('div', {'class':'p-row js-link'})

[]

In [35]:
datetime.now()

datetime.datetime(2022, 6, 2, 21, 31, 2, 852419)