In [14]:
# Import all packages

import bs4    # BeautifulSoup is used to scrape websites by parsing the HTML
import requests # Requests is used to make HTTP requests
import pandas as pd # Pandas is used to create "dataframes" which are used for data analysis
import copy

In [15]:
# Making a request to the URL to get the entire HTML of the page

URL="https://www.hltv.org/stats/players?rankingFilter=Top5000" 
#Taking 7 years player data to get better understadning of ratings
HEADER = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'} # This is used to prevent the website from blocking the scraper
res = requests.get(URL, headers=HEADER) # This is the request to the URL with the headers passed in as a parameter
res.status_code # This will return the status code of the request (200 if okay)

200

In [16]:
# Parsing the HTML of the page

text=res.content # This is the HTML of the page
soup= bs4.BeautifulSoup(text) # This is the BeautifulSoup object that will be used to parse the HTML
soup.title.text, soup.h1.text # This will return the title and h1 of the page

('CS:GO Player statistics database | HLTV.org', 'RECENT ACTIVITY')

In [17]:
# Checking total number of tables to be scraped

player_table= soup.find_all('table', attrs={"class": "stats-table player-ratings-table"}) # This is the table that contains the player data
len(player_table) # This will return the number of tables that were found
player_table=player_table[0] # This is the first and only table in the list of tables

In [18]:
# Dividing the tale

header= player_table.find("thead") # This is the header of the table
details= player_table.find("tbody") # This is the body of the table

In [19]:
# Checking out the header to see structure and number of columns in the
header

<thead>
<tr class="stats-table-row">
<th class="playerCol">Player</th>
<th class="teamCol">Teams</th>
<th class="mapsCol">Maps</th>
<th class="rounds-col gtSmartphone-only">Rounds</th>
<th class="kdDiffCol">K-D Diff</th>
<th class="kdCol">K/D</th>
<th class="ratingCol">Rating<span class="ratingDesc">1.0</span></th>
</tr>
</thead>

In [20]:
# We will look for all table rows in details and then look for all table cells in each row

details= details.find_all("tr") # This is the list of table rows

In [21]:
# Define the columns of the dataframe with meaningful names

# Finding the headers of the table
headers = player_table.find('thead').find_all('th')
# Renaming the headers
headers[0].string = 'Player'
headers[1].string = 'Teams'
headers[2].string = 'Maps'
headers[3].string = 'Rounds'
headers[4].string = 'K-D Diff'
headers[5].string = 'K/D'
headers[6].string = 'Rating (1.0)'

headers

[<th class="playerCol">Player</th>,
 <th class="teamCol">Teams</th>,
 <th class="mapsCol">Maps</th>,
 <th class="rounds-col gtSmartphone-only">Rounds</th>,
 <th class="kdDiffCol">K-D Diff</th>,
 <th class="kdCol">K/D</th>,
 <th class="ratingCol">Rating (1.0)</th>]

In [22]:
base_url='https://www.hltv.org'

player_detail_link=[]


In [None]:
list=[]
for tr in details:
    #name
    try:
        name= tr.find_all('td', attrs={"class": "playerCol"})[0].find("a").text
        extra_details=tr.find_all('td', attrs={"class": "playerCol"})[0].find_all("a",href=True)[0]['href']
        
        #teams
        teams= []
        team_td= tr.find_all('td', attrs={"class": "teamCol"})[0].find_all("a")
        for a in team_td:
            teams.append(a.find("img").get("title"))
        #maps played
        no_of_maps= tr.find_all('td', attrs={"class": "statsDetail"})[0].text
        #rounds played
        no_of_rounds= tr.find_all('td', attrs={"class": "gtSmartphone-only"})[0].text
        #kd difference
        kd_difference= tr.find_all('td', attrs={"class": "kdDiffCol"})[0].text
        #k/d
        kd= tr.find_all('td', attrs={"class": "statsDetail"})[2].text
        #HLTV Rating
        hltv_rating= tr.find_all('td', attrs={"class": "ratingCol ratingPositive"})[0].text
        player_detail_link.append(base_url+extra_details)
        #creating element
        element={
            # define the dictionary
            'Name': name,
            'Teams': teams,
            'Maps': no_of_maps,
            'Rounds': no_of_rounds,
            'K-D Diff': kd_difference,
            'K/D': kd,
            'Rating (1.0)': hltv_rating
        }
        # print(element)
        #appending element
        list.append(element)
    except:
        pass
print (list)

In [24]:
df1=pd.DataFrame(list)
df1

Unnamed: 0,Name,Teams,Maps,Rounds,K-D Diff,K/D,Rating (1.0)
0,ZywOo,"[Vitality, aAa]",1108,29209,+6778,1.38,1.27
1,s1mple,[Natus Vincere],1649,43626,+9404,1.34,1.24
2,sh1ro,"[Gambit Youngsters, Gambit]",937,24889,+5988,1.45,1.23
3,deko,[1WIN],497,13510,+2890,1.37,1.21
4,Kaze,"[Rare Atom, ViCi, Flash, MVP.karnal]",919,24020,+4439,1.31,1.19
...,...,...,...,...,...,...,...
255,Zellsis,"[Swole Patrol, Lazarus]",626,16210,+803,1.07,1.05
256,Bwills,"[Nouns, Triumph, Gaimin Gladiators]",956,24928,+1268,1.08,1.05
257,nexa,"[G2, Valiance, Imperial, OG]",1340,35515,+2088,1.09,1.05
258,rommi,"[Spirit Academy, HOLLYWOOD, PLINK, 1337]",516,13467,+604,1.06,1.05


In [25]:
r=requests.get("https://www.hltv.org/stats/players/11893/zywoo?rankingFilter=Top5000")    

soup = bs4.BeautifulSoup(r.content,'html')
player_name=soup.find('h1',class_="summaryNickname text-ellipsis")
age=soup.find('div',class_="summaryPlayerAge")
# print(age)
heading_list=[]
value_list=[]
for i in soup.find_all('div',class_="stats-row"):
        c=i.find_all('span',class_="")
        
        try:
            value_list.append(c[1].text)
            heading_list.append(c[0].text)
            
        except:
            pass

for i in soup.find_all('div',class_="summaryStatTooltip hiddenTooltip"):
        c=i.find_all('b',class_="")
        # print(c[0].text)
        heading_list.append(c[0].text)
        
for i in soup.find_all('div',class_="summaryStatBreakdownDataValue"):
            # print(i.text)
            value_list.append(i.text)

print(dict(zip(heading_list,value_list)))

    # final.append(dict(zip(heading_list,value_list)))

{'Total kills': '24631', 'Headshot %': '41.1%', 'Total deaths': '17853', 'K/D Ratio': '1.38', 'Damage / Round': '87.8', 'Grenade dmg / Round': '3.7', 'Maps played': '1108', 'Rounds played': '29209', 'Kills / round': '0.84', 'Assists / round': '0.12', 'Deaths / round': '0.61', 'Saved by teammate / round': '0.09', 'Saved teammates / round': '0.12', 'Rating 2.0': '1.32', 'DPR': '0.61', 'KAST': '75.1%', 'Impact': '1.44', 'ADR': '87.8', 'KPR': '0.84'}


In [26]:
from bs4 import BeautifulSoup 
final=[]
count=0

In [31]:
for j in player_detail_link:
    # print(j)    
    r=requests.get(j)    
    soup = bs4.BeautifulSoup(r.content,'html')
    player_name=soup.find('h1',class_="summaryNickname text-ellipsis")
    age=soup.find('div',class_="summaryPlayerAge")
    
    heading_list=[]
    value_list=[]
    heading_list.append('Real Name')
    heading_list.append('Age')
    value_list.append(player_name.text)
    value_list.append(age.text)
    for i in soup.find_all('div',class_="stats-row"):
            c=i.find_all('span',class_="")            
            try:
                value_list.append(c[1].text)
                # print(value_list)
                heading_list.append(c[0].text)
                
            except:
                pass

    for i in soup.find_all('div',class_="summaryStatTooltip hiddenTooltip"):
            c=i.find_all('b',class_="")
            # print(c[0].text)
            heading_list.append(c[0].text)
            
    for i in soup.find_all('div',class_="summaryStatBreakdownDataValue"):
                # print(i.text)
                value_list.append(i.text)
    dictinary_of_values_headings = dict(zip(heading_list, value_list))
    count+=1
    print(count)
    final.append(dictinary_of_values_headings)
# print(final)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260


In [32]:
df=pd.DataFrame(final)
df

Unnamed: 0,Real Name,Age,Total kills,Headshot %,Total deaths,K/D Ratio,Damage / Round,Grenade dmg / Round,Maps played,Rounds played,...,Deaths / round,Saved by teammate / round,Saved teammates / round,Rating 2.0,DPR,KAST,Impact,ADR,KPR,Rating 1.0
0,ZywOo,22 years,24631,41.1%,17853,1.38,87.8,3.7,1108,29209,...,0.61,0.09,0.12,1.32,0.61,75.1%,1.44,87.8,0.84,
1,s1mple,25 years,37131,41.2%,27727,1.34,85.9,2.4,1649,43626,...,0.64,0.08,0.11,,0.64,74.1%,1.35,85.9,0.85,1.24
2,sh1ro,21 years,19243,28.0%,13255,1.45,78.8,3.9,937,24889,...,0.53,0.08,0.11,1.26,0.53,76.3%,1.21,78.8,0.77,
3,deko,21 years,10801,36.7%,7911,1.37,80.5,3.8,497,13510,...,0.59,0.08,0.11,1.23,0.59,74.3%,1.21,80.5,0.80,
4,Kaze,28 years,18623,30.4%,14184,1.31,79.3,2.9,919,24020,...,0.59,0.08,0.10,,0.59,73.7%,1.22,79.3,0.78,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Zellsis,24 years,11657,46.4%,10854,1.07,79.2,4.8,626,16210,...,0.67,0.10,0.11,1.11,0.67,71.5%,1.11,79.2,0.72,
256,Bwills,21 years,17643,50.6%,16375,1.08,78.1,6.3,956,24928,...,0.66,0.11,0.14,1.10,0.66,72.7%,1.05,78.1,0.71,
257,nexa,25 years,24649,51.0%,22561,1.09,75.9,4.5,1340,35515,...,0.64,0.10,0.11,,0.64,73.0%,1.01,75.9,0.69,1.05
258,rommi,29 years,9899,50.8%,9295,1.06,81.1,3.7,516,13467,...,0.69,0.08,0.10,,0.69,69.7%,1.05,81.1,0.74,1.05


In [33]:
df1

Unnamed: 0,Name,Teams,Maps,Rounds,K-D Diff,K/D,Rating (1.0)
0,ZywOo,"[Vitality, aAa]",1108,29209,+6778,1.38,1.27
1,s1mple,[Natus Vincere],1649,43626,+9404,1.34,1.24
2,sh1ro,"[Gambit Youngsters, Gambit]",937,24889,+5988,1.45,1.23
3,deko,[1WIN],497,13510,+2890,1.37,1.21
4,Kaze,"[Rare Atom, ViCi, Flash, MVP.karnal]",919,24020,+4439,1.31,1.19
...,...,...,...,...,...,...,...
255,Zellsis,"[Swole Patrol, Lazarus]",626,16210,+803,1.07,1.05
256,Bwills,"[Nouns, Triumph, Gaimin Gladiators]",956,24928,+1268,1.08,1.05
257,nexa,"[G2, Valiance, Imperial, OG]",1340,35515,+2088,1.09,1.05
258,rommi,"[Spirit Academy, HOLLYWOOD, PLINK, 1337]",516,13467,+604,1.06,1.05


In [34]:
merged_df=pd.concat([df1,df],axis=1)

In [35]:
del merged_df ['Real Name']	

In [36]:
merged_df

Unnamed: 0,Name,Teams,Maps,Rounds,K-D Diff,K/D,Rating (1.0),Age,Total kills,Headshot %,...,Deaths / round,Saved by teammate / round,Saved teammates / round,Rating 2.0,DPR,KAST,Impact,ADR,KPR,Rating 1.0
0,ZywOo,"[Vitality, aAa]",1108,29209,+6778,1.38,1.27,22 years,24631,41.1%,...,0.61,0.09,0.12,1.32,0.61,75.1%,1.44,87.8,0.84,
1,s1mple,[Natus Vincere],1649,43626,+9404,1.34,1.24,25 years,37131,41.2%,...,0.64,0.08,0.11,,0.64,74.1%,1.35,85.9,0.85,1.24
2,sh1ro,"[Gambit Youngsters, Gambit]",937,24889,+5988,1.45,1.23,21 years,19243,28.0%,...,0.53,0.08,0.11,1.26,0.53,76.3%,1.21,78.8,0.77,
3,deko,[1WIN],497,13510,+2890,1.37,1.21,21 years,10801,36.7%,...,0.59,0.08,0.11,1.23,0.59,74.3%,1.21,80.5,0.80,
4,Kaze,"[Rare Atom, ViCi, Flash, MVP.karnal]",919,24020,+4439,1.31,1.19,28 years,18623,30.4%,...,0.59,0.08,0.10,,0.59,73.7%,1.22,79.3,0.78,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Zellsis,"[Swole Patrol, Lazarus]",626,16210,+803,1.07,1.05,24 years,11657,46.4%,...,0.67,0.10,0.11,1.11,0.67,71.5%,1.11,79.2,0.72,
256,Bwills,"[Nouns, Triumph, Gaimin Gladiators]",956,24928,+1268,1.08,1.05,21 years,17643,50.6%,...,0.66,0.11,0.14,1.10,0.66,72.7%,1.05,78.1,0.71,
257,nexa,"[G2, Valiance, Imperial, OG]",1340,35515,+2088,1.09,1.05,25 years,24649,51.0%,...,0.64,0.10,0.11,,0.64,73.0%,1.01,75.9,0.69,1.05
258,rommi,"[Spirit Academy, HOLLYWOOD, PLINK, 1337]",516,13467,+604,1.06,1.05,29 years,9899,50.8%,...,0.69,0.08,0.10,,0.69,69.7%,1.05,81.1,0.74,1.05


In [37]:
merged_df.columns

Index(['Name', 'Teams', 'Maps', 'Rounds', 'K-D Diff', 'K/D', 'Rating (1.0)',
       'Age', 'Total kills', 'Headshot %', 'Total deaths', 'K/D Ratio',
       'Damage / Round', 'Grenade dmg / Round', 'Maps played', 'Rounds played',
       'Kills / round', 'Assists / round', 'Deaths / round',
       'Saved by teammate / round', 'Saved teammates / round', 'Rating 2.0',
       'DPR', 'KAST', 'Impact', 'ADR', 'KPR', 'Rating 1.0'],
      dtype='object')

In [38]:
# Merge the two rating columns
merged_df['Rating'] = merged_df['Rating 1.0'].fillna(merged_df['Rating 2.0'])

# Drop the original rating columns
merged_df = merged_df.drop(['Rating 1.0', 'Rating 2.0'], axis=1)


In [39]:
merged_df.to_csv('Tier1&2.csv')