# Valorant Project: Webscraping  

<hr>  
  
## Prereq Code

In [68]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [42]:
# Getting the soup

url = "https://www.vlr.gg/events" 
response = requests.get(url).text 
soup = BeautifulSoup(response) 

<hr>

### Collecting team comp, match outcome, and map per game [team A vs team B in Semi Finals]

https://www.vlr.gg/169825/mxb-xt-vietnam-vs-victory-valorant-vietnam-rising-stars-qf/?game=all&tab=overview

#### Storing the Team Comp

##### Looking for the tag with the needed info

    
1. find the div that contains each game
> make sure "All Maps" summary div is not captured
2. the div with no "style" or "class" attribute has the divs for the tables
3. iterate through the two divs that each contain a table (each div represent a team)
4. go to tbody
5. iterate through each tr (each tr represents a player for the team)
6. go to td with the class value of "mod-agents"
7. go to div
8. go to span
9. check the title attribute of the span; has the name of the agent they're playing

In [3]:
# STEP 1a: Find the div
matches = soup.find_all(name = "div", class_ ="vm-stats-game")

In [4]:
# STEP 1b: Filter out the div that contains the "All Maps" section
adj_matches = []
for match in matches:
    if ((match.attrs)["data-game-id"] != "all"):
        if (match.find(name = "div", class_ = "score mod-win").string.strip() != "0"):
            adj_matches.append(match)

In [5]:
len(adj_matches)

2

In [6]:
# STEP 2 to 9
comp_list = []
comp_outcome = []

# STEP 2 to 4: Get the 2 tbody that contains each tema
for match in adj_matches:
    for team in match.find_all("tbody"):
        team_comp = []

        # STEP 5: Iterate through each player in each team
        for player in team.find_all("tr"):
            # STEP 6 to 9: Store the agent selected
            team_comp.append(player.find("img").attrs["title"])
        if team_comp:
            comp_list.append(team_comp)

In [7]:
comp_list

[['Raze', 'Viper', 'Jett', 'Sage', 'Sova'],
 ['Viper', 'Sova', 'Jett', 'Reyna', 'Killjoy'],
 ['Jett', 'Killjoy', 'Sova', 'Astra', 'Breach'],
 ['Astra', 'Sova', 'Cypher', 'Jett', 'Raze']]

#### Storing the Match Outcome

In [8]:
outcome_list = []
for match in adj_matches:
    for team in match.find_all(name = "div", class_ = "score"):
        if "mod-win" in team.attrs["class"]:
            outcome_list.append("win")
        else:
            outcome_list.append("lose")


        

In [9]:
outcome_list

['lose', 'win', 'lose', 'win']

#### Storing Map

In [10]:
map_list = []

for match in adj_matches:
    for i in range(2):
        map_list.append(match.find(name = "div", class_ = "map").find("span").get_text(strip = True))

In [11]:
map_list

['Icebox', 'Icebox', 'Haven', 'Haven']

<hr>

### Iterating through all games given a tournament

Given a URL of a tournament, iterate through each game
> From: https://www.vlr.gg/event/matches/1438/valorant-vietnam-rising-stars/?series_id=all&group=completed  
> To: https://www.vlr.gg/169825/mxb-xt-vietnam-vs-victory-valorant-vietnam-rising-stars-qf/?game=115368&tab=overview

In [32]:
link_list = []
for anchor in soup.find_all(name = "a", class_ = "wf-module-item"):
    link_list.append("vlr.gg"+anchor.attrs["href"])

### Iterating through all tournaments that match a specific criteria

In [95]:
tournament_list = []

for events_list in soup.find_all(name = "div", class_ = "events-container-col"):
    if events_list.find(name = "div", class_ = "wf-label mod-large mod-completed"):
        for tournaments in events_list.find_all(name = "a", class_ = "wf-card mod-flex event-item"):
            for words in tournaments.find(name = "div", class_ = "event-item-desc-item mod-dates").get_text(strip = True).split("—"):
                if (words[:3] == "Jan"):
                    tournament_list.append("https://www.vlr.gg"+tournaments.attrs["href"])

tournament_list

['https://www.vlr.gg/event/1438/valorant-vietnam-rising-stars',
 'https://www.vlr.gg/event/1461/game-changers-2023-latam-qualifiers',
 'https://www.vlr.gg/event/1440/wandercon',
 'https://www.vlr.gg/event/1469/valorant-east-united-season-2-stage-1-weekly-cup-4',
 'https://www.vlr.gg/event/1462/knights-monthly-gauntlet-2023-january',
 'https://www.vlr.gg/event/1468/asmoogl-community-cup',
 'https://www.vlr.gg/event/1470/golden-goose-6',
 'https://www.vlr.gg/event/1466/valorant-east-united-season-2-stage-1-monthly-cup-1',
 'https://www.vlr.gg/event/1403/project-v-xmg-offseason-clash',
 'https://www.vlr.gg/event/1309/challengers-league-north-america-qualifiers',
 'https://www.vlr.gg/event/1352/nativz-collegiate-valorant-winter-series-2022',
 'https://www.vlr.gg/event/1407/project-v-xmg-trinity-cup',
 'https://www.vlr.gg/event/1459/valorant-east-united-season-2-stage-1-weekly-cup-3',
 'https://www.vlr.gg/event/1335/nerd-street-valorant-lockdown-finals',
 'https://www.vlr.gg/event/1456/gami

In [81]:
soup.find_all(name = "div", class_ = "events-container-col")[1].find(name = "div", class_ = "wf-label mod-large mod-completed")

<div class="wf-label mod-large mod-completed">
				completed events
			</div>

In [71]:
re.split("—", soup.find_all(name = "div", class_ = "events-container-col")[1].find_all(name = "a", class_ = "wf-card mod-flex event-item")[0].find(name = "div", class_ = "event-item-desc-item mod-dates").get_text(strip = True))

['Jan 2', 'Mar 2Dates']

## Master Code for Match Info Collection

In [12]:
comp_list = []
outcome_list = []
map_list = []

for match in adj_matches:

    for i in range(2):
        map_list.append(match.find(name = "div", class_ = "map").find("span").get_text(strip = True))

    for team in match.find_all(name = "div", class_ = "score"):
        if "mod-win" in team.attrs["class"]:
            outcome_list.append("win")
        else:
            outcome_list.append("lose")

    for team in match.find_all("tbody"):
        team_comp = []

        for player in team.find_all("tr"):
            team_comp.append(player.find("img").attrs["title"])
        if team_comp:
            comp_list.append(team_comp)

In [13]:
pro_df = pd.DataFrame({"comp_list": comp_list, "outcome_list": outcome_list, "map_list": map_list})
pro_df

Unnamed: 0,comp_list,outcome_list,map_list
0,"[Raze, Viper, Jett, Sage, Sova]",lose,Icebox
1,"[Viper, Sova, Jett, Reyna, Killjoy]",win,Icebox
2,"[Jett, Killjoy, Sova, Astra, Breach]",lose,Haven
3,"[Astra, Sova, Cypher, Jett, Raze]",win,Haven


<hr>

# Prototype

In [39]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pickle as pkl


comp_list = []
outcome_list = []
map_list = []


# Getting the soup for the URL with the list of tournaments
url = "https://www.vlr.gg/events"
response = requests.get(url).text
soup_main = BeautifulSoup(response, features="lxml")

# Getting the list of tournament links
print("\nCollecting tournaments")
tournament_list = []
for events_list in soup_main.find_all(name = "div", class_ = "events-container-col"):
    if events_list.find(name = "div", class_ = "wf-label mod-large mod-completed"):
        for tournaments in events_list.find_all(name = "a", class_ = "wf-card mod-flex event-item"):
            for words in tournaments.find(name = "div", class_ = "event-item-desc-item mod-dates").get_text(strip = True).split("—"):
                if (words[:3] == "Jan"):
                    tournament_list.append("https://www.vlr.gg"+tournaments.attrs["href"])

print("\n Collecting games in each tournament")
# Collect all the links to the games in each tournament
for tournament_link in tournament_list:

    url_tournament = tournament_link
    response_tournament = requests.get(url_tournament).text
    soup_tournament = BeautifulSoup(response_tournament, features="lxml")

    link_list = []
    for anchor in soup_tournament.find_all(name = "a", class_ = "wf-module-item"):
        link_list.append("https://www.vlr.gg"+anchor.attrs["href"])

    print("Collecting match data from each game")
    # For each game; collect the game data
    for link in link_list:
        url_game = link
        response_game = requests.get(url_game).text
        soup_game = BeautifulSoup(response_game, features="lxml")

        matches = soup_game.find_all(name = "div", class_ ="vm-stats-game")

        adj_matches = []
        # Filter out the tab that aggregates the result for all games given the game (team A vs team B summary of 3 maps played for qualifiers)
        for match in matches:
            if ((match.attrs)["data-game-id"] != "all"):
                # Filter out the tab that is empty if only 2 maps were played instead of 3
                if (match.find(name = "div", class_ = "score mod-win").string.strip() != "0"):
                    adj_matches.append(match)

        # For each match played for the game; collect info
        for match in adj_matches:
            # Collect map played
            for i in range(2):
                map_list.append(match.find(name = "div", class_ = "map").find("span").get_text(strip = True))
            # Collect match outcome
            for team in match.find_all(name = "div", class_ = "score"):
                if "mod-win" in team.attrs["class"]:
                    outcome_list.append("win")
                else:
                    outcome_list.append("lose")
            # Collect team comp
            for team in match.find_all("tbody"):
                team_comp = []

                for player in team.find_all("tr"):
                    if player.find("img"): # To cover for instances where a match was played(?) [there's a score] but no agent comp was recorded
                        team_comp.append(player.find("img").attrs["title"]) 
                    else:
                        comp_list.append(np.nan)
                        break
                if team_comp:
                    comp_list.append(team_comp)

# Convert into a df
pro_df = pd.DataFrame({"comp_list": comp_list, "outcome_list": outcome_list, "map_list": map_list})

# Remove rows where there is no team comp captured
pro_df.dropna(subset = ["comp_list"], inplace = True)

if pro_df:
    with open("C:\\Users\\Binaryxx Sune\\Documents\\Programming\\personal_projects\\data\\pro_data.pkl", "wb") as pro_data_file:
        pkl.dump(pro_df, pro_data_file)

print("Script has finished running")

In [40]:
print("Tournaments:", len(tournament_link))
print("~ Games Each Tournament:", len(link_list))
print("Matches:", len(comp_list))

Tournaments: 80
~ Games Each Tournament: 54
Matches: 532


In [41]:
pro_df

Unnamed: 0,comp_list,outcome_list,map_list
0,"[Raze, Astra, Kayo, Fade, Cypher]",lose,Pearl
1,"[Fade, Astra, Raze, Breach, Killjoy]",win,Pearl
2,"[Jett, Breach, Sova, Killjoy, Astra]",lose,Haven
3,"[Raze, Fade, Astra, Killjoy, Breach]",win,Haven
4,"[Jett, Astra, Killjoy, Kayo, Sova]",win,AscentPICK
...,...,...,...
527,"[Brimstone, Breach, Raze, Fade, Cypher]",lose,Bind
528,"[Raze, Omen, Fade, Viper, Killjoy]",win,Haven
529,"[Breach, Jett, Sova, Omen, Killjoy]",lose,Haven
530,"[Raze, Cypher, Brimstone, Breach, Fade]",win,Fracture
