## Capture the Flag Challenge

**Website:** https://hertie-scraping-website.vercel.app  
**Task:** Scrape 40 flags (text snippets) from the website using BeautifulSoup

In [1]:
from bs4 import BeautifulSoup
import requests
import re

In [2]:
# Target page for the capture-the-flag exercise
url = "https://hertie-scraping-website.vercel.app"

# Fetch the page. The explicit timeout keeps the request from hanging
# indefinitely when the server never answers (requests applies no timeout by
# default), and raise_for_status() fails fast on a 4xx/5xx reply instead of
# silently parsing an error page that contains no flags.
req = requests.get(url, timeout=10)
req.raise_for_status()

# Parse the raw response bytes with Python's built-in HTML parser
soup = BeautifulSoup(req.content, "html.parser")

# Accumulator for the scraped flag strings
flags = []

#### Scraping Flag 1-7

In [3]:
# Collect every <p> element carrying the CSS class 'text-base'
p_tags_textbase = soup.find_all('p', class_='text-base')

# Keep only the paragraphs whose entire text is 'flag-' followed by one or
# more digits (fullmatch rejects any extra characters before or after)
flags.extend(
    p_tag.text
    for p_tag in p_tags_textbase
    if re.fullmatch(r'flag-\d+', p_tag.text)
)

flags


['flag-1', 'flag-2', 'flag-3', 'flag-4', 'flag-5', 'flag-6', 'flag-7']

#### Scraping Flag 8

In [4]:
# First <p> element carrying the CSS class 'text-transparent' (None if absent)
p_tags_texttransparent = soup.find('p', class_='text-transparent')

# Fall back to an empty string when the element is missing, so the pattern
# simply does not match and nothing is appended
transparent_text = p_tags_texttransparent.text if p_tags_texttransparent else ''

# The whole text must be 'flag-' followed by one or more digits
match = re.fullmatch(r'flag-\d+', transparent_text)
if match:
    # match.group() returns the full matched text
    flags.append(match.group())

flags

['flag-1',
 'flag-2',
 'flag-3',
 'flag-4',
 'flag-5',
 'flag-6',
 'flag-7',
 'flag-8']

#### Scraping Flag 9

In [5]:
# First <div> whose class attribute is exactly 'h-20 sticky' (None if absent)
div_tag_sticky = soup.find('div', class_='h-20 sticky')

# First <img> inside that div, if the div exists at all
img_tag = div_tag_sticky.find('img') if div_tag_sticky else None

# Text of the image's alt attribute; an empty string stands in for a missing
# image or a missing attribute, so the pattern below cannot match
alt_text = img_tag.get('alt', '') if img_tag else ''

# The whole alt text must be 'flag-' followed by one or more digits
match = re.fullmatch(r'flag-\d+', alt_text)
if match:
    # match.group() returns the full matched text
    flags.append(match.group())

flags

['flag-1',
 'flag-2',
 'flag-3',
 'flag-4',
 'flag-5',
 'flag-6',
 'flag-7',
 'flag-8',
 'flag-9']

#### Scraping Flag 10-40

In [6]:
# Select every <div> carrying the CSS class 'size-10'
div_tag_flag = soup.find_all('div', class_='size-10')

for wrapper in div_tag_flag:

    # The flag sits on the first <div> nested inside the wrapper
    inner_div = wrapper.find('div')
    if not inner_div:
        continue

    # Gather the strings to test: the id attribute first (when present),
    # followed by each entry of the class attribute
    id_value = inner_div.get('id')
    candidates = [id_value] if id_value else []
    candidates += list(inner_div.get('class', []))

    # Keep the candidates that are exactly 'flag-' followed by digits
    flags.extend(
        candidate
        for candidate in candidates
        if re.fullmatch(r'flag-\d+', candidate)
    )
flags


['flag-1',
 'flag-2',
 'flag-3',
 'flag-4',
 'flag-5',
 'flag-6',
 'flag-7',
 'flag-8',
 'flag-9',
 'flag-10',
 'flag-11',
 'flag-12',
 'flag-13',
 'flag-14',
 'flag-15',
 'flag-16',
 'flag-17',
 'flag-18',
 'flag-19',
 'flag-20',
 'flag-21',
 'flag-22',
 'flag-23',
 'flag-24',
 'flag-25',
 'flag-26',
 'flag-27',
 'flag-28',
 'flag-29',
 'flag-30',
 'flag-31',
 'flag-32',
 'flag-33',
 'flag-34',
 'flag-35',
 'flag-36',
 'flag-37',
 'flag-38',
 'flag-39',
 'flag-40']