# WEB SCRAPING WITHOUT AUTOMATION

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape every result page of a paginebianche.it people search for one surname
# and collect [name, address, telephone] rows in `persons`.
base_url = "https://www.paginebianche.it/"
# Endpoint of the people search; the query string is supplied via `params`.
raw_html_url = f"{base_url}persone"
# Seconds to wait for the server before giving up on a single page.
REQUEST_TIMEOUT = 10

page = 1
persons = []
print("Please insert the surname that you want to search")
surname = input("").lower().strip()

# Walk the paginated results until a page comes back without listings.
while True:
    # Let requests build the query string so surnames containing spaces,
    # apostrophes or accented letters are URL-encoded correctly.
    try:
        raw_html = requests.get(
            raw_html_url,
            params={"qs": surname, "p": page},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as error:
        # Keep the rows collected so far instead of losing them to a traceback.
        print(f"Stopping at page {page}: request failed ({error})")
        break

    if not raw_html.ok:
        print(f"Stopping at page {page}: server answered with status {raw_html.status_code}")
        break

    html_content = raw_html.text

    # Parse the HTML
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract the listings on the current page
    listings = soup.find_all('div', class_="list-element__content")

    # If no listings found, we are past the last page: exit the loop
    if not listings:
        break

    # Iterate over each listing
    for listing in listings:
        # Extract the name, address, and telephone; fall back to "N/A" when
        # the corresponding element is absent from the listing.
        name_element = listing.find('h2', class_="list-element__title ln-3 org fn")
        name = name_element.text.strip() if name_element else "N/A"

        address_element = listing.find('div', class_="list-element__address adr")
        address = address_element.text.strip() if address_element else "N/A"

        telephone_element = listing.find('div', class_="btn__label tel")
        telephone = telephone_element.text.strip() if telephone_element else "N/A"

        # Append the [name, address, telephone] row to the persons list
        persons.append([name, address, telephone])

    # Move on to the next page
    page += 1

# Print the persons list
for person in persons:
    print(person)


Please insert the surname that you want to search
petrovic
['Petrovic Ambra', "Via Martiri Della Liberta' 50 - 34079 Staranzano (GO)", '0481 778129']
['Petrovic Danijela', 'Via Padovan Giglio 5 - 34138 Trieste (TS)', '040 2034217']
['Petrovic Dejan', 'Via Roma 9 - 38054 Tonadico (TN)', '388 6562947']
['Petrovic Dino', 'Via Protomartiri Francescani 77 - 06081 Assisi (PG)', '075 8040739']
['Petrovic Dragana', 'Via Campo Marte 9 - 25128 Brescia (BS)', '030 3700037']
['Petrovic Dragica', 'Via Mantegazza Paolo 37 - 19126 La spezia (SP)', '0187 523591']
['Petrovic Drasko', 'Via Aldo Moro 11 - 29027 Podenzano (PC)', '0523 554203']
['Petrovic Dubravka', 'Via Della Palazzina 32 - 25124 Brescia (BS)', '030 3534161']
['Petrovic Dusko', 'Via S. Vito 18 - 36034 Malo (VI)', '0445 605498']
['Petrovic Erzebet', 'Via Non Dichiarato 00 - 37063 Isola della scala (VR)', '327 3146778']
['Petrovic Gordana', 'Via Isonzo 58/C - 22100 Como (CO)', '031 526172']
['Petrovic Ljubomir', 'Via Udine 46 - 33054 Lignan

# WEB SCRAPING WITH AUTOMATION

In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
# Drive a Chrome session to tel.search.ch, scroll until the page stops growing,
# and remember the URL the browser ends up on in `final_url`.
i = 0
print("Please insert the surname that you want to search")
surname = input("").lower().strip()

# Not used in this cell; kept in case other cells rely on the name.
items = []

# Open the browser only after the (blocking) prompt, and always close it —
# even when navigation or scrolling raises — so no Chrome/driver process leaks.
browser = webdriver.Chrome()
try:
    browser.get(f'https://tel.search.ch/?was={surname}&privat=1&pages=1')

    last_height = browser.execute_script("return document.body.scrollHeight")
    while True:
        # Jump to the bottom and give the page a moment to append more results.
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)
        new_height = browser.execute_script("return document.body.scrollHeight")
        # The document height no longer changes: nothing more was loaded.
        if new_height == last_height:
            break
        last_height = new_height

    # Get the final URL.
    # NOTE(review): presumably the site rewrites the URL (e.g. `pages=`) as
    # more results load, which is why it is read after scrolling — confirm.
    final_url = browser.current_url
finally:
    # Close the browser
    browser.quit()



# Download the page at `final_url` and turn every result entry into a
# [name, address, telephone] row collected in `persons`.
base_url = "https://www.search.ch/"
# Fetch the HTML content; the timeout keeps a stalled server from hanging the kernel.
raw_html = requests.get(final_url, timeout=10)
if not raw_html.ok:
    print(f"Warning: server answered with status {raw_html.status_code}")
html_content = raw_html.text
# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')
# Create an empty list to store persons
persons = []
# Extract the listings on the current page
listings = soup.find_all('table', class_='tel-resultentry')
# Iterate over each listing
for listing in listings:
    # Extract the name and address; fall back to 'N/A' when the tag is missing
    # instead of failing with AttributeError on None.
    name_element = listing.find('h1')
    name = name_element.text.strip() if name_element else 'N/A'
    address_element = listing.find('div', class_='tel-address')
    address = address_element.text.strip() if address_element else 'N/A'

    # The telephone is the digit run following '+' in the action link's href.
    telephone = 'N/A'
    telephone_element = listing.find('a', class_='tel-result-action')
    if telephone_element:
        # An <a> without href yields None; substitute '' so re.search is safe.
        telephone_href = telephone_element.get('href') or ''
        telephone_match = re.search(r'\+(\d+)', telephone_href)
        if telephone_match:
            telephone = telephone_match.group(1)

    # Append the [name, address, telephone] row to the persons list
    persons.append([name, address, telephone])

# Print the persons list, numbered from 1
for i, person in enumerate(persons, start=1):
    print(i, person)

Please insert the surname that you want to search
cvetkovic
1 ['Bariska Cvetkovic, Denise', 'Haldenweg 4, 5235 Rüfenach AG', '41447261203']
2 ['Cvetkovic, Bane und Dusica', 'Rüttistrasse 8, 6467 Schattdorf UR', '41418710239']
3 ['Cvetkovic, Bratislav (-Jankovic)', 'Florahof 2, 8353 Elgg ZH', '41523643416']
4 ['Cvetkovic, Bratislav', 'Zürcherstrasse 30, 8102 Oberengstringen ZH', '41447511792']
5 ['Cvetkovic, Cedomir', 'Via Ernesto Bosia 15, 6900 Paradiso TI', '41919800903']
6 ['Cvetkovic, Danijela', 'Austrasse, 9230 Flawil SG', '41767099791']
7 ['Cvetkovic, Dragan', 'Obere Fischbachstrasse 5, 8932 Mettmenstetten ZH', '41447682338']
8 ['Cvetkovic, Dragan', 'Rainstrasse 13, 9500 Wil SG', '41719254358']
9 ['Cvetkovic, Goran', 'Austrasse 3, 9230 Flawil SG', '41713931231']
10 ['Cvetkovic, Ivana und Dejan', 'Ruopigenplatz 24, 6015 Luzern', '41412407175']
11 ['Cvetkovic, Jadranca', 'Rue des Minoteries 5, 1205 Genève', '41223415220']
12 ['Cvetkovic, Ljiljana (-Stankovic)', 'Weiherstrasse 20, 83

# FOOTBALL CLUB INFO

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Download the shot map of one SofaScore event, show it as a table and save it
# to an Excel file.

# Identifier of the event whose shot map is requested.
EVENT_ID = 11605972

# Browser-like request headers.
# The hard-coded 'if-none-match' ETag was removed on purpose: it turns the call
# into a conditional request, which a server may answer with 304 Not Modified
# and no body — and this code would then report that as an error.
headers = {
    'authority': 'api.sofascore.com',
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9',
    'cache-control': 'max-age=0',
    'origin': 'https://www.sofascore.com',
    'referer': 'https://www.sofascore.com/',
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Linux"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}

# Make a GET request and get the data by using headers; the timeout keeps a
# stalled connection from blocking the kernel indefinitely.
response_api = requests.get(
    f'https://api.sofascore.com/api/v1/event/{EVENT_ID}/shotmap',
    headers=headers,
    timeout=10,
)

if response_api.status_code == 200:
    # Flatten the nested shot records into one row per shot.
    shots_data = response_api.json()['shotmap']
    df = pd.json_normalize(shots_data)

    # Displaying the DataFrame as a table
    print(df)

    # Save the DataFrame to an Excel file
    df.to_excel('shots_data.xlsx', index=False)

else:
    print(f"Error in the API request: {response_api.status_code}")

    isHome shotType   situation    bodyPart goalMouthLocation        xg  \
0    False     miss      corner        head              left  0.054920   
1    False     save     regular  right-foot          low-left  0.199597   
2    False     miss    assisted  right-foot  close-high-right  0.029088   
3    False    block     regular  right-foot        low-centre  0.042944   
4     True     miss   set-piece  right-foot              high  0.035873   
5    False     post    assisted  right-foot              left  0.026028   
6     True     save     regular  right-foot         low-right  0.113952   
7    False     miss    assisted  right-foot              high  0.100776   
8    False     miss    assisted  right-foot        high-right  0.055613   
9    False    block     regular   left-foot          low-left  0.091924   
10   False     miss    assisted  right-foot        close-left  0.060351   
11   False     miss     regular  right-foot       close-right  0.020322   
12    True     save    as