# Web Scraping Task: Collect Data from Multiple Websites

**Goal:**
Make requests to **at least 3 websites** to obtain the desired information (your choice), for example:

- Weather data
- Exchange rates
- Product prices
- Hotel or restaurant ratings

**Output:**
You should produce **3 CSV files** containing similar data. Examples:

- Weather data for October from 3 different weather forecast sites.
- Exchange rates for one month from 3 different banks.
- Ratings for a list of hotels from 3 different websites.

**Note:**
Make sure each website **allows web scraping** (check its `robots.txt` and terms of use) before accessing its data.


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Request headers sent with every call made from this cell.
headers = {"User-Agent": "Mozilla/5.0"}


def scrape_weather(url, timeout=10):
    """Download a forecast page and return its day/temperature table.

    Each element matching ``.tombstone-container`` is treated as one
    forecast tile. The day name and the temperature are read from the
    *same* tile, so a tile that lacks either field is skipped instead of
    shifting every following day/temperature pairing by one.

    Args:
        url: Address of the forecast page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Day"`` and
        ``"Temperature"`` (both text), one row per complete tile. The
        frame is empty when the page contains no matching tiles.

    Raises:
        requests.exceptions.RequestException: If the request times out,
            the connection fails, or the server answers with an HTTP
            error status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    days = []
    temps = []
    for tile in soup.select(".tombstone-container"):
        day = tile.select_one(".period-name")
        temp = tile.select_one(".temp")
        if day is None or temp is None:
            # Incomplete tile: keep the two columns aligned by skipping it.
            continue
        days.append(day.get_text(strip=True))
        temps.append(temp.get_text(strip=True))

    return pd.DataFrame({"Day": days, "Temperature": temps})


# Forecast pages, one per city.
url_ny = "https://forecast.weather.gov/MapClick.php?lat=40.71&lon=-74.01"  # 1. New York
url_la = "https://forecast.weather.gov/MapClick.php?lat=34.05&lon=-118.25"  # 2. Los Angeles
url_chi = "https://forecast.weather.gov/MapClick.php?lat=41.88&lon=-87.63"  # 3. Chicago

# Scrape each city in turn, write its table to a CSV file and report the
# number of rows saved. The order (request, write, print) is the same for
# every city.
forecasts = []
for csv_name, page_url in (
    ("meteo_newyork.csv", url_ny),
    ("meteo_losangeles.csv", url_la),
    ("meteo_chicago.csv", url_chi),
):
    frame = scrape_weather(page_url)
    frame.to_csv(csv_name, index=False)
    print(f"{csv_name} salvat! →", len(frame), "linii")
    forecasts.append(frame)

# Keep the per-city tables available under their own names.
data_ny, data_la, data_chi = forecasts


meteo_newyork.csv salvat! → 9 linii
meteo_losangeles.csv salvat! → 9 linii
meteo_chicago.csv salvat! → 9 linii


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Request headers sent with every call made from this cell.
headers = {"User-Agent": "Mozilla/5.0"}

# Books to Scrape - the first 30 books of the catalogue (in listing order,
# not a ranking).
base_url_books = 'http://books.toscrape.com/catalogue/page-{}.html'
max_books = 30
all_books = []

for i in range(1, 51):
    url = base_url_books.format(i)
    # A timeout keeps the loop from hanging forever on a stalled
    # connection; raise_for_status() stops on 4xx/5xx instead of silently
    # parsing an error page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    # Parse the raw bytes so BeautifulSoup detects the encoding from the
    # document itself. response.text depends on the charset in the HTTP
    # header; when that is missing, requests falls back to ISO-8859-1 and
    # the pound sign in the prices is decoded as mojibake.
    soup = BeautifulSoup(response.content, 'html.parser')

    books = soup.select('.product_pod')
    if not books:
        # No products on this page: nothing further to collect.
        break
    for book in books:
        title = book.select_one('h3 a')['title']
        price = book.select_one('p.price_color').text
        all_books.append([title, price])

    # Stop requesting pages as soon as enough books have been gathered.
    if len(all_books) >= max_books:
        break

# A page may overshoot the limit, so trim to exactly max_books rows.
df_top30 = pd.DataFrame(all_books[:max_books], columns=['Title', 'Price'])
df_top30.to_csv('top30_books.csv', index=False)
print("Site 1 CSV saved:", len(df_top30), "books")

# Quotes to Scrape - all the quotes (pages 1 to 10)
base_url_quotes = 'http://quotes.toscrape.com/page/{}/'
all_quotes = []

for i in range(1, 11):
    url = base_url_quotes.format(i)
    # A timeout keeps the loop from hanging forever on a stalled
    # connection; raise_for_status() stops on 4xx/5xx instead of letting
    # an error page contribute zero rows unnoticed.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    quotes = soup.find_all('div', class_='quote')
    if not quotes:
        # A page without quotes means the listing has ended early.
        break
    for quote in quotes:
        text = quote.find('span', class_='text').text
        author = quote.find('small', class_='author').text
        all_quotes.append([text, author])

df_quotes = pd.DataFrame(all_quotes, columns=['Quote', 'Author'])
df_quotes.to_csv('quotes_all.csv', index=False)
print("Site 2 CSV saved:", len(df_quotes), "quotes")

# Books to Scrape - 'Science' category (single listing page)
url_science = "http://books.toscrape.com/catalogue/category/books/science_22/index.html"
# A timeout avoids hanging forever on a stalled connection;
# raise_for_status() stops on 4xx/5xx instead of parsing an error page
# into an empty table.
response_science = requests.get(url_science, headers=headers, timeout=10)
response_science.raise_for_status()
# Parse the raw bytes so BeautifulSoup detects the encoding from the
# document itself. response.text depends on the charset in the HTTP header;
# when that is missing, requests falls back to ISO-8859-1 and the pound
# sign in the prices is decoded as mojibake.
soup_science = BeautifulSoup(response_science.content, 'html.parser')

# One [title, price] row per product card on the page.
books_science = soup_science.select('.product_pod')
science_books = [
    [book.select_one('h3 a')['title'], book.select_one('p.price_color').text]
    for book in books_science
]

df_science = pd.DataFrame(science_books, columns=['Title', 'Price'])
df_science.to_csv('science_books.csv', index=False)
print("Site 3 CSV saved:", len(df_science), "books")



Site 1 CSV saved: 30 books
Site 2 CSV saved: 100 quotes
Site 3 CSV saved: 14 books
