# Web Scraping Project
# Date: 15 Sept 2025
# Description: Web scraping using Requests and BeautifulSoup

# Import Libraries

In [None]:
# Import Libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.colab import files


# Step 1: Loading the Webpage

In [None]:
# Step 1: Load the Webpage
# URL of the page that hosts the hockey teams table.
url = 'https://www.scrapethissite.com/pages/forms/'

# requests applies no timeout by default, so pass one explicitly to keep the
# cell from hanging forever on a stalled connection.
page = requests.get(url, timeout=30)

# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.
page.raise_for_status()

# Parse the response body with BeautifulSoup's built-in 'html.parser' backend.
soup = BeautifulSoup(page.text, 'html.parser')
# print(soup)  # uncomment to inspect the parsed HTML

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Hockey Teams: Forms, Searching and Pagination | Scrape This Site | A public sandbox for learning web scraping</title>
<link href="/static/images/scraper-icon.png" rel="icon" type="image/png"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta content="Browse through a database of NHL team stats since 1990. Practice building a scraper that handles common website interface components." name="description"/>
<link crossorigin="anonymous" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css" integrity="sha256-MfvZlkHCEqatNoGiOXveE8FIwMzZg4W85qfrfIFBfYc= sha512-dTfge/zgoMYpP7QbHy4gWMEGsbsdZeCXz7irItjcC3sPUFtf0kuFbDz/ixG7ArTxmDjLXDmezHubeNikyKGVyQ==" rel="stylesheet"/>
<link href="https://fonts.googleapis.com/css?family=Lato:400,700" rel="stylesheet" type="text/css"/>
<link href="/static/css/styles.css" rel="stylesheet" type="text/css"/>
<meta content="noindex" name="robo

# Step 2: Find and print the hockey teams table

In [None]:

# Locate the first <table> element that carries the CSS class 'table'.
hokey_table = soup.find('table', class_='table')

# soup.find returns None when nothing matches; stop here with a clear message
# rather than failing in a later cell with an AttributeError on None.
if hokey_table is None:
    raise ValueError("No <table class='table'> element found in the page")
# print(hokey_table)  # uncomment to inspect the table's HTML

# Step 3: Get the Column Headers

In [None]:

# Column headers of an HTML table live inside <th>...</th> tags.
# Collect every <th> element of the table, then keep each cell's text with
# leading and trailing whitespace removed.
table_headers = hokey_table.find_all('th')
headers = [header_cell.get_text().strip() for header_cell in table_headers]
print(headers)



['Team Name', 'Year', 'Wins', 'Losses', 'OT Losses', 'Win %', 'Goals For (GF)', 'Goals Against (GA)', '+ / -']


# Step 4: Extract data row by row

In [None]:
# Step 4: Extract data row by row
# The first <tr> is skipped by the [1:] slice; every remaining row becomes a
# list holding the whitespace-stripped text of each of its <td> cells.
data_rows = [
    [cell.text.strip() for cell in table_row.find_all('td')]
    for table_row in hokey_table.find_all('tr')[1:]
]
# Show only the first 10 rows to keep the output short.
print(data_rows[:10])


[['Boston Bruins', '1990', '44', '24', '', '0.55', '299', '264', '35'], ['Buffalo Sabres', '1990', '31', '30', '', '0.388', '292', '278', '14'], ['Calgary Flames', '1990', '46', '26', '', '0.575', '344', '263', '81'], ['Chicago Blackhawks', '1990', '49', '23', '', '0.613', '284', '211', '73'], ['Detroit Red Wings', '1990', '34', '38', '', '0.425', '273', '298', '-25'], ['Edmonton Oilers', '1990', '37', '37', '', '0.463', '272', '272', '0'], ['Hartford Whalers', '1990', '31', '38', '', '0.388', '238', '276', '-38'], ['Los Angeles Kings', '1990', '46', '24', '', '0.575', '340', '254', '86'], ['Minnesota North Stars', '1990', '27', '39', '', '0.338', '256', '266', '-10'], ['Montreal Canadiens', '1990', '39', '30', '', '0.487', '273', '249', '24']]



# Step 5: Create an empty DataFrame

In [None]:
# Build an empty DataFrame whose columns are the scraped table headers.
# No rows are added here.
HTeam_df = pd.DataFrame(columns=headers)
# print(HTeam_df)

# Step 6: Fill dataframe with data


In [None]:
# Build the DataFrame from the scraped rows, with the scraped headers as
# column names.
HTeam_df = pd.DataFrame(data_rows, columns=headers)

# Inspect the first 5 rows
display(HTeam_df.head())




Unnamed: 0,Team Name,Year,Wins,Losses,OT Losses,Win %,Goals For (GF),Goals Against (GA),+ / -
0,Boston Bruins,1990,44,24,,0.55,299,264,35
1,Buffalo Sabres,1990,31,30,,0.388,292,278,14
2,Calgary Flames,1990,46,26,,0.575,344,263,81
3,Chicago Blackhawks,1990,49,23,,0.613,284,211,73
4,Detroit Red Wings,1990,34,38,,0.425,273,298,-25


# Step 7: Saving and downloading the file


In [None]:
# Write the DataFrame to a CSV file, leaving out the index column, then hand
# that same file to Colab's files.download.
csv_filename = 'hockey_teams.csv'
HTeam_df.to_csv(csv_filename, index=False)
files.download(csv_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>