# Steam Market Web Scraping

In [1]:
#import libraries
from bs4 import BeautifulSoup
import requests
import time
from datetime import datetime
import csv
import pandas as pd

In [2]:
# After examining the source page, choose which data we want to extract.
header = ['Item', 'Price', 'Datetime']

# Create a new csv file and write in the header row.
with open('StockholmStickerData.csv', 'w', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(header)

In [3]:
# Create a function that will retrieve the data we want for a single point in time.
def check_price():
    URL = "https://steamcommunity.com/market/search?q=mouz+stockholm"

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36"}
    
    now = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    
    page = requests.get(URL, headers = headers)

    soup1 = BeautifulSoup(page.content, "html.parser")

    soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

    item = soup2.find_all("span",{"class":"market_listing_item_name"})

    price = soup2.find_all("span",{"class":"normal_price","data-currency":"1"})

    for x, i in enumerate(item):
        item[x] = i.get_text().strip()

    for x, i in enumerate(price):
        price[x] = i.get_text().strip()
    
    data = []

    for n in range(len(item)):
        entry = [item[n], price[n], now]
        data.append(entry)
        
    with open('StockholmStickerData.csv', 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        for row in data:
            writer.writerow(row)

# Retrieve data
check_price()

# Inspect our csv file
df = pd.read_csv(r'StockholmStickerData.csv')
df

Unnamed: 0,Item,Price,Datetime
0,Patch | MOUZ | Stockholm 2021,$1.78 USD,14/05/2022 11:00:23
1,Sticker | MOUZ | Stockholm 2021,$0.74 USD,14/05/2022 11:00:23
2,Sticker | MOUZ (Foil) | Stockholm 2021,$26.58 USD,14/05/2022 11:00:23
3,Patch | MOUZ (Gold) | Stockholm 2021,$14.16 USD,14/05/2022 11:00:23
4,Sticker | MOUZ (Gold) | Stockholm 2021,$208.18 USD,14/05/2022 11:00:23
5,Sticker | MOUZ (Holo) | Stockholm 2021,$10.20 USD,14/05/2022 11:00:23


In [None]:
# Set up for recurring data retrieval.
while(True):
    check_price()
    time.sleep(600)

In [11]:
# Look at our data again after retrieving data every 10 min for 30 min.
df = pd.read_csv(r'StockholmStickerData.csv')
df.sort_values(by=['Item', 'Datetime'])
df

Unnamed: 0,Item,Price,Datetime
0,Patch | MOUZ | Stockholm 2021,$1.78 USD,14/05/2022 11:00:23
1,Sticker | MOUZ | Stockholm 2021,$0.74 USD,14/05/2022 11:00:23
2,Sticker | MOUZ (Foil) | Stockholm 2021,$26.58 USD,14/05/2022 11:00:23
3,Patch | MOUZ (Gold) | Stockholm 2021,$14.16 USD,14/05/2022 11:00:23
4,Sticker | MOUZ (Gold) | Stockholm 2021,$208.18 USD,14/05/2022 11:00:23
5,Sticker | MOUZ (Holo) | Stockholm 2021,$10.20 USD,14/05/2022 11:00:23
6,Patch | MOUZ | Stockholm 2021,$1.78 USD,14/05/2022 11:14:17
7,Sticker | MOUZ | Stockholm 2021,$0.74 USD,14/05/2022 11:14:17
8,Sticker | MOUZ (Foil) | Stockholm 2021,$26.58 USD,14/05/2022 11:14:17
9,Sticker | MOUZ (Gold) | Stockholm 2021,$203.55 USD,14/05/2022 11:14:17


In [None]:
# As expected, there is not much change in the data over such a short period of time, but the automation appears to be working as intended.