Scraping Steam Data and Saving It to a CSV File

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
import re
import time

# Search filters (values of Steam's "filter" query parameter) to scrape, in order.
search_filters = ['topsellers', 'mostplayed', 'newreleases', 'upcomingreleases']

# Maximum number of rows to scrape, counted across all filters combined.
MAX_DATA = 10000
line_count = 0

# Seconds to wait for a response before treating the request as failed.
# Without a timeout, requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Captures the whole review count including every thousands separator, so
# "1,234,567 user reviews" yields "1,234,567" rather than only "234,567".
REVIEW_COUNT_RE = re.compile(r'(\d[\d,]*)\s+user reviews')


def parse_review_count(tooltip_html):
    """Return the review count found in a review tooltip as a digit string.

    Args:
        tooltip_html: Text of the ``data-tooltip-html`` attribute, or a
            falsy value (``None`` / ``''``) when the attribute is absent.

    Returns:
        The number preceding "user reviews" with commas removed, or
        ``'N/A'`` when no such number is present.
    """
    if not tooltip_html:
        return 'N/A'
    match = REVIEW_COUNT_RE.search(tooltip_html)
    return match.group(1).replace(',', '') if match else 'N/A'


def text_or_na(element):
    """Return the stripped text of a parsed element, or ``'N/A'`` if it is None."""
    return element.text.strip() if element is not None else 'N/A'


def extract_row(game, search_filter):
    """Build one CSV row from a single search-result element.

    Args:
        game: A parsed ``responsive_search_name_combined`` element.
        search_filter: The filter the element was found under; written as
            the last column.

    Returns:
        ``[name, published date, original price, discount price, reviews,
        search filter]``, with ``'N/A'`` for any field that is missing.
    """
    name = text_or_na(game.find('span', {'class': 'title'}))
    published_date = text_or_na(
        game.find('div', {'class': 'col search_released responsive_secondrow'})
    )
    original_price = text_or_na(game.find('div', {'class': 'discount_original_price'}))
    discount_price = text_or_na(game.find('div', {'class': 'discount_final_price'}))

    review_summary = game.find('span', {'class': 'search_review_summary'})
    # .get() instead of [...]: a summary element without the tooltip attribute
    # must produce 'N/A', not an uncaught KeyError that aborts the whole run.
    tooltip_html = (
        review_summary.get('data-tooltip-html') if review_summary is not None else None
    )
    reviews_number = parse_review_count(tooltip_html)

    return [name, published_date, original_price, discount_price, reviews_number, search_filter]


def scrape_steam(writer, filters, max_rows, start_count=0):
    """Scrape Steam search results page by page and write them as CSV rows.

    For each filter, pages are fetched starting at page 1 until a request
    fails, a page contains no results, or ``max_rows`` rows have been
    written in total. A failure only ends the current filter; the next
    filter is still attempted.

    Args:
        writer: A ``csv.writer``-like object with a ``writerow`` method.
        filters: Iterable of Steam search filter names.
        max_rows: Upper bound on the total number of rows written.
        start_count: Number of rows already written before this call.

    Returns:
        The total row count after scraping (``start_count`` plus new rows).
    """
    count = start_count

    for search_filter in filters:
        page = 1

        while count < max_rows:
            # Search URL for the current filter and page.
            url = f'https://store.steampowered.com/search/?filter={search_filter}&page={page}'

            # Network errors and timeouts are handled like a non-200 status:
            # report the URL and move on instead of crashing mid-file.
            try:
                response = requests.get(
                    url,
                    headers={"User-Agent": "Mozilla/5.0"},
                    timeout=REQUEST_TIMEOUT,
                )
            except requests.RequestException:
                response = None

            if response is None or response.status_code != 200:
                print(f"Gagal mengakses halaman {url}")
                break  # Stop this filter if the page could not be fetched.

            doc = BeautifulSoup(response.content, 'html.parser')

            # Every game listed on this results page.
            games = doc.find_all('div', {'class': 'responsive_search_name_combined'})

            if not games:
                print(f"Tidak ada game ditemukan di halaman {page} untuk filter {search_filter}")
                break  # No results on this page: the filter is exhausted.

            # Take only as many games as still fit under the row limit.
            for game in games[:max_rows - count]:
                writer.writerow(extract_row(game, search_filter))
                count += 1

            print(f"✔ Scraped {count} data so far...")
            page += 1  # Move on to the next page.
            time.sleep(2)  # Delay between requests to avoid being rate limited.

    return count


if __name__ == "__main__":
    # Open the CSV file that receives the scraped rows.
    with open('steam_games.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Name', 'Published Date', 'Original Price', 'Discount Price', 'Reviews', 'Search Query'])
        line_count = scrape_steam(writer, search_filters, MAX_DATA, line_count)

    print(f"✅ Scraping selesai! Total data tersimpan: {line_count}")


✔ Scraped 25 data so far...
✔ Scraped 50 data so far...
✔ Scraped 75 data so far...
✔ Scraped 100 data so far...
✔ Scraped 125 data so far...
✔ Scraped 150 data so far...
✔ Scraped 175 data so far...
✔ Scraped 200 data so far...
✔ Scraped 225 data so far...
✔ Scraped 250 data so far...
✔ Scraped 275 data so far...
✔ Scraped 300 data so far...
✔ Scraped 325 data so far...
✔ Scraped 350 data so far...
✔ Scraped 375 data so far...
✔ Scraped 400 data so far...
✔ Scraped 425 data so far...
✔ Scraped 450 data so far...
✔ Scraped 475 data so far...
✔ Scraped 500 data so far...
✔ Scraped 525 data so far...
✔ Scraped 550 data so far...
✔ Scraped 575 data so far...
✔ Scraped 600 data so far...
✔ Scraped 625 data so far...
✔ Scraped 650 data so far...
✔ Scraped 675 data so far...
✔ Scraped 700 data so far...
✔ Scraped 725 data so far...
✔ Scraped 750 data so far...
✔ Scraped 775 data so far...
✔ Scraped 800 data so far...
✔ Scraped 825 data so far...
✔ Scraped 850 data so far...
✔ Scraped 875 dat