<a href="https://colab.research.google.com/github/BelongsToMe091603/Portfolio-MN/blob/main/Scraping_Website.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scraping HTML

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# 1. Request the HTML of the Al Jazeera News landing page
url = "https://www.aljazeera.com/news/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}
# requests applies no timeout by default, so a stalled connection would block
# this cell forever; fail after 30 seconds instead.
resp = requests.get(url, headers=headers, timeout=30)
# Raise immediately on 4xx/5xx so later cells never parse an error page.
resp.raise_for_status()

In [None]:
# 2. Parse the downloaded markup with Python's built-in HTML parser
soup = BeautifulSoup(markup=resp.text, features="html.parser")

In [None]:
# Tag 1: news headlines -- the stripped text of every <h3> on the page
titles = [heading.get_text(strip=True) for heading in soup.find_all("h3")]
titles

['US ranchers whiplashed by Trump’s beef policies',
 'UK unveils significant tax rises in budget after ‘shambolic’ forecast leak',
 'Guinea-Bissau army officers claim ‘total control’ as gunshots rock capital',
 'Russia-Ukraine ‘peace plan’: What’s the latest version after US-Kyiv talks?',
 'At least 13 dead as fire engulfs Hong Kong high-rise residential buildings',
 'India-China in new spat over Arunachal Pradesh: What’s it all about?',
 'Europe reimagines rearmament at sea, learning from Russia’s war on Ukraine',
 'Palestine Action’s legal challenge against UK government ban begins',
 'Russia denies incursions into NATO airspace, but what’s really happening?',
 'Russia denies incursions into NATO airspace, but what’s really happening?',
 'Israel lays siege to occupied West Bank’s Tubas, displaces tens of families',
 'Does adolescence last until 32? Scientists unlock brain’s five eras',
 'Arsenal host Bayern Munich in a top of the table Champions League showdown',
 'Photos: Floods kil

In [None]:
# Tag 2: short descriptions -- the stripped text of every <p> on the page
descriptions = [paragraph.get_text(strip=True) for paragraph in soup.find_all("p")]
descriptions

['Ranchers find themselves caught between the president’s desires to appease both them and the US consumer.',
 'AJ Defense Editor @alexgatopoulos breaks down grey zone warfare and why NATO is accusing Russia of acts of sabotage',
 'Governor says new assault aims to impose ‘new realities’ as Israel shuts parts of Tubas from rest of occupied West Bank.',
 'Researchers reveal that four ages – nine, 32, 66 and 83 – are when the brain’s most pivotal development occurs.',
 'Undefeated Arsenal clash with Champions League ladder leaders Bayern Munich in a star-studded contest in North London.',
 'Flooding in southern Thailand kills 33 people and displaces more than 10,000 as heavy rains batter seven provinces.',
 'Last April, police used a heavy hand to stop the Palestine Congress, pulling the plug soon after the forum started.',
 'Israel is still working to manage public opinion around its genocidal war in Gaza. But how much is it costing to run?',
 'Aid organisations say that at least 300,00

In [None]:
# Tag 3: news URLs -- one (link text, href) pair for every <a> that has an href
links = [
    (anchor.get_text(strip=True), anchor["href"])
    for anchor in soup.find_all("a", href=True)
]
links

[('Skip to Featured Content', '#featured-news-container'),
 ('Skip to Content Feed', '#news-feed-container'),
 ('', '/'),
 ('playLive', '/live'),
 ('News', '/news/'),
 ('Africa', '/africa/'),
 ('Asia', '/asia/'),
 ('US & Canada', '/us-canada/'),
 ('Latin America', '/latin-america/'),
 ('Europe', '/europe/'),
 ('Asia Pacific', '/asia-pacific/'),
 ('Middle East', '/middle-east/'),
 ('Explained', '/tag/explainer/'),
 ('Opinion', '/opinion/'),
 ('Sport', '/sports/'),
 ('Video', '/videos/'),
 ('Features', '/features/'),
 ('Economy', '/economy/'),
 ('Human Rights', '/tag/human-rights/'),
 ('Climate Crisis', 'https://www.aljazeera.com/climate-crisis'),
 ('Investigations', '/investigations/'),
 ('Interactives', '/interactives/'),
 ('In Pictures', '/gallery/'),
 ('Science & Technology', '/tag/science-and-technology/'),
 ('Podcasts', '/audio/podcasts'),
 ('Travel', 'https://www.aljazeera.com/travel'),
 ('playLive', '/live'),
 ('US ranchers whiplashed by Trump’s beef policies',
  '/economy/2025/1

In [None]:
# 1. Request the HTML of the home page
url = "https://www.aljazeera.com/"
headers = {"User-Agent": "Mozilla/5.0"}

# requests applies no timeout by default; without one a stalled connection
# would block this cell forever.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

# 2. Find every news container
articles = soup.find_all("article")

# 3. Extract three tags from each container, so title, description and link
#    always come from the same article:
data = []

for art in articles:
    # Headline: first <h3> inside the article
    title_tag = art.find("h3")
    title = title_tag.get_text(strip=True) if title_tag else None

    # Description: first <p> inside the article (None when there is no <p>)
    desc_tag = art.find("p")
    desc = desc_tag.get_text(strip=True) if desc_tag else None

    # Link: first <a> with an href inside the article
    link_tag = art.find("a", href=True)
    link = link_tag["href"] if link_tag else None

    # Keep only articles that have both a title and a link
    # NOTE(review): no de-duplication happens here, so the same article can
    # produce several rows if it appears in more than one <article> container.
    if title and link:
        data.append({
            "judul": title,
            "deskripsi": desc,
            "link": link
        })

# 4. Convert to a DataFrame
df = pd.DataFrame(data)

df

Unnamed: 0,judul,deskripsi,link
0,Russia quiet on Ukraine peace plan as ground f...,,/news/liveblog/2025/11/26/live-ukraine-and-rus...
1,Guinea-Bissau army officers claim ‘total contr...,Officers order suspension of electoral process...,/news/2025/11/26/guinea-bissau-army-claims-tot...
2,Guinea-Bissau army officers claim ‘total contr...,Officers order suspension of electoral process...,/news/2025/11/26/guinea-bissau-army-claims-tot...
3,"Palestinians in Gaza: ‘This is a nightmare, no...",,/features/2025/11/26/stuck-in-gazas-limbo-pale...
4,"Palestinians in Gaza: ‘This is a nightmare, no...",,/features/2025/11/26/stuck-in-gazas-limbo-pale...
5,Georgia judge drops election interference case...,,/news/2025/11/26/georgia-judge-drops-election-...
6,At least 13 dead as fire engulfs Hong Kong hig...,,/news/2025/11/26/at-least-13-dead-as-fire-engu...
7,India-China in new spat over Arunachal Pradesh...,,/news/2025/11/26/india-china-in-new-spat-over-...
8,UK unveils significant tax rises in budget aft...,,/news/2025/11/26/uk-unveils-significant-tax-ri...
9,Israel lays siege to occupied West Bank’s Tuba...,,/news/2025/11/26/israels-siege-on-west-bank-ci...


In [None]:
# (rows, columns) of the scraped article table.
df.shape

(33, 3)

In [None]:
# Preview the first rows of the scraped article table.
df.head()

Unnamed: 0,judul,deskripsi,link
0,Russia quiet on Ukraine peace plan as ground f...,,/news/liveblog/2025/11/26/live-ukraine-and-rus...
1,Guinea-Bissau army officers claim ‘total contr...,Officers order suspension of electoral process...,/news/2025/11/26/guinea-bissau-army-claims-tot...
2,Guinea-Bissau army officers claim ‘total contr...,Officers order suspension of electoral process...,/news/2025/11/26/guinea-bissau-army-claims-tot...
3,"Palestinians in Gaza: ‘This is a nightmare, no...",,/features/2025/11/26/stuck-in-gazas-limbo-pale...
4,"Palestinians in Gaza: ‘This is a nightmare, no...",,/features/2025/11/26/stuck-in-gazas-limbo-pale...


In [None]:
# Preview the last rows of the scraped article table.
df.tail()

Unnamed: 0,judul,deskripsi,link
28,"Ethiopian volcano erupts after 12,000 years: W...",,/news/2025/11/25/ethiopian-volcano-erupts-afte...
29,At least 13 dead as fire engulfs Hong Kong hig...,,/news/2025/11/26/at-least-13-dead-as-fire-engu...
30,"Palestinians in Gaza: ‘This is a nightmare, no...",,/features/2025/11/26/stuck-in-gazas-limbo-pale...
31,Does adolescence last until 32? Scientists unl...,,/news/2025/11/26/does-adolescence-last-until-3...
32,Berlin shutdown of pro-Palestine conference wa...,,/news/2025/11/26/germanys-shutdown-of-pro-pale...


In [None]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

Unnamed: 0,0
judul,object
deskripsi,object
link,object


In [None]:
# Count missing values per column.
df.isnull().sum()

Unnamed: 0,0
judul,0
deskripsi,25
link,0


In [None]:
# Count distinct values per column; a count below the number of rows means
# that column contains repeated values.
df.nunique()

Unnamed: 0,0
judul,22
deskripsi,7
link,22


In [None]:
# Wrap the three scraped lists in ONE record: each key maps to an entire
# list, not to one element per article.
data = [
    {"judul": titles, "deskripsi": descriptions, "link": links},
]

In [None]:
# Build a DataFrame from the list of records held in `data`.
df = pd.DataFrame(data)

In [None]:
# Display the frame built in the previous cell.
df

Unnamed: 0,judul,deskripsi,link
0,[US ranchers whiplashed by Trump’s beef polici...,[Ranchers find themselves caught between the p...,"[(Skip to Featured Content, #featured-news-con..."


In [None]:
# Length of the shortest of the three scraped lists; used to cut them to a
# common size before they are combined column-wise.
min_len = min(map(len, (titles, descriptions, links)))
min_len

12

In [None]:
# Combine the three lists column-wise after truncating each to `min_len`.
# NOTE(review): rows are matched purely by list position, so a title, a
# description and a link on the same row are not guaranteed to belong to the
# same article.
df = pd.DataFrame({
    "judul": titles[:min_len],
    "deskripsi": descriptions[:min_len],
    "link_text": [link_text for link_text, _ in links[:min_len]],
    "link_url": [link_url for _, link_url in links[:min_len]],
})

In [None]:
# Display the frame built in the previous cell.
df

Unnamed: 0,judul,deskripsi,link_text,link_url
0,US ranchers whiplashed by Trump’s beef policies,Ranchers find themselves caught between the pr...,Skip to Featured Content,#featured-news-container
1,UK unveils significant tax rises in budget aft...,AJ Defense Editor @alexgatopoulos breaks down ...,Skip to Content Feed,#news-feed-container
2,Guinea-Bissau army officers claim ‘total contr...,Governor says new assault aims to impose ‘new ...,,/
3,Russia-Ukraine ‘peace plan’: What’s the latest...,"Researchers reveal that four ages – nine, 32, ...",playLive,/live
4,At least 13 dead as fire engulfs Hong Kong hig...,Undefeated Arsenal clash with Champions League...,News,/news/
5,India-China in new spat over Arunachal Pradesh...,Flooding in southern Thailand kills 33 people ...,Africa,/africa/
6,"Europe reimagines rearmament at sea, learning ...","Last April, police used a heavy hand to stop t...",Asia,/asia/
7,Palestine Action’s legal challenge against UK ...,Israel is still working to manage public opini...,US & Canada,/us-canada/
8,"Russia denies incursions into NATO airspace, b...","Aid organisations say that at least 300,000 mo...",Latin America,/latin-america/
9,"Russia denies incursions into NATO airspace, b...",The recent strikes on boats in the Caribbean a...,Europe,/europe/


# Scraping Network

In [None]:
import requests
import pandas as pd

base = "https://shop.rumahatsiri.com"

# Ask the storefront's products.json endpoint for JSON directly.
# requests applies no timeout by default, so set one to keep a stalled
# connection from blocking the cell forever.
r = requests.get(
    f"{base}/products.json",
    headers={"Accept": "application/json", "User-Agent": "Mozilla/5.0"},
    timeout=30,
)
r.raise_for_status()
data = r.json()   # usually {'products': [ ... ]}

# Fall back to an empty list when the "products" key is missing.
products = data.get("products", [])
# json_normalize flattens nested dicts into dotted columns; list-valued
# fields are kept as Python lists inside a single column.
df = pd.json_normalize(products)
print(df.shape)
df

(30, 13)


Unnamed: 0,id,title,handle,body_html,published_at,created_at,updated_at,vendor,product_type,tags,variants,images,options
0,10173653025069,Rumah Atsiri Mom & Baby Care Set Paket Minyak ...,rumah-atsiri-mom-baby-care-set-paket-minyak-te...,"<p>Set perawatan praktis untuk ibu dan bayi, i...",2025-11-28T17:07:07+07:00,2025-11-28T17:07:06+07:00,2025-12-01T16:21:54+07:00,Tidak ada merek,Gifts&Sets,[],"[{'id': 51672625086765, 'title': 'Default Titl...","[{'id': 53219198271789, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."
1,10173650927917,Rumah Atsiri Hand Wash Holiday,rumah-atsiri-hand-wash-holiday,<p>Hand Wash Holiday Edition</p><p>Celebrate t...,2025-11-28T16:53:58+07:00,2025-11-28T16:53:57+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Hand Wash,[],"[{'id': 51672601559341, 'title': 'Rayu / Red',...","[{'id': 53219166028077, 'created_at': '2025-11...","[{'name': 'Aroma', 'position': 1, 'values': ['..."
2,10168038097197,Rumah Atsiri Kembang Setaman Reed Diffuser Hol...,rumah-atsiri-kembang-setaman-reed-diffuser-hol...,<p>Rasakan keanggunan dan kemewahan taman bung...,2025-11-21T15:41:21+07:00,2025-11-21T15:41:21+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Gifts&Sets,[],"[{'id': 51657021620525, 'title': 'Default Titl...","[{'id': 53225239445805, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."
3,10168024793389,Rumah Atsiri Bersama Tuku 2015 Hand Wash,rumah-atsiri-bersama-tuku-2015-hand-wash,<p><strong>2015 Hand Wash</strong><br>Sabun ta...,2025-11-21T14:18:08+07:00,2025-11-21T14:18:07+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Hand Wash,[],"[{'id': 51656945041709, 'title': 'Default Titl...","[{'id': 53182455251245, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."
4,10168022171949,Rumah Atsiri Bersama Tuku 2015 Pocket Hand San...,rumah-atsiri-bersama-tuku-2015-pocket-hand-san...,<p>Pembersih tangan dengan aroma kopi dan kara...,2025-11-21T14:07:12+07:00,2025-11-21T14:07:11+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Hand Sanitizer,[],"[{'id': 51656931246381, 'title': 'Default Titl...","[{'id': 53182420975917, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."
5,10168015454509,Rumah Atsiri Bersama Tuku 2015 Eau De Parfum,rumah-atsiri-bersama-tuku-2015-eau-de-parfum,<p>2015 Eau de Parfum merangkum kisah manis sa...,2025-11-21T13:18:56+07:00,2025-11-21T13:18:48+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Eau de Parfum,[],"[{'id': 51656890614061, 'title': 'Default Titl...","[{'id': 53182358815021, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."
6,9867136762157,ATSIRI Sanctuary Gift Set,atsiri-sanctuary-gift-set,"<p dir=""ltr""><span>ATSIRI Sanctuary Gift Set<b...",2025-11-17T14:24:03+07:00,2024-11-16T11:30:20+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Gifts&Sets,[],"[{'id': 50198320906541, 'title': 'Red / 1963',...","[{'id': 53223001260333, 'created_at': '2025-11...","[{'name': 'Color', 'position': 1, 'values': ['..."
7,10164435681581,ATSIRI Mystical Set Paket Dupa Harum Aromatic ...,atsiri-mystical-set-paket-dupa-harum-aromatic-...,<p>Merangkum keindahan warisan budaya dan keda...,2025-11-13T14:54:03+07:00,2025-11-13T14:54:02+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Gifts&Sets,[],"[{'id': 51638820372781, 'title': 'Default Titl...","[{'id': 53225194783021, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."
8,10164417954093,ATSIRI Arum Gift Set 3in1 Body Scrub Massage O...,atsiri-arum-gift-set-3in1-body-scrub-massage-o...,<p>DESKRIPSI<br>Arum Gift Set adalah koleksi m...,2025-11-13T13:49:01+07:00,2025-11-13T13:49:00+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Gifts&Sets,[],"[{'id': 51638734127405, 'title': 'Tuberose', '...","[{'id': 53217357365549, 'created_at': '2025-11...","[{'name': 'Aroma', 'position': 1, 'values': ['..."
9,10159601516845,Rumah Atsiri Super Mom Gift Set Hadiah Ibu Ham...,rumah-atsiri-super-mom-gift-set-hadiah-ibu-ham...,<p>DESKRIPSI<br>Set perawatan lengkap untuk ib...,2025-11-10T13:17:05+07:00,2025-11-10T13:17:05+07:00,2025-12-01T16:21:54+07:00,ATSIRI,Gifts&Sets,[],"[{'id': 51628817678637, 'title': 'Default Titl...","[{'id': 53223002538285, 'created_at': '2025-11...","[{'name': 'Title', 'position': 1, 'values': ['..."


In [None]:
# Explicit %pip magic (same command the bare `pip ...` automagic resolves to).
%pip install requests



In [None]:
import requests

In [None]:
# NOTE(review): a personal e-mail address is hard-coded in the payload below
# and will be published with the notebook; consider reading it from an
# environment variable or input() instead.
# `req` holds the *response* object; the name is kept because a later cell
# reads it.
req = requests.post(
    url="https://shop.rumahatsiri.com/api/collect",
    json={
        "email": "naufalkz04@gmail.com",
    },
    # requests applies no timeout by default; avoid hanging on a stalled server.
    timeout=30,
)
print(req.status_code)

200


In [None]:
# Summarise the response: status code, headers, and the first 500 characters
# of the body.
for label, value in (
    ("Status:", req.status_code),
    ("Headers:", req.headers),
    ("Text:", req.text[:500]),
):
    print(label, value)

Status: 200
Headers: {'Date': 'Mon, 01 Dec 2025 17:03:03 GMT', 'Content-Type': 'text/html; charset=utf-8', 'Content-Length': '0', 'Connection': 'keep-alive', 'CF-RAY': '9a7427de489a53b5-ATL', 'x-sorting-hat-podid': '300', 'x-sorting-hat-shopid': '79711404333', 'x-storefront-renderer-rendered': '1', 'shopify-complexity-score': '0', 'set-cookie': '_shopify_essential=:AZra3icWAAEA5jMANO8Q17GnCJcNFUf9lWGLBrWpcPZRPAU3XU5-P0m4RKtESD0mVCw1kDc0-hMhyjFwxvRAvjYvepY7N_XBY5t6v2UIfFW6UqRN8iaoLg3LdFO3yWZxjBswFT73rDFsRXOV4raPdQArPL5Q96NQp4gC2KI7B1ampsawYMMkW_AS0-evBkHebsO4CzG4ckkcxwEo5HDiByAZGH7TW4CW1URu_p-IiQ-uqvkNbA:; Max-Age=31536000; Path=/; HttpOnly; Secure; Priority=High; SameSite=Lax', 'x-frame-options': 'DENY', 'content-security-policy': "block-all-mixed-content; frame-ancestors 'none'; upgrade-insecure-requests;", 'x-shopid': '79711404333', 'x-shardid': '300', 'vary': 'Accept', 'Alt-Svc': 'h3=":443"; ma=86400', 'powered-by': 'Shopify', 'server-timing': 'processing;dur=5;desc="gc:1", asn;desc

In [None]:
import requests

# requests applies no timeout by default; avoid hanging on a stalled server.
req = requests.get(
    url="https://trading.paskomnas.id/product?page=1&limit=20#!",
    timeout=30,
)
req.raise_for_status()

# This URL may answer with an HTML page rather than JSON, in which case
# .json() raises a JSON decode error (a ValueError subclass). Report that
# instead of leaving the cell in a failed state.
try:
    payload = req.json()
except ValueError:
    payload = None
    print("Response is not JSON; Content-Type:", req.headers.get("Content-Type"))
payload

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# NOTE(review): the variable name mentions "zalora" although the URL targets
# trading.paskomnas.id; the name is kept because a later cell reads it.
zalora_request = requests.get(
    url="https://trading.paskomnas.id/product?page=1&limit=20#!",
    headers={
        # Ask the server for a JSON representation.
        "accept": "application/json"
    },
    # requests applies no timeout by default; avoid hanging on a stalled server.
    timeout=30,
)

zalora_request

<Response [200]>

In [None]:
# Take the raw response body of that request
zalora_content = zalora_request.content
zalora_content

b'<!DOCTYPE html>\n<html lang="en" dir="ltr">\n    <head>\n        <meta charset="utf-8">\n                <title>Daftar Produk - Belanja Sayur & Buah\n</title>\n        <link rel="apple-touch-icon" sizes="57x57" href="https://trading.paskomnas.id/images/favicon/apple-icon-57x57.png">\n        <link rel="apple-touch-icon" sizes="60x60" href="https://trading.paskomnas.id/images/favicon/apple-icon-60x60.png">\n        <link rel="apple-touch-icon" sizes="72x72" href="https://trading.paskomnas.id/images/favicon/apple-icon-72x72.png">\n        <link rel="apple-touch-icon" sizes="76x76" href="https://trading.paskomnas.id/images/favicon/apple-icon-76x76.png">\n        <link rel="apple-touch-icon" sizes="114x114" href="https://trading.paskomnas.id/images/favicon/apple-icon-114x114.png">\n        <link rel="apple-touch-icon" sizes="120x120" href="https://trading.paskomnas.id/images/favicon/apple-icon-120x120.png">\n        <link rel="apple-touch-icon" sizes="144x144" href="https://trading.pasko

In [None]:
# %pip installs into the environment of the running kernel, whereas `!pip`
# runs whichever pip is first on the shell PATH.
%pip install selenium

Collecting selenium
  Downloading selenium-4.38.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.32.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket<1.0,>=0.12.2->selenium)
  Downloading wsproto-1.3.2-py3-none-any.whl.metadata (5.2 kB)
Downloading selenium-4.38.0-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.32.0-py3-none-any.whl (512 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.0/512.0 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloadin

In [None]:
# Explicit %pip magic (same command the bare `pip ...` automagic resolves to).
%pip install webdriver-manager

Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver-manager
Successfully installed webdriver-manager-4.0.2


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

In [None]:
# -y answers apt's confirmation prompt, so the install cannot abort or stall
# waiting for keyboard input in a non-interactive cell.
!apt-get install -y chromium-driver

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Note, selecting 'chromium-chromedriver' instead of 'chromium-driver'
The following additional packages will be installed:
  apparmor chromium-browser libfuse3-3 libudev1 snapd squashfs-tools
  systemd-hwe-hwdb udev
Suggested packages:
  apparmor-profiles-extra apparmor-utils fuse3 zenity | kdialog
The following NEW packages will be installed:
  apparmor chromium-browser chromium-chromedriver libfuse3-3 snapd
  squashfs-tools systemd-hwe-hwdb udev
The following packages will be upgraded:
  libudev1
1 upgraded, 8 newly installed, 0 to remove and 40 not upgraded.
Need to get 34.3 MB of archives.
After this operation, 135 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 apparmor amd64 3.0.4-2ubuntu2.4 [598 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 squashfs-tools amd64 1:4.5-3build1 [159 kB]
Get:3 http://archive.ubuntu.co

In [None]:
def web_driver():
  """Create a headless Chrome WebDriver with a fixed set of command-line flags.

  Returns:
    The object returned by ``webdriver.Chrome(options=...)``.

  Raises:
    Whatever ``webdriver.Chrome`` raises when the browser session cannot be
    started (for example ``SessionNotCreatedException``).
  """
  options = webdriver.ChromeOptions()
  options.add_argument("--verbose")
  options.add_argument("--no-sandbox")
  options.add_argument("--headless")
  options.add_argument("--disable-gpu")
  # Chrome's documented form is "width,height"; the previous value contained a
  # space after the comma, which may not be parsed as the intended size.
  options.add_argument("--window-size=1920,1200")
  options.add_argument("--disable-dev-shm-usage")
  driver = webdriver.Chrome(options=options)
  return driver

In [None]:
# Start a Chrome session using the options assembled in web_driver().
driver = web_driver()

SessionNotCreatedException: Message: session not created: Chrome instance exited. Examine ChromeDriver verbose log to determine the cause.; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
Stacktrace:
#0 0x5ce01241316a <unknown>
#1 0x5ce011e90c4b <unknown>
#2 0x5ce011ecad1d <unknown>
#3 0x5ce011ec66d3 <unknown>
#4 0x5ce011f1624c <unknown>
#5 0x5ce011f1596c <unknown>
#6 0x5ce011ed4c42 <unknown>
#7 0x5ce011ed58f1 <unknown>
#8 0x5ce0123dbf09 <unknown>
#9 0x5ce0123dee4d <unknown>
#10 0x5ce0123c4c51 <unknown>
#11 0x5ce0123dfa2b <unknown>
#12 0x5ce0123aba20 <unknown>
#13 0x5ce012400a78 <unknown>
#14 0x5ce012400c49 <unknown>
#15 0x5ce0124124c3 <unknown>
#16 0x794840f4dac3 <unknown>


In [None]:
# --- Browser setup ---
options = webdriver.ChromeOptions()
for chrome_flag in (
    "--start-maximized",
    "--disable-blink-features=AutomationControlled",
    "--disable-gpu",
):
    options.add_argument(chrome_flag)

In [None]:
import os

options = webdriver.ChromeOptions()
# binary_location must be the path of the Chrome executable itself. The
# previous value was a user-specific Start Menu "Programs" folder, which is a
# directory and only exists on one machine. Take the path from the
# CHROME_BINARY environment variable when it is set; otherwise leave it unset
# so Selenium looks for Chrome in its default locations.
chrome_binary = os.environ.get("CHROME_BINARY")
if chrome_binary:
    options.binary_location = chrome_binary

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd


# === SELENIUM SETUP ===
options = Options()
options.add_argument("--start-maximized")

# Service() without an explicit path lets Selenium Manager (bundled with
# Selenium 4.6+) locate or download a matching chromedriver. The previous
# hard-coded "chromedriver.exe" only resolves on a machine that has that file
# in the working directory, and failed with NoSuchDriverException otherwise.
service = Service()
driver = webdriver.Chrome(service=service, options=options)


# === OPEN THE WEBSITE ===
driver.get("https://trading.paskomnas.id/")

# Fixed 3-second pause after navigation -- presumably to let the page finish
# rendering before it is scrolled; TODO confirm.
time.sleep(3)


# === AUTO-SCROLL UNTIL THE PAGE STOPS GROWING ===
def auto_scroll(driver, pause=1, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops changing.

    Args:
        driver: Object exposing ``execute_script(js)``, e.g. a Selenium
            WebDriver.
        pause: Seconds to wait after each scroll before re-reading the page
            height.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps the original behaviour of scrolling until the
            height is stable; a number guards against pages that grow forever.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0

    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")

        # An unchanged height after a scroll means nothing new was loaded.
        if new_height == last_height:
            break

        last_height = new_height


# Scroll to the bottom repeatedly, waiting 2 seconds after each scroll, until
# the page height stops changing.
auto_scroll(driver, pause=2)


# === SCRAPE PRODUCT ITEMS ===
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)

# Only "element is missing / no longer attached" errors mean "this field is
# absent". Anything else (including KeyboardInterrupt, which a bare `except:`
# would swallow) is allowed to propagate.
_MISSING_ELEMENT_ERRORS = (NoSuchElementException, StaleElementReferenceException)


def _card_text(card, selector):
    """Return the text of the first element under `card` matching `selector`, or None if absent."""
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except _MISSING_ELEMENT_ERRORS:
        return None


def _card_href(card):
    """Return the card's href attribute, or None if the element can no longer be read."""
    try:
        return card.get_attribute("href")
    except _MISSING_ELEMENT_ERRORS:
        return None


try:
    product_cards = driver.find_elements(By.CSS_SELECTOR, "a.product-card")

    # One record per product card; a missing sub-element becomes None.
    data = [
        {
            "name": _card_text(card, ".product-card-title"),
            "price": _card_text(card, ".product-card-price"),
            "location": _card_text(card, ".product-card-location"),
            "url": _card_href(card),
        }
        for card in product_cards
    ]
finally:
    # Always close the browser, even when scraping fails, so no Chrome
    # process is left running.
    driver.quit()


# === LOAD INTO A DATAFRAME ===
df = pd.DataFrame(data)

df

NoSuchDriverException: Message: Unable to obtain driver for chrome; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location


In [None]:
# No active import brings ChromeDriverManager into scope (the only import of
# it in this notebook is commented out), so import it here; otherwise this
# cell fails with NameError.
from webdriver_manager.chrome import ChromeDriverManager

# install() returns the path of a chromedriver binary, which is handed to
# Selenium's Service.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)