# Crawling

In [1]:
pip install sprynger

Defaulting to user installation because normal site-packages is not writeable
Collecting sprynger
  Downloading sprynger-0.4.1-py3-none-any.whl.metadata (5.8 kB)
Downloading sprynger-0.4.1-py3-none-any.whl (40 kB)
Installing collected packages: sprynger
Successfully installed sprynger-0.4.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
from bs4 import BeautifulSoup
import csv

def crawl_springer_nature(keyword, search_url="https://www.springernature.com/search", timeout=10):
    """Fetch a search-results page for ``keyword`` and scrape title/author pairs.

    NOTE: the default ``search_url`` and the CSS selectors used below
    (``div.article-item``, ``h2``, ``span.author-name``) are placeholders and
    must be adjusted to the real site structure; the default endpoint has been
    observed to answer with HTTP 404.

    Args:
        keyword: Search term. It is sent as the ``query`` URL parameter and
            copied into every returned row.
        search_url: Endpoint to query (without a query string).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook.

    Returns:
        A list of ``{'title', 'author', 'keyword'}`` dicts (possibly empty), or
        ``None`` when the HTTP request fails (the error is printed).
    """
    try:
        # Let requests build the query string so characters such as '&' or '#'
        # inside the keyword are percent-encoded instead of corrupting the URL.
        response = requests.get(search_url, params={'query': keyword}, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx responses into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error saat melakukan permintaan: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate the elements holding each result; this depends on the site's HTML.
    articles = soup.find_all('div', class_='article-item')  # placeholder class name

    data = []
    for article in articles:
        title_tag = article.find('h2')
        if title_tag is None:
            # A result without a title carries no usable information; skip it
            # rather than crash on ``None.text``.
            continue
        author_tag = article.find('span', class_='author-name')
        author = author_tag.text.strip() if author_tag is not None else ''
        # Extract any other relevant fields here.
        data.append({'title': title_tag.text.strip(), 'author': author, 'keyword': keyword})

    return data

# Keyword variations to search for
keywords = ['web mining', 'web usage mining', 'data mining']

# Gather the rows scraped for every keyword into one flat list
all_data = []
for keyword in keywords:
    print(f"Mencari data untuk kata kunci: {keyword}...")
    results = crawl_springer_nature(keyword)
    if not results:
        # None (request failed) or an empty list: nothing to collect
        continue
    all_data += results

# Write the collected rows to a CSV file
csv_filename = 'springer_nature_results.csv'
if not all_data:
    print("Tidak ada data yang ditemukan untuk disimpan.")
else:
    fieldnames = ['title', 'author', 'keyword']  # CSV column names
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in all_data:
            writer.writerow(row)

    print(f"Data berhasil disimpan ke {csv_filename}")

Mencari data untuk kata kunci: web mining...
Error saat melakukan permintaan: 404 Client Error: Not Found for url: https://www.springernature.com/search?query=web%20mining
Mencari data untuk kata kunci: web usage mining...
Error saat melakukan permintaan: 404 Client Error: Not Found for url: https://www.springernature.com/search?query=web%20usage%20mining
Mencari data untuk kata kunci: data mining...
Error saat melakukan permintaan: 404 Client Error: Not Found for url: https://www.springernature.com/search?query=data%20mining
Tidak ada data yang ditemukan untuk disimpan.
