In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [8]:
# Fungsi untuk mengambil data dari satu halaman
def _first_link_text(container, default):
    """Return the stripped text of the first <a> inside ``container``.

    ``container`` may be ``None`` (the wrapping element was not found on the
    page). In that case, or when it contains no <a> tag, ``default`` is
    returned instead of raising.
    """
    link = container.find('a') if container is not None else None
    return link.text.strip() if link is not None else default


def scrape_page(page_url, timeout=30):
    """Fetch one listing page and extract title, author and index per entry.

    Args:
        page_url: URL of the page to download.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the notebook forever.

    Returns:
        A tuple ``(titles, authors, indekses)`` of three lists with one item
        per ``div.col-lg.meta-side`` entry found on the page. Missing pieces
        are filled with 'No Title', 'No Authors' and 'No Indeks', so the
        three lists always have the same length.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: for connection problems or timeouts.
    """
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on error pages instead of parsing them as an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    titles = []
    authors = []
    indekses = []

    for journal in soup.find_all('div', class_='col-lg meta-side'):
        # The wrapper <div> itself may be absent, so look it up first and let
        # the helper deal with None rather than chaining .find() calls.
        titles.append(
            _first_link_text(journal.find('div', class_='affil-name mb-3'), 'No Title')
        )
        authors.append(
            _first_link_text(journal.find('div', class_='affil-loc mt-2'), 'No Authors')
        )

        indeks_element = journal.find('span', class_='num-stat accredited')
        if indeks_element is not None:
            # Drop the "==" marker that can appear in the badge text.
            indekses.append(indeks_element.text.strip().replace("==", "").strip())
        else:
            indekses.append('No Indeks')

    return titles, authors, indekses

# Fungsi untuk melakukan scraping ke beberapa halaman
def scrape_sinta_journals(base_url, total_pages):
    """Scrape pages 1 through ``total_pages`` and merge the per-page results.

    Each page URL is built by appending ``?page=<n>`` to ``base_url`` and is
    handed to ``scrape_page``.

    Returns:
        A tuple ``(titles, authors, indekses)`` of lists holding the items of
        every page, concatenated in page order.
    """
    # One accumulator per column, in the order scrape_page returns them.
    collected = ([], [], [])

    for page_number in range(1, total_pages + 1):
        page_titles, page_authors, page_indekses = scrape_page(
            f"{base_url}?page={page_number}"
        )
        for bucket, page_items in zip(
            collected, (page_titles, page_authors, page_indekses)
        ):
            bucket.extend(page_items)

    return collected

# Listing endpoint to scrape and the number of result pages to fetch from it.
base_url = 'https://sinta.kemdikbud.go.id/journals/'
total_pages = 2
titles, authors, indekses = scrape_sinta_journals(
    base_url=base_url,
    total_pages=total_pages,
)


In [9]:
# Sanity check: the three scraped columns should all have the same length.
tuple(map(len, (titles, authors, indekses)))

(20, 20, 20)

In [10]:
# Build a DataFrame with one column each for titles, authors and index.
df = pd.DataFrame(
    data=dict(
        zip(
            ('Judul Journal', 'Authors', 'Indeks'),
            (titles, authors, indekses),
        )
    )
)
df.head(5)

Unnamed: 0,Judul Journal,Authors,Indeks
0,"International Journal on Advanced Science, Eng...",Indonesian Society for Knowledge and Human Dev...,S1 Accredited
1,Ahkam: Jurnal Ilmu Syariah,Universitas Islam Negeri Syarif Hidayatullah J...,S1 Accredited
2,Indonesian Journal of Science and Technology,Universitas Pendidikan Indonesia,S1 Accredited
3,Asean Journal of Science and Engineering (AJSE),Universitas Pendidikan Indonesia,S1 Accredited
4,Makara Journal of Science,Universitas Indonesia,S1 Accredited


In [11]:
# NOTE(review): absolute, machine-specific path; the export fails wherever
# this directory does not exist. Consider a path relative to the notebook.
output_file = 'D:/TUGAS KAMPUS SEMESTER 3/PEMROSESAN TEKS/SCRAPING.csv'
# utf-8-sig is used so that special characters are supported in the CSV.
df.to_csv(
    path_or_buf=output_file,
    encoding='utf-8-sig',
    index=False,
)
print(f"Data telah disimpan ke {output_file}")

Data telah disimpan ke D:/TUGAS KAMPUS SEMESTER 3/PEMROSESAN TEKS/SCRAPING.csv
