In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_ojk_regulations_with_links():
    base_url = "https://www.ojk.go.id/id/Regulasi/Default.aspx"
    #base_url = "https://lps.go.id/plps/"
    data = []

    for page in range(1, 4):  # Loop untuk 3 halaman
        url = f"{base_url}?page={page}"  # Perbaiki URL dengan format dinamis
        response = requests.get(url)

        # Pastikan respons valid
        if response.status_code != 200:
            print(f"Gagal mengakses halaman {page}: {response.status_code}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')

        # Temukan tabel berdasarkan kelas
        table = soup.find('table', {'class': 'table table-bordered table-styled'})
        if not table:
            print(f"Tabel tidak ditemukan di halaman {page}.")
            continue

        rows = table.find_all('tr')
        for row in rows[1:]:  # Lewati header
            cols = row.find_all('td')
            if len(cols) >= 2:
                # Ambil link dan caption dari elemen <a> dan elemen lainnya
                link_tag = cols[1].find('a')
                link = link_tag['href'] if link_tag else None
                caption_tag = cols[1].find('div', class_='caption')
                caption = caption_tag.text.strip() if caption_tag else None

                # Tambahkan data ke dalam list
                data.append({
                    'Nomor Regulasi': cols[0].text.strip(),
                    'Judul Regulasi': cols[1].text.strip(),
                    'Link': link,
                    'Caption': caption
                })

    # Konversi hasil ke DataFrame
    return pd.DataFrame(data)

# Eksekusi
regulations = scrape_ojk_regulations_with_links()
print(regulations)


      Nomor Regulasi                                     Judul Regulasi  \
0      21 Tahun 2024  Laporan Berkala Dana Pensiun\n\n\r\n          ...   
1   20/SEOJK.07/2024  Penyelenggaraan Perdagangan Aset Keuangan Digi...   
2      27 Tahun 2024  Penyelenggaraan Perdagangan Aset Keuangan Digi...   
3      23 Tahun 2024  Pelaporan Melalui Sistem Pelaporan Otoritas Ja...   
4      22 Tahun 2024  Laporan Berkala Perusahaan Perasuransian\n\n\r...   
5      26 Tahun 2024  Perluasan Kegiatan Usaha Perbankan\n\n\r\n    ...   
6   18/SEOJK.08/2024  Penilaian Sendiri Terhadap Pemenuhan Ketentuan...   
7   16/SEOJK.03/2024  Pelaporan Melalui Sistem Pelaporan Otoritas Ja...   
8      25 Tahun 2024  Penerapan Tata Kelola Syariah Bagi Bank Pereko...   
9      24 Tahun 2024  Kualitas Aset Bank Perekonomian Rakyat Syariah...   
10     21 Tahun 2024  Laporan Berkala Dana Pensiun\n\n\r\n          ...   
11  20/SEOJK.07/2024  Penyelenggaraan Perdagangan Aset Keuangan Digi...   
12     27 Tahun 2024  Pen

In [3]:
from datetime import datetime

def generate_report(new_regulations):
    if new_regulations.empty:
        print("Tidak ada regulasi baru untuk dilaporkan.")
        return

    # Nama file laporan berdasarkan tanggal
    filename = f"report_regulasi_{datetime.now().strftime('%Y%m%d')}.xlsx"

    # Simpan DataFrame sebagai file Excel
    new_regulations.to_excel(filename, index=False, engine='openpyxl')
    print(f"Laporan berhasil disimpan sebagai {filename}")

In [4]:
generate_report(regulations)

Laporan berhasil disimpan sebagai report_regulasi_20250103.xlsx


In [5]:
import sqlite3

def save_to_database(df, db_name="regulations.db"):
    conn = sqlite3.connect(db_name)
    df.to_sql('ojk_regulations', conn, if_exists='replace', index=False)
    conn.close()

def compare_with_old_data(new_df, db_name="regulations.db"):
    conn = sqlite3.connect(db_name)
    old_df = pd.read_sql('SELECT * FROM ojk_regulations', conn)
    conn.close()

    new_regulations = pd.merge(new_df, old_df, how='outer', indicator=True)
    return new_regulations[new_regulations['_merge'] == 'left_only']

In [6]:
save_to_database(regulations)

In [7]:
new_regulations = compare_with_old_data(regulations)

In [8]:
print(new_regulations)

Empty DataFrame
Columns: [Nomor Regulasi, Judul Regulasi, Link, Caption, _merge]
Index: []


In [9]:
pip install schedule

Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Installing collected packages: schedule
Successfully installed schedule-1.2.2


In [10]:
import smtplib
from email.mime.text import MIMEText

def send_email_notification(new_regulations):
    smtp_server = "smtp.gmail.com"
    port = 587
    sender_email = "justinus.natanael@gmail.com"
    password = "bmjc ywtw ocnb svbi"

    message = MIMEText(new_regulations.to_html(), 'html')
    message['Subject'] = "Notifikasi Regulasi Baru"
    message['From'] = sender_email
    message['To'] = "justinus.nathanael@gmail.com"
    #message['To'] = "Reyhan.Prasetyo@cimbniaga.co.id"


    with smtplib.SMTP(smtp_server, port) as server:
        server.starttls()
        server.login(sender_email, password)
        server.send_message(message)


new_regulations = compare_with_old_data(regulations)

if not new_regulations.empty:
    send_email_notification(new_regulations)

In [11]:
generate_report(new_regulations)

Tidak ada regulasi baru untuk dilaporkan.


In [12]:
import requests
from bs4 import BeautifulSoup
import os

def extract_documents(base_url, page_url):
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Cari semua link dokumen di halaman
    document_links = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        if href.endswith(('.pdf', '.doc', '.docx')):  # Filter file dokumen
            full_url = base_url + href if not href.startswith('http') else href
            document_links.append(full_url)

    # Unduh dokumen
    os.makedirs('downloaded_documents', exist_ok=True)
    for doc_url in document_links:
        try:
            response = requests.get(doc_url)
            filename = os.path.join('downloaded_documents', doc_url.split('/')[-1])
            with open(filename, 'wb') as f:
                f.write(response.content)
            print(f"Dokumen disimpan: {filename}")
        except Exception as e:
            print(f"Gagal mengunduh {doc_url}: {e}")

# Contoh penggunaan
base_url = "https://ojk.go.id"
page_url = "https://ojk.go.id/id/regulasi/Pages/Asosiasi-Penyelenggara-Inovasi-Teknologi-Sektor-Keuangan.aspx"
extract_documents(base_url, page_url)


Dokumen disimpan: downloaded_documents/SEOJK%208-SEOJK.07-2024%20Asosiasi%20Penyelenggara%20Inovasi%20Teknologi%20Sektor%20Keuangan.pdf
Dokumen disimpan: downloaded_documents/Abstrak%20SEOJK%208-SEOJK.07-2024%20Asosiasi%20Penyelenggara%20Inovasi%20Teknologi%20Sektor%20Keuangan.pdf
Dokumen disimpan: downloaded_documents/FAQ%20SEOJK%208-SEOJK.07-2024%20Asosiasi%20Penyelenggara%20Inovasi%20Teknologi%20Sektor%20Keuangan.pdf


In [13]:
import schedule
import time
from datetime import datetime
import pandas as pd
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders


# Fungsi untuk mengirim laporan melalui email
def send_report_via_email(frequency, df):
    filename = generate_report(df, frequency)
    smtp_server = "smtp.gmail.com"
    port = 587
    sender_email = "justinus.natanael@gmail.com"
    password = "bmjc ywtw ocnb svbi"

    message = MIMEMultipart()
    message['Subject'] = f"Laporan Regulasi {frequency.capitalize()}"
    message['From'] = sender_email
    message['To'] = "justinus.nathanael@gmail.com"
    #message['To'] = "Reyhan.Prasetyo@cimbniaga.co.id"

        # Tambahkan lampiran
    with open(filename, "rb") as attachment:
        part = MIMEBase("application", "octet-stream")
        part.set_payload(attachment.read())
    encoders.encode_base64(part)
    part.add_header("Content-Disposition", f"attachment; filename={filename}")
    message.attach(part)

    with smtplib.SMTP(smtp_server, port) as server:
        server.starttls()
        server.login(sender_email, password)
        server.send_message(message)
    print(f"Laporan {frequency} telah dikirimkan melalui email.")

def send_annual_report_if_new_year(data):
    from datetime import datetime
    if datetime.now().strftime("%m-%d") == "01-01":
        send_report_via_email("tahunan", data)

data = 'report_regulasi_20250103.xslx'

# Jadwalkan pengiriman laporan
schedule.every().day.at("18:00").do(send_report_via_email, "harian", data)  # Harian
schedule.every().monday.at("18:00").do(send_report_via_email, "mingguan", data)  # Mingguan
schedule.every(1).to(31).days.at("18:00").do(send_report_via_email, "bulanan", data) # Bulanan
schedule.every().day.at("18:00").do(lambda: send_annual_report_if_new_year(data)) # Tahunan


# Jalankan penjadwalan
while True:
    schedule.run_pending()
    time.sleep(60)


KeyboardInterrupt: 

In [14]:
import schedule
import time
from datetime import datetime

# Fungsi untuk memeriksa apakah hari ini adalah hari kerja

def is_friday():
  f"report_{frequency}_{datetime.now().strftime('%Y%m%d')}.xlsx"
  today = datetime.now().weekday()
  return today == 4  # 4 = Friday

def send_report_via_email(frequency, df):
    filename = generate_report(df, frequency)
    smtp_server = "smtp.gmail.com"
    port = 587
    sender_email = "justinus.natanael@gmail.com"
    password = "bmjc ywtw ocnb svbi"

    message = MIMEText(new_regulations.to_html(), 'html')
    message['Subject'] = "Notifikasi Regulasi Baru"
    message['From'] = sender_email
    message['To'] = "justinus.nathanael@gmail.com"
    #message['To'] = "Reyhan.Prasetyo@cimbniaga.co.id"


    with smtplib.SMTP(smtp_server, port) as server:
        server.starttls()
        server.login(sender_email, password)
        server.send_message(message)
    print(f"Laporan {frequency} telah dikirimkan melalui email.")

def scrape_and_send_report():
    if is_friday():
        print("Hari Jumat, menjalankan scraping dan mengirim laporan...")
        regulations = scrape_ojk_regulations()
        send_email_report(regulations)
    else:
        print("Bukan hari Jumat, tidak mengirim laporan.")

# Jadwalkan setiap 30 menit
schedule.every(30).minutes.do(scrape_ojk_regulations_with_links)
schedule.every().day.at("17:00").do(send_report_via_email)

while True:
    schedule.run_pending()
    time.sleep(1)

KeyboardInterrupt: 