In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.colab import files

# --- Web Scraping ---
# Fetch one UMSU news article, collect the text of every h2/h3/li element
# inside its content container together with the nearest following image,
# print a preview, save the result as CSV and trigger a Colab download.
url = 'https://umsu.ac.id/berita/10-rekomendasi-tempat-wisata-sumut-2024-cocok-untuk-libur-lebaran-bersama-keluarga/'
headers = {
    # Browser-like User-Agent sent with the request.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Without a timeout requests waits forever on an unresponsive server, and
# connection-level failures raise instead of yielding a status code, so both
# are handled here and reported the same way as a bad HTTP status.
try:
    response = requests.get(url, headers=headers, timeout=30)
except requests.exceptions.RequestException as exc:
    response = None
    print(f"Gagal mengakses halaman: {exc}")

if response is not None and response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the article body; the class name must match the page's HTML.
    article_content = soup.find('div', class_='entry-content')
    if article_content:
        # One row per non-empty heading / list item.
        data = []

        # NOTE(review): an 'li' that wraps a heading yields a row holding the
        # whole item text in addition to the heading's own row -- confirm
        # whether both rows are wanted.
        for item in article_content.find_all(['h2', 'h3', 'li']):
            title = item.text.strip()
            if not title:  # Skip empty elements before doing any more work
                continue

            # First <img> that appears after this element in document order.
            image = item.find_next('img')
            image_url = 'No image available'
            if image is not None:
                # .get() avoids a KeyError on <img> tags without 'src';
                # 'data-src' is tried as a fallback on the assumption that
                # lazy-loaded images keep the real URL there.
                image_url = image.get('src') or image.get('data-src') or image_url

            data.append({'Place': title, 'Image URL': image_url})

        # Build the DataFrame
        df = pd.DataFrame(data)

        if not df.empty:
            print(df.head())  # Show the first few rows

            # Save to CSV
            df.to_csv('tempat_wisata_sumut_umsu_with_images.csv', index=False)
            print("Data berhasil disimpan ke tempat_wisata_sumut_umsu_with_images.csv")

            # Download the CSV file through the Colab helper
            files.download('tempat_wisata_sumut_umsu_with_images.csv')
        else:
            print("Tidak ada data yang ditemukan.")
    else:
        print("Tidak dapat menemukan elemen konten artikel.")
elif response is not None:
    print(f"Gagal mengakses halaman: {response.status_code}")

                                               Place  \
0  10 Rekomendasi Tempat Wisata Sumut 2024, Cocok...   
1  Pulau Samosir\nPulau Samosir\nPulau Samosir ad...   
2                                      Pulau Samosir   
3  Istana Maimun\nIstana Maimoon\n\nIstana Maimun...   
4                                      Istana Maimun   

                                           Image URL  
0  https://i0.wp.com/umsu.ac.id/berita/wp-content...  
1  https://i0.wp.com/umsu.ac.id/berita/wp-content...  
2  https://i0.wp.com/umsu.ac.id/berita/wp-content...  
3  https://i0.wp.com/umsu.ac.id/berita/wp-content...  
4  https://i0.wp.com/umsu.ac.id/berita/wp-content...  
Data berhasil disimpan ke tempat_wisata_sumut_umsu_with_images.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>