# Analisis Sentimen Komentar YouTube terhadap Kebijakan Pemerintahan

Notebook ini berisi proses pengumpulan dan analisis komentar YouTube terkait kebijakan pemerintahan. Proses meliputi:
1. Pengumpulan data komentar dari YouTube menggunakan YouTube Data API
2. Preprocessing data komentar
3. Analisis sentimen dan visualisasi hasil

## 1. Import Library yang Dibutuhkan

In [None]:
import csv
import json
import os
import random
import re
import ssl
import time
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

## 2. Konfigurasi API dan Fungsi Pengumpulan Data

In [None]:
# NOTE(security): this replaces the default HTTPS context factory with the
# unverified one, so certificate checks are skipped for HTTPS requests that
# rely on the default context. It is kept because the notebook was written to
# run this way; prefer fixing the local CA bundle and deleting this line.
ssl._create_default_https_context = ssl._create_unverified_context

# API key configuration.
# The key is taken from the YOUTUBE_API_KEY environment variable when it is
# set to a non-empty value, so the credential does not have to live in the
# notebook. The literal below is only a backward-compatible fallback; because
# it is stored in source it should be rotated and then removed.
API_KEY = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyBJYCAwEeCM0fFCTtqURETBDXGM7oGUg8o"

In [None]:
def get_video_ids(query, max_results=50):
    """Return YouTube video IDs that match a search query.

    Queries the YouTube Data API ``search`` endpoint restricted to
    ``type=video`` and follows ``nextPageToken`` until ``max_results`` IDs
    have been gathered or no further page is offered. Each request asks for
    at most 50 results (the per-page maximum in the search.list reference),
    so a larger ``max_results`` is served by several requests.

    Args:
        query: Free-text search string sent as the ``q`` parameter.
        max_results: Upper bound on the number of IDs returned.

    Returns:
        A list of video ID strings. If a request or parsing step fails, the
        error is printed and the IDs collected so far are returned (possibly
        an empty list).
    """
    base_search_url = "https://www.googleapis.com/youtube/v3/search"
    page_limit = 50
    video_ids = []

    if max_results <= 0:
        return video_ids

    params = {
        "part": "snippet",
        "q": query,
        "key": API_KEY,
        "maxResults": min(max_results, page_limit),
        "type": "video",
    }

    try:
        while len(video_ids) < max_results:
            search_url = f"{base_search_url}?{urllib.parse.urlencode(params)}"

            # A timeout keeps one stalled connection from hanging the run.
            with urllib.request.urlopen(search_url, timeout=30) as response:
                search_results = json.loads(response.read().decode())

            items = search_results.get("items", [])
            for item in items:
                # Skip malformed items instead of aborting the whole batch.
                video_id = item.get("id", {}).get("videoId")
                if video_id:
                    video_ids.append(video_id)

            next_page_token = search_results.get("nextPageToken")
            if not items or not next_page_token:
                break
            params["pageToken"] = next_page_token

    except Exception as e:
        # Best-effort boundary: report the failure and keep what was found.
        print(f"Error dalam pencarian video: {e}")

    # The last page may overshoot the requested amount.
    return video_ids[:max_results]

In [None]:
def clean_text(text):
    """Collapse every run of whitespace in a comment into one space.

    Newlines, carriage returns and tabs count as whitespace, so the result
    is a single line with no leading or trailing blanks.
    """
    # split() with no separator breaks on any whitespace run and drops
    # empty pieces, which covers "\n", "\r" and "\t" in one pass.
    words = text.split()
    return " ".join(words)

class CommentWriter:
    """Write collected comments to a CSV file one row at a time.

    Creating an instance truncates ``filename`` and writes the header row.
    Each ``write_comment`` call reopens the file in append mode, adds one
    row and increments ``comment_count``.
    """

    def __init__(self, filename):
        """Create (or overwrite) ``filename`` and write the CSV header."""
        self.filename = filename
        self.comment_count = 0

        with open(self.filename, "w", newline="", encoding="utf-8") as csv_file:
            csv.writer(csv_file).writerow(
                ["Comment", "Likes", "Timestamp", "Video ID", "Query"]
            )

    def write_comment(self, comment, video_id, query):
        """Append one comment row and report progress every 10 comments.

        ``comment`` is a mapping with the keys ``"text"``, ``"likes"`` and
        ``"timestamp"``; the text is passed through ``clean_text`` first.
        """
        with open(self.filename, "a", newline="", encoding="utf-8") as csv_file:
            row = [
                clean_text(comment["text"]),
                comment["likes"],
                comment["timestamp"],
                video_id,
                query,
            ]
            csv.writer(csv_file).writerow(row)
            self.comment_count += 1

        if self.comment_count % 10:
            return
        print(f"Progress: {self.comment_count} komentar telah dikumpulkan")

In [None]:
def get_video_comments(video_id, query, writer, target_comments=200):
    """Fetch top-level comments for one video and write them straight to CSV.

    Requests pages from the ``commentThreads`` endpoint and hands every
    comment to ``writer.write_comment`` until ``target_comments`` have been
    written or the response carries no ``nextPageToken``. Any exception is
    printed and ends collection for this video.

    Returns:
        The number of comments written for this video.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    request_params = {
        "part": "snippet",
        "videoId": video_id,
        "key": API_KEY,
        "maxResults": 100,
        "textFormat": "plainText",
    }
    collected = 0

    try:
        while collected < target_comments:
            page_url = endpoint + "?" + urllib.parse.urlencode(request_params)

            with urllib.request.urlopen(page_url) as response:
                payload = json.loads(response.read().decode())

            for thread in payload.get("items", []):
                top_level = thread["snippet"]["topLevelComment"]["snippet"]
                writer.write_comment(
                    {
                        "text": top_level["textDisplay"],
                        "likes": top_level["likeCount"],
                        "timestamp": top_level["publishedAt"],
                    },
                    video_id,
                    query,
                )
                collected += 1

                if collected >= target_comments:
                    break

            # Stop once the target is met or there is no further page.
            if collected >= target_comments or "nextPageToken" not in payload:
                break
            request_params["pageToken"] = payload["nextPageToken"]

            # Random pause between page requests.
            time.sleep(random.uniform(1, 3))

    except Exception as e:
        print(f"Error dalam mengambil komentar untuk video {video_id}: {e}")

    return collected

## 3. Pengumpulan Data Komentar

In [None]:
# Search phrases used to find relevant videos.
search_queries = [
    "kebijakan ekonomi presiden prabowo 2025",
    "kontroversi program sosial presiden prabowo",
    "reaksi publik terhadap instruksi presiden nomor 1 tahun 2025",
    "perubahan undang-undang militer di era prabowo",
    "dampak kebijakan anggaran pemerintah prabowo terhadap infrastruktur",
    "tanggapan masyarakat atas program makan gratis nasional",
    "pro dan kontra kebijakan penyimpanan devisa hasil ekspor",
    "kontroversi keterlibatan militer dalam pemerintahan prabowo",
    "reaksi pasar terhadap kebijakan ekonomi presiden prabowo",
    "kritik terhadap pemotongan anggaran kementerian di era prabowo",
]

# Output location and collection limits.
output_file = "youtube_comments.csv"
writer = CommentWriter(output_file)
target_total_comments = 10000
comments_per_video = 200

print("Memulai proses pengumpulan data...")

for query in search_queries:
    # Stop issuing searches once the overall target has been reached.
    if not writer.comment_count < target_total_comments:
        break

    print(f"\nMencari video untuk query: {query}")
    video_ids = get_video_ids(query)

    for video_id in video_ids:
        if not writer.comment_count < target_total_comments:
            break

        print(f"Mengambil komentar dari video: {video_id}")
        # Never ask one video for more than is still missing overall.
        comments_collected = get_video_comments(
            video_id,
            query,
            writer,
            min(comments_per_video, target_total_comments - writer.comment_count),
        )

        # Random pause between videos.
        time.sleep(random.uniform(2, 5))

print(f"\nBerhasil mengumpulkan {writer.comment_count} komentar!")
print(f"Data tersimpan di: {output_file}")

## 4. Preprocessing Data

Tahap preprocessing meliputi:
1. Case folding
2. Pembersihan teks (URL, mention, hashtag, dll)
3. Penghapusan stopwords
4. Stemming

In [None]:
# Load the collected comments from the CSV file and report the row count.
df = pd.read_csv('youtube_comments.csv')
print(f"Jumlah data sebelum preprocessing: {df.shape[0]}")
# Last expression of the cell: shows the first rows in the notebook.
df.head()

In [None]:
def preprocess_text(text):
    """Normalise a raw comment into lowercase, letters-only text.

    Steps, in order: lowercase the text, drop URLs, drop @mentions and
    #hashtags, drop every character that is not an ASCII letter or
    whitespace, then collapse whitespace runs and trim the ends.

    Args:
        text: The raw comment. Non-string values, such as the NaN floats
            pandas uses for missing CSV cells, are treated as an empty
            comment instead of raising.

    Returns:
        The cleaned string, possibly empty.
    """
    if not isinstance(text, str):
        return ""

    # Case folding
    text = text.lower()

    # Remove URLs. The dot after "www" is escaped so ordinary words that
    # merely start with "www" are left alone.
    text = re.sub(r'http\S+|www\.\S+', '', text)

    # Remove mentions and hashtags. Handles may contain "." and "-", so
    # those are consumed too instead of leaving a fragment behind.
    text = re.sub(r'@[\w.-]+|#\w+', '', text)

    # Remove special characters and digits
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Collapse repeated whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    return text

# Build the Sastrawi stopword remover and stemmer once; both are reused for
# every row below.
stopword_remover = StopWordRemoverFactory().create_stop_word_remover()
stemmer = StemmerFactory().create_stemmer()

# Each stage reads the column produced by the previous one:
# raw comment -> cleaned text -> stopwords removed -> stemmed.
df['cleaned_comment'] = df['Comment'].apply(preprocess_text)
df['no_stopwords'] = df['cleaned_comment'].apply(stopword_remover.remove)
df['stemmed'] = df['no_stopwords'].apply(stemmer.stem)

# Save the preprocessed data.
df.to_csv('preprocessed_comments.csv', index=False)
print("\nHasil preprocessing telah disimpan ke 'preprocessed_comments.csv'")

# Show the first rows of every stage side by side.
sample_results = df.head()[['Comment', 'cleaned_comment', 'no_stopwords', 'stemmed']]
print("\nContoh hasil preprocessing:", sample_results, sep="\n")

## 5. Analisis Data

Beberapa analisis dasar pada data yang telah melalui tahap preprocessing:

In [None]:
# Word count per comment, measured on the cleaned text.
df['word_count'] = df['cleaned_comment'].apply(
    lambda comment: len(str(comment).split())
)
print("Statistik jumlah kata per komentar:", df['word_count'].describe(), sep="\n")

# Summary statistics for the like counts.
print("\nStatistik jumlah likes:", df['Likes'].describe(), sep="\n")

# Number of comments collected per search query.
query_distribution = df['Query'].value_counts()
print("\nDistribusi komentar berdasarkan query pencarian:", query_distribution, sep="\n")