In [3]:
import pke
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('punkt')
nltk.download('stopwords')

text = """
Penelitian ini bertujuan untuk menganalisis efektivitas metode pembelajaran berbasis proyek
dalam meningkatkan kemampuan berpikir kritis mahasiswa pada mata kuliah Ilmu Komputer.
Metode penelitian yang digunakan adalah eksperimen dengan desain pretest-posttest control group.
Hasil penelitian menunjukkan adanya peningkatan signifikan pada kelompok eksperimen.
"""

# stopwords bahasa Indonesia
stoplist = set(stopwords.words("indonesian"))

# preprocessing manual
tokens = word_tokenize(text.lower())
tokens = [w for w in tokens if w.isalpha() and w not in stoplist]

# bikin input dummy: (token, POS)
doc = [[(token, "NOUN") for token in tokens]]

# fungsi ekstraksi dengan fallback manual
def extract_with_method(method, doc, top_n=5):
    extractor = method
    extractor.load_document(doc)
    extractor.candidate_selection()
    extractor.candidate_weighting()
    return extractor.get_n_best(n=top_n)

methods = {
    "TF-IDF": pke.unsupervised.TfIdf(),
    "YAKE": pke.unsupervised.YAKE(),
    "TextRank": pke.unsupervised.TextRank(),
    "TopicRank": pke.unsupervised.TopicRank(),
    "SingleRank": pke.unsupervised.SingleRank(),
    "MultipartiteRank": pke.unsupervised.MultipartiteRank(),
}

for name, method in methods.items():
    try:
        keywords = extract_with_method(method, doc, top_n=5)
        print(f"\n=== {name} ===")
        print(keywords)
    except Exception as e:
        print(f"\n=== {name} ===")
        print(f"Error: {e}")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\magang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\magang\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!



=== TF-IDF ===
[('penelitian', 21.539727270044803), ('metode', 14.359818180029869), ('eksperimen', 14.359818180029869), ('penelitian bertujuan', 7.1799090900149345), ('penelitian bertujuan menganalisis', 7.1799090900149345)]

=== YAKE ===
[('bertujuan menganalisis efektivitas', np.float64(0.002844459784961418)), ('pembelajaran berbasis proyek', np.float64(0.002844459784961418)), ('berbasis proyek meningkatkan', np.float64(0.002844459784961418)), ('proyek meningkatkan kemampuan', np.float64(0.002844459784961418)), ('meningkatkan kemampuan berpikir', np.float64(0.002844459784961418))]

=== TextRank ===
[('penelitian bertujuan menganalisis efektivitas metode pembelajaran berbasis proyek meningkatkan kemampuan berpikir kritis mahasiswa mata kuliah ilmu komputer metode penelitian eksperimen desain control group hasil penelitian peningkatan signifikan kelompok eksperimen', 1.266675734879455)]

=== TopicRank ===
[]

=== SingleRank ===
[('penelitian bertujuan menganalisis efektivitas metode p

In [None]:
from qdrant_client import QdrantClient
import os

# === KONFIGURASI ===
QDRANT_URL = "http://localhost:6333"  # ganti dengan URL instance Qdrant kamu
COLLECTION_NAME = "nama_collection_kamu"  # ganti dengan nama collection kamu
OUTPUT_FILE = "qdrant_chunks.md"

# Jika pakai Qdrant Cloud:
# QDRANT_URL = "https://xxx-xxxxx.qdrant.tech"
# API_KEY = "your-api-key"
# client = QdrantClient(url=QDRANT_URL, api_key=API_KEY)

# === KONEKSI KE QDRANT ===
client = QdrantClient(url=QDRANT_URL)

# === AMBIL SEMUA DATA ===
print(f"ðŸ“¡ Mengambil data dari collection '{COLLECTION_NAME}' ...")

scroll_iter = client.scroll(
    collection_name=COLLECTION_NAME,
    limit=100,             # ambil 100 per batch (bisa ubah)
    with_payload=True,     # ambil metadata/payload
    with_vectors=False     # tidak perlu ambil vektor
)

all_points = []
while True:
    points, next_page = scroll_iter
    all_points.extend(points)
    if next_page is None:
        break
    scroll_iter = client.scroll(
        collection_name=COLLECTION_NAME,
        offset=next_page,
        limit=100,
        with_payload=True,
        with_vectors=False
    )

print(f"âœ… Ditemukan {len(all_points)} points dari Qdrant.")

# === SIMPAN KE MARKDOWN ===
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    f.write(f"# ðŸ“„ Original Content dari Collection `{COLLECTION_NAME}`\n\n")

    for i, point in enumerate(all_points, start=1):
        metadata = point.payload or {}
        content = metadata.get("original_content", "")
        f.write(f"## ðŸ§© Chunk {i} (ID: {point.id})\n\n")
        f.write(content.strip() + "\n\n---\n\n")

print(f"âœ… File Markdown berhasil dibuat: {OUTPUT_FILE}")
