# 📚 Project Based Learning (PBL)
## Integrasi Big Data, Blockchain, AI, dan Machine Learning dalam Kajian Sastra Indonesia

Notebook ini mencakup:
- Ekstraksi teks dari gambar menggunakan OCR
- Normalisasi ejaan lama ke ejaan baru
- Klasifikasi genre sastra menggunakan Machine Learning
- Simulasi Blockchain untuk distribusi karya sastra
- Visualisasi data sastra Indonesia


In [1]:
# ✅ Instalasi pustaka yang dibutuhkan
!pip install pytesseract pillow matplotlib scikit-learn
!apt-get update && apt-get install -y tesseract-ocr tesseract-ocr-ind

Collecting pytesseract
  Using cached pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Using cached pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13



[notice] A new release of pip is available: 25.0 -> 25.1.1
[notice] To update, run: C:\Users\admin\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip
'apt-get' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
# 🔎 Ekstraksi teks dari gambar menggunakan OCR
from PIL import Image
import pytesseract

# Ganti dengan path file gambar kamu
image = Image.open('halaman_buku.jpg')
teks = pytesseract.image_to_string(image, lang='ind')
print(teks)

FileNotFoundError: [Errno 2] No such file or directory: 'halaman_buku.jpg'

In [None]:
# 🔤 Normalisasi ejaan lama
mapping = {'dj': 'j', 'tj': 'c', 'oe': 'u', 'nj': 'ny', 'sj': 'sy', 'ch': 'kh'}
for lama, baru in mapping.items():
    teks = teks.replace(lama, baru)
print(teks)

In [None]:
# 🤖 Klasifikasi genre menggunakan Machine Learning
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

corpus_teks = [
    "Aku adalah api yang membakar jiwamu...",
    "Dialog antara dua insan di balik tirai waktu...",
    "Ia berjalan menuju pasar sambil membawa secarik kertas..."
]
label_genre = ['Puisi', 'Drama', 'Prosa']

model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(corpus_teks, label_genre)

# Coba prediksi
hasil_prediksi = model.predict([teks])
print("Prediksi genre:", hasil_prediksi[0])

In [None]:
# 🔗 Simulasi Blockchain untuk distribusi karya sastra
import hashlib, time

class Block:
    def __init__(self, index, data, prev_hash):
        self.index = index
        self.timestamp = time.time()
        self.data = data
        self.prev_hash = prev_hash
        self.hash = self.calculate_hash()

    def calculate_hash(self):
        return hashlib.sha256(f"{self.index}{self.timestamp}{self.data}{self.prev_hash}".encode()).hexdigest()

class Blockchain:
    def __init__(self):
        self.chain = [Block(0, "Genesis Block", "0")]

    def add_block(self, data):
        prev_hash = self.chain[-1].hash
        new_block = Block(len(self.chain), data, prev_hash)
        self.chain.append(new_block)

bc = Blockchain()
bc.add_block("Jual buku A, harga 100K, penulis X, royalti 10%")
for block in bc.chain:
    print(vars(block))

In [None]:
# 📊 Visualisasi data karya sastra
import matplotlib.pyplot as plt

genre = ['Prosa', 'Puisi', 'Drama']
jumlah = [120, 80, 30]

plt.bar(genre, jumlah, color=['skyblue', 'salmon', 'gold'])
plt.title('Jumlah Karya per Genre Sastra')
plt.ylabel('Jumlah Karya')
plt.show()