In [13]:
!pip install -q contractions scikit-learn Sastrawi googletrans==4.0.0-rc1 langdetect pandas matplotlib yfinance tensorflow xgboost
nltk.download('stopwords')
nltk.download('punkt')

**Input Data**

In [18]:
# Headline to classify. The inference cell reads this module-level variable,
# both for preprocessing and as the input to the translator.
# NOTE(review): this sample is English, yet the inference cell sends it through
# the 'indonesian' preprocessing path — confirm that is intended.
new_text = "Revision of Subsidized Fertilizer Policy, Now Farmers Can Redeem Using KTP"

In [19]:
import re
import unicodedata
from functools import lru_cache

import contractions
import joblib
import nltk
from bs4 import BeautifulSoup
from google.colab import files
from googletrans import Translator
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

# Text preprocessing helpers
def strip_html_tags(text):
    """Return the visible text of an HTML fragment.

    ``<iframe>`` and ``<script>`` elements are removed together with their
    contents, the remaining markup is dropped via ``get_text()``, and every
    run of carriage returns / line feeds is collapsed into a single newline.

    Args:
        text: HTML (or plain text) string to clean.

    Returns:
        The text content with line breaks normalized to single newlines.
    """
    soup = BeautifulSoup(text, "html.parser")
    # Plain loop instead of a throwaway list comprehension: extract() is
    # called purely for its side effect of detaching the tag from the tree.
    for tag in soup(['iframe', 'script']):
        tag.extract()
    stripped_text = soup.get_text()
    # Inside a character class "|" is a literal, so the former pattern
    # [\r|\n|\r\n]+ also turned pipe characters into newlines. Only CR/LF
    # runs are meant to be collapsed.
    return re.sub(r'[\r\n]+', '\n', stripped_text)

def remove_accented_chars(text):
    """Reduce a string to its ASCII-only form.

    The text is NFKD-normalized so accented letters split into a base letter
    plus combining marks; encoding to ASCII with ``ignore`` then drops the
    marks and any other character that has no ASCII representation.

    Args:
        text: Input string.

    Returns:
        The input with all non-ASCII code points removed.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    return ascii_bytes.decode('utf-8', 'ignore')

def pre_process_text(text, language):
    """Normalize raw text before it is vectorized.

    Steps, in order: lowercase, strip HTML, turn newlines/tabs/carriage
    returns into spaces, drop accents, expand contractions, remove every
    character that is not an ASCII letter, digit or whitespace, and collapse
    runs of spaces. When ``language`` is ``'indonesian'`` the result is also
    passed through ``preprocess_text_sastrawi``.

    Args:
        text: Raw input string.
        language: Language tag; only ``'indonesian'`` triggers the extra
            Sastrawi step, any other value returns the generic cleanup.

    Returns:
        The cleaned text.
    """
    text = text.lower()
    text = strip_html_tags(text)
    text = text.translate(str.maketrans("\n\t\r", "   "))
    text = remove_accented_chars(text)
    text = contractions.fix(text)
    # The fourth positional parameter of re.sub is `count`, not `flags`.
    # Passing `re.I | re.A` there capped the substitution at 258 matches and
    # never applied the flags, so they must be given by keyword.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text, flags=re.I | re.A)
    text = re.sub(' +', ' ', text)
    if language == 'indonesian':
        text = preprocess_text_sastrawi(text)
    return text

# Indonesian-specific text preprocessing
@lru_cache(maxsize=1)
def _sastrawi_tools():
    """Build the Sastrawi stop-word remover and stemmer once and reuse them."""
    stopword_remover = StopWordRemoverFactory().create_stop_word_remover()
    stemmer = StemmerFactory().create_stemmer()
    return stopword_remover, stemmer


def preprocess_text_sastrawi(text):
    """Tokenize text, drop Sastrawi stop words and stem the remaining tokens.

    The text is split with ``nltk.word_tokenize``; each token is passed to the
    Sastrawi stop-word remover, tokens that come back empty are discarded, and
    the survivors are stemmed and re-joined with single spaces.

    Args:
        text: Already-cleaned input string.

    Returns:
        Space-separated stemmed tokens.
    """
    # The factories, remover and stemmer do not depend on the input, so they
    # are created once by the cached helper instead of on every call.
    stopword_sastrawi, stemmer_sastrawi = _sastrawi_tools()

    tokens = nltk.word_tokenize(text)
    tokens = [stopword_sastrawi.remove(token) for token in tokens]
    tokens = [stemmer_sastrawi.stem(token) for token in tokens if token != '']
    return " ".join(tokens)

# Load the models
# (the TF-IDF vectorizer and the Random Forest classifier saved with joblib)
# NOTE(review): joblib.load is pickle-based and can execute code while
# loading — only load artifacts that come from a trusted source.
tfidf_vectorizer = joblib.load('/content/tfidf_vectorizer.joblib')
rf_classifier = joblib.load('/content/random_forest_model.joblib')

# Preprocess the new text
# (uses the 'indonesian' path: generic cleanup followed by the Sastrawi step)
preprocessed_text = pre_process_text(new_text, 'indonesian')

# Convert the preprocessed text into TF-IDF features
new_text_tfidf = tfidf_vectorizer.transform([preprocessed_text])

# Predict the sentiment with the Random Forest model
# NOTE(review): predicted_label is not read again in this cell; the printed
# result below comes from the translated text instead.
predicted_label = rf_classifier.predict(new_text_tfidf)

# Translate the original (un-preprocessed) text to English
translator = Translator()
translated_text = translator.translate(new_text, dest='en').text

# Convert the translated text into TF-IDF features
# NOTE(review): the translated text is vectorized raw, without going through
# pre_process_text — confirm this matches how the vectorizer was fitted.
translated_text_tfidf = tfidf_vectorizer.transform([translated_text])

# Sentiment prediction for the translated text
# (predicted_sentiment is not read again in this cell)
predicted_sentiment = rf_classifier.predict(translated_text_tfidf)
# Column 1 of predict_proba is treated as the "Positive" probability —
# presumably rf_classifier.classes_[1] is the positive label; TODO confirm.
sentiment_probability = rf_classifier.predict_proba(translated_text_tfidf)[0, 1]

threshold = 0.5  # Threshold can be adjusted as needed
sentiment = "Positive" if sentiment_probability > threshold else "Negative"

# Print the prediction results
print("\nText:", translated_text)
print("Sentiment Probability:", sentiment_probability)
print("Sentiment:", sentiment)



Text: Revision of Subsidized Fertilizer Policy, Now Farmers Can Redeem Using KTP
Sentiment Probability: 0.66
Sentiment: Positive
