In [1]:
# Sentiment Analysis Dependencies
!pip install -q contractions scikit-learn Sastrawi googletrans==4.0.0-rc1 langdetect gdown
import joblib
from google.colab import files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from bs4 import BeautifulSoup
import nltk
import re
import unicodedata
from googletrans import Translator
import contractions
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
nltk.download('stopwords')
nltk.download('punkt')

# Time Series Dependencies
import gdown
import tensorflow as tf
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import yfinance as yf

def download_sentiment_models():
    file_id_tfidf = '134JrTPXdmm6lXZH84ZGk-xsQsEyvvnrL'
    file_id_rf = '1zhdKOAbGP_wsQRRrhbbxRjQ6Wep_3LKu'
    url_tfidf = f'https://drive.google.com/uc?id={file_id_tfidf}'
    url_rf = f'https://drive.google.com/uc?id={file_id_rf}'

    output_tfidf = 'tfidf_vectorizer.joblib'
    output_rf = 'random_forest_model.joblib'

    gdown.download(url_tfidf, output_tfidf, quiet=False)
    gdown.download(url_rf, output_rf, quiet=False)

def download_time_series_model():
    file_id_time_series = '1hQqkpeXQOXH79bNCin1o4So1vGdl3Ent'
    url_time_series = f'https://drive.google.com/uc?id={file_id_time_series}'
    output_time_series = 'time_series_model.h5'

    gdown.download(url_time_series, output_time_series, quiet=False)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


**Input Data**

In [2]:
new_text = "Revision of Subsidized Fertilizer Policy, Now Farmers Can Redeem Using KTP"

stock_symbol = 'FTT-USD'
start_date = '2022-11-14'
end_date = '2023-11-14'

In [3]:
# Fungsi-fungsi pra-pemrosesan teks
def strip_html_tags(text):
    soup = BeautifulSoup(text, "html.parser")
    [s.extract() for s in soup(['iframe', 'script'])]
    stripped_text = soup.get_text()
    stripped_text = re.sub(r'[\r|\n|\r\n]+', '\n', stripped_text)
    return stripped_text

def remove_accented_chars(text):
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    return text

def pre_process_text(text, language):
    text = text.lower()
    text = strip_html_tags(text)
    text = text.translate(text.maketrans("\n\t\r", "   "))
    text = remove_accented_chars(text)
    text = contractions.fix(text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text, re.I | re.A)
    text = re.sub(' +', ' ', text)
    if language == 'indonesian':
        text = preprocess_text_sastrawi(text)
    return text

# Fungsi pra-pemrosesan teks khusus Bahasa Indonesia
def preprocess_text_sastrawi(text):
    factory1 = StopWordRemoverFactory()
    stopword_sastrawi = factory1.create_stop_word_remover()

    factory2 = StemmerFactory()
    stemmer_sastrawi = factory2.create_stemmer()

    tokens = nltk.word_tokenize(text)
    tokens = [stopword_sastrawi.remove(token) for token in tokens]
    tokens = [stemmer_sastrawi.stem(token) for token in tokens if token != '']
    return " ".join(tokens)

# Load the models
tfidf_vectorizer = joblib.load('/content/tfidf_vectorizer.joblib')
rf_classifier = joblib.load('/content/random_forest_model.joblib')

# Preprocess the new text
preprocessed_text = pre_process_text(new_text, 'indonesian')

# Mengonversi teks yang telah di-preprocess menjadi fitur TF-IDF
new_text_tfidf = tfidf_vectorizer.transform([preprocessed_text])

# Melakukan prediksi sentimen menggunakan model Random Forest
predicted_label = rf_classifier.predict(new_text_tfidf)

translator = Translator()
translated_text = translator.translate(new_text, dest='en').text

# Mengonversi teks yang telah diterjemahkan menjadi fitur TF-IDF
translated_text_tfidf = tfidf_vectorizer.transform([translated_text])

# Menampilkan prediksi sentimen
predicted_sentiment = rf_classifier.predict(translated_text_tfidf)
sentiment_probability = rf_classifier.predict_proba(translated_text_tfidf)[0, 1]

threshold = 0.5  # Threshold bisa diatur sesuai kebutuhan
sentiment = "Positive" if sentiment_probability > threshold else "Negative"

# Time Series Analysis

# Get historical stock data
new_df = yf.download(stock_symbol, start=start_date, end=end_date)

# Pilih kolom 'Open' (butuhnya opening price)
new_ts = new_df['Open'].values

# Normalisasi data
scaler = StandardScaler()
new_data_normalized = scaler.fit_transform(np.array(new_ts).reshape(-1, 1))

# Ensure the new data is in the sequence format similar to training data
seq_length = 30

# Prepare X_new_data
X_new_data = []

for i in range(len(new_data_normalized) - seq_length):
    X_new_data.append(new_data_normalized[i:i + seq_length])

# Convert X_new_data to numpy array
X_new_data = np.array(X_new_data)

from tensorflow.keras.models import load_model

# Load the pre-trained time series analysis model
model = load_model('/content/time_series_model.h5')

predictions = model.predict(X_new_data)

# forecasting
forecast_days = 5
X_forecast = np.copy(new_data_normalized[-seq_length:])

forecasted_values = []
for _ in range(forecast_days):
    forecasted_value = model.predict(X_forecast.reshape(1, seq_length, 1))
    forecasted_values.append(forecasted_value[0, 0])

    X_forecast = np.roll(X_forecast, -1)
    X_forecast[-1] = forecasted_value

last_actual_day = new_df.index[-1]  # Last day of the actual data
forecast_dates = pd.date_range(last_actual_day, periods=forecast_days + 1)[1:]

last_actual_opening_price = new_df['Open'][-1]  # Opening price of the last day in the actual data
first_forecast_opening_price = forecasted_values[0]  # Opening price of the first day in the forecast

price_difference = first_forecast_opening_price - last_actual_opening_price
percentage_change = price_difference / last_actual_opening_price

print(f"last actual opening price: {last_actual_day} = {last_actual_opening_price}")
print(f"first forecast opening price: {forecast_dates[0]} = {first_forecast_opening_price}")
print(f"Difference in opening stock price between last actual day and first forecast day: {price_difference}")
print(f"Percentage Change: {percentage_change*100}%")

# adjusted percentage_change to be weighted metric
weighted_metric = (percentage_change + 1) / 2
print(f"\nweighted metric: {weighted_metric}")

time_series_weight = weighted_metric

[*********************100%%**********************]  1 of 1 completed
last actual opening price: 2023-11-13 00:00:00 = 3.4254369735717773
first forecast opening price: 2023-11-14 00:00:00 = 1.612433671951294
Difference in opening stock price between last actual day and first forecast day: -1.8130033016204834
Percentage Change: -52.92765027085071%

weighted metric: 0.23536174864574644


In [4]:
# Fungsi untuk mengkombinasikan bobot
def combine_weights(sentiment_probability, time_series_weight, sentiment_ratio=0.65):
    time_series_ratio = 1 - sentiment_ratio

    combined_weight = (sentiment_ratio * sentiment_probability + time_series_ratio * time_series_weight)
    return combined_weight

final_weight = combine_weights(sentiment_probability, time_series_weight)
final_sentiment = "Positive📈" if final_weight > 0.5 else "Negative📉"

print("Bobot:", final_weight)
print("Sentiment:", final_sentiment)

Bobot: 0.5113766120260113
Sentiment: Positive📈
