In [None]:
import pandas as pd
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
import numpy as np
import tensorrt as trt

In [None]:
# --- Load the articles ------------------------------------------------------
file_path = r"./data/NEWS_YAHOO_stock_prediction.csv"
df = pd.read_csv(file_path)

# Missing article bodies would otherwise be stringified by astype(str) into the
# literal text "nan" and tokenised as a real word; treat them as empty text.
texts = df['content'].fillna('').astype(str)

# --- Tokenise and pad -------------------------------------------------------
max_words = 10000  # vocabulary cap passed to the tokenizer and the embedding
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

maxlen = 200  # every sequence is padded/truncated to this many token ids
df_padded = pad_sequences(sequences, maxlen=maxlen)

# --- Labels -----------------------------------------------------------------
# NOTE(review): these labels are random placeholders, not real sentiment
# annotations, so the model below cannot learn anything meaningful and every
# downstream "sentiment" is noise. Replace with a real label column from the
# dataset when one is available -- TODO confirm which column that should be.
# A seeded generator is used so that, like the train/validation split below,
# the placeholder labels are at least identical from run to run.
labels = np.random.default_rng(0).integers(0, 2, size=(len(texts),))

X_train, X_val, y_train, y_val = train_test_split(df_padded, labels, test_size=0.2, random_state=0)

# --- Model: embedding -> LSTM -> sigmoid ------------------------------------
model = Sequential()
model.add(Embedding(max_words, 128, input_length=maxlen))
model.add(LSTM(50, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))  # single score in [0, 1] per input

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, batch_size=32, validation_data=(X_val, y_val), epochs=10)

# --- Score every article ----------------------------------------------------
# `texts` has already been tokenised and padded above with the same tokenizer
# and maxlen, so reuse those results instead of recomputing them. The original
# variable names are kept in case other cells refer to them.
all_sequences = sequences
all_df_padded = df_padded
predictions = model.predict(all_df_padded)



In [None]:
# Score cut-offs: a sentence counts as positive at or above POS_THRESHOLD and
# as negative below NEG_THRESHOLD; scores in between are ignored.
POS_THRESHOLD = 0.7
NEG_THRESHOLD = 0.3

positive_counts = []
negative_counts = []

# Iterate over the values directly. Indexing with texts[i] is a label lookup
# on the Series and only works while the index is the default 0..N-1 range.
for text in texts:
    sentences = text.split('.')

    # Splitting on '.' leaves empty fragments (e.g. after the final full stop),
    # and some fragments contain no in-vocabulary word. Both tokenise to an
    # empty list, which would be padded to an all-zero row and scored as if it
    # were a sentence -- drop them before predicting.
    sequence = [seq for seq in tokenizer.texts_to_sequences(sentences) if seq]

    if not sequence:
        # Nothing scorable in this article; record zero counts so the lists
        # stay aligned with `texts`.
        positive_counts.append(0)
        negative_counts.append(0)
        continue

    padded = pad_sequences(sequence, maxlen=maxlen)
    # verbose=0: one predict call per article would otherwise print a progress
    # bar for every article and flood the notebook output.
    sentence_predictions = model.predict(padded, verbose=0)

    positive_counts.append(int(np.sum(sentence_predictions >= POS_THRESHOLD)))
    negative_counts.append(int(np.sum(sentence_predictions < NEG_THRESHOLD)))

# An article is 'Positive' only when positive sentences strictly outnumber
# negative ones; ties (including 0 vs 0) are labelled 'Negative'.
article_sentiments = ['Positive' if pos_count > neg_count else 'Negative' for pos_count, neg_count in
                      zip(positive_counts, negative_counts)]

N_total = len(texts)
N_pos = sum(1 for sentiment in article_sentiments if sentiment == 'Positive')
N_neg = N_total - N_pos

# Proportions are undefined for an empty dataset; report NaN instead of
# raising ZeroDivisionError.
P_pos = N_pos / N_total if N_total else float('nan')
P_neg = N_neg / N_total if N_total else float('nan')

print(f"P_pos: {P_pos:.2f}")
print(f"P_neg: {P_neg:.2f}")

# Preview the first ten articles with their aggregated label.
for text, sentiment in zip(texts.head(10), article_sentiments[:10]):
    print(f"{text[:50]}... -> Overall Sentiment: {sentiment}")