In [1]:
import numpy as np
import json
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Load the data
with open("sentiment_stock_data1.json", "r") as file:
    data = json.load(file)

# Prepare the dataset
X_sentiments = []
X_previous_close = []
y = []

for company, details in data.items():
    sentiments = details["sentiments"]  # List of decimal sentiment values
    previous_close = details["previous_close"]  # Previous close price for the company
    stock_movement = details["stock_movement"] # The Movement of stock price
    
    X_sentiments.append(sentiments)
    X_previous_close.append(previous_close)
    y.append(stock_movement)

# Pad the sentiment sequences to a fixed length
max_length = max(len(s) for s in X_sentiments)
X_sentiments_padded = pad_sequences(X_sentiments, padding='post', maxlen=max_length)

# Combine padded sentiments and previous close prices into a single feature set
X = np.hstack((X_sentiments_padded, np.array(X_previous_close).reshape(-1, 1)))
y = np.array(y)

# Split into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Define the binary classification model
model = Sequential([
    Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid")  # Sigmoid output for binary classification (0 or 1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
def binary_accuracy(y_true, y_pred):
    # Convert y_true to float32 to match y_pred_bin type
    y_true = tf.cast(y_true, tf.float32)
    y_pred_bin = tf.cast(y_pred > 0.5, tf.float32)  # Convert probabilities to binary (0 or 1)
    return tf.reduce_mean(tf.cast(tf.equal(y_true, y_pred_bin), tf.float32))


# Recompile the model to include the custom metric
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),  # Use binary cross-entropy loss
    metrics=[binary_accuracy]  # Use the custom binary accuracy metric
)

In [8]:
# Fit the model with the custom binary accuracy metric
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - binary_accuracy: 0.7188 - loss: 2.0789 - val_binary_accuracy: 0.6667 - val_loss: 0.3025
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - binary_accuracy: 0.7431 - loss: 1.4944 - val_binary_accuracy: 0.6667 - val_loss: 0.3532
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - binary_accuracy: 0.6701 - loss: 1.0523 - val_binary_accuracy: 0.6667 - val_loss: 0.4245
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step - binary_accuracy: 0.7431 - loss: 1.0326 - val_binary_accuracy: 0.6667 - val_loss: 0.5684
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step - binary_accuracy: 0.5972 - loss: 1.5546 - val_binary_accuracy: 0.4444 - val_loss: 0.7563
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - binary_accuracy: 0.6458 - loss: 1.4154 - val_bi

In [13]:
# Save the model in the specified path
model.save('sentiment_stock_prediction_model.h5')



In [12]:
#Predicting the values
predictions = model.predict(X_test)
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")
# print(X_test)
print(y_test)
print(predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - binary_accuracy: 1.0000 - loss: 0.5391
Test Accuracy: 1.00
[1 1 1 1 1 1 1]
[[0.5562217 ]
 [0.5336343 ]
 [0.53008115]
 [0.53968966]
 [0.5387726 ]
 [0.7790638 ]
 [0.64430296]]
