In [291]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [292]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [293]:
# Load your dataset
def load_data(file_path):
    file_path = "/content/drive/MyDrive/apple_share_price.csv"


    data = pd.read_csv(file_path)
    texts = data['Date'].astype(str).tolist()
    labels = data['Low'].values
    return texts, labels

In [294]:
# Normalize the labels if they are in a wide range
def preprocess_data(texts, labels, vocab_size=10000, max_length=100):

    tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')

    scaler = MinMaxScaler()
    labels_scaled = scaler.fit_transform(labels.reshape(-1, 1)).flatten()  # Normalize between 0 and 1


    # Modified to return 4 values as expected:
    return padded_sequences, labels_scaled, tokenizer, sequences

In [295]:
# Load dataset
file_path = "/content/drive/MyDrive/apple_share_price.csv"
texts, labels = load_data(file_path)

# Preprocess the data and store tokenizer
padded_sequences, labels_scaled, tokenizer, sequences = preprocess_data(texts, labels)

# Access the vocabulary via tokenizer.word_index
vocab = tokenizer.word_index

In [296]:
vocab

{'<OOV>': 1,
 '11': 2,
 '13': 3,
 '14': 4,
 '12': 5,
 '15': 6,
 '16': 7,
 '17': 8,
 'mar': 9,
 'jun': 10,
 'may': 11,
 'jul': 12,
 'apr': 13,
 'aug': 14,
 'jan': 15,
 'feb': 16,
 'oct': 17,
 'dec': 18,
 'sep': 19,
 'nov': 20,
 '10': 21,
 '9': 22,
 '7': 23,
 '8': 24,
 '6': 25,
 '23': 26,
 '3': 27,
 '28': 28,
 '27': 29,
 '24': 30,
 '21': 31,
 '19': 32,
 '2': 33,
 '20': 34,
 '18': 35,
 '5': 36,
 '22': 37,
 '4': 38,
 '26': 39,
 '1': 40,
 '25': 41,
 '29': 42,
 '30': 43,
 '31': 44}

In [297]:
sequences = pad_sequences(sequence, maxlen=6, padding = 'post', truncating='post')

In [298]:
sequences

array([[ 3, 15,  8,  0,  0,  0],
       [28, 15,  8,  0,  0,  0],
       [21, 15,  8,  0,  0,  0],
       ...,
       [25, 11,  3,  0,  0,  0],
       [33, 11,  3,  0,  0,  0],
       [26, 11,  3,  0,  0,  0]], dtype=int32)

In [299]:
sequences.shape

(1664, 6)

In [300]:
 sequences.shape[0]

1664

In [301]:
label = np.array(label).reshape(-1)

In [302]:
# Split dataset into training and testing
x_train_texts, x_test_texts, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

vocab_size = 10000
max_length = 100

# preprocess_data function returns 4 values: padded_sequences, labels_scaled, tokenizer, sequences
x_train, y_train, tokenizer, sequences = preprocess_data(x_train_texts, y_train, vocab_size, max_length)
x_test, y_test, _, _ = preprocess_data(x_test_texts, y_test, vocab_size, max_length)

In [303]:
X = np.array(range(0,6))
X = X.reshape(1, 6)
y = np.array([1])
y= y.reshape(-1,1)
X, y


(array([[0, 1, 2, 3, 4, 5]]), array([[1]]))

In [304]:
vocab_size = 6
max_length = 10

x_train, y_train, tokenizer, sequences = preprocess_data(x_train_texts, y_train, vocab_size, max_length)
x_test, y_test, _, _ = preprocess_data(x_test_texts, y_test, vocab_size, max_length)

# Access the vocabulary
vocab = tokenizer.word_index
print(f"Vocabulary (top {vocab_size}): {vocab}")


Vocabulary (top 6): {'<OOV>': 1, '12': 2, '11': 3, '14': 4, '13': 5, '15': 6, '16': 7, '17': 8, 'may': 9, 'jun': 10, 'jan': 11, 'jul': 12, 'apr': 13, 'mar': 14, 'aug': 15, 'oct': 16, 'feb': 17, 'nov': 18, 'sep': 19, 'dec': 20, '9': 21, '28': 22, '24': 23, '6': 24, '5': 25, '3': 26, '19': 27, '10': 28, '27': 29, '26': 30, '20': 31, '25': 32, '4': 33, '22': 34, '2': 35, '7': 36, '21': 37, '18': 38, '29': 39, '1': 40, '8': 41, '23': 42, '30': 43, '31': 44}


In [305]:
# Build the RNN model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=max_length),
    SimpleRNN(units=32, activation='tanh', return_sequences=False, name="Simple_RNN"),
    Dense(units=1, activation='sigmoid', name="Output_Layer")
])



In [306]:
X = np.array(range(10, 26))
X = X.reshape(-1, 4, 4)
y = np.array(range(1, 5))
y = y.reshape(-1, 1)
X, y


(array([[[10, 11, 12, 13],
         [14, 15, 16, 17],
         [18, 19, 20, 21],
         [22, 23, 24, 25]]]),
 array([[1],
        [2],
        [3],
        [4]]))

In [307]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [308]:
model.fit(x_train, y_train)

[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 5.1283e-04 - loss: 0.6595


<keras.src.callbacks.history.History at 0x7e955bc30730>

In [309]:
# Train the model
history = model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


Epoch 1/5
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 7.9994e-04 - loss: 0.5694 - val_accuracy: 0.0000e+00 - val_loss: 0.5616
Epoch 2/5
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0011 - loss: 0.5729 - val_accuracy: 0.0000e+00 - val_loss: 0.5613
Epoch 3/5
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0032 - loss: 0.5682 - val_accuracy: 0.0000e+00 - val_loss: 0.5613
Epoch 4/5
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0016 - loss: 0.5663 - val_accuracy: 0.0000e+00 - val_loss: 0.5608
Epoch 5/5
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.0023 - loss: 0.5624 - val_accuracy: 0.0000e+00 - val_loss: 0.5620


In [310]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0044 - loss: 0.8561     
Test Loss: 0.864962637424469, Test Accuracy: 0.006006006151437759


In [311]:
# Get weights and biases of each layer
print("\nWeights and Biases of the Model:")

# Get weights and biases of the embedding layer
embedding_weights = model.layers[0].get_weights()  # Embedding Layer
print("\nEmbedding Layer Weights and Biases:")
print(f"Embedding Weights Shape: {embedding_weights[0].shape}")
print(f"Embedding Biases Shape: {embedding_weights[1].shape}" if len(embedding_weights) > 1 else "No Biases")

# Get weights and biases of the SimpleRNN layer
rnn_weights = model.layers[1].get_weights()  # Simple RNN Layer
print("\nSimpleRNN Layer Weights and Biases:")
print(f"RNN Weights Shape: {rnn_weights[0].shape}")
print(f"RNN Biases Shape: {rnn_weights[1].shape}")

# Get weights and biases of the output (Dense) layer
dense_weights = model.layers[2].get_weights()  # Output Dense Layer
print("\nDense Layer Weights and Biases:")
print(f"Dense Weights Shape: {dense_weights[0].shape}")
print(f"Dense Biases Shape: {dense_weights[1].shape}")



Weights and Biases of the Model:

Embedding Layer Weights and Biases:
Embedding Weights Shape: (6, 32)
No Biases

SimpleRNN Layer Weights and Biases:
RNN Weights Shape: (32, 32)
RNN Biases Shape: (32, 32)

Dense Layer Weights and Biases:
Dense Weights Shape: (32, 1)
Dense Biases Shape: (1,)
