In [None]:
import pandas as pd
import matplotlib as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model # type: ignore
from tensorflow.keras.layers import LSTM, Dense # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from tensorflow.keras.callbacks import ModelCheckpoint # type: ignore

In [None]:
df = pd.read_csv('Data_In.csv')
df.rename(columns = {'company_name' : 'Company'}, inplace = True)

# Print Rows with Null Values
# df[df.isnull().any(axis=1)]

In [None]:
search_term = 'AMAZON'
factor = 'Company'

# Filter rows where 'Type' is equal to 'Health'
filtered_df = df[df[factor] == search_term]


In [None]:
# Reshape data into a 1D array
# Scale data to the min and max of the rows i.e 0 -1 range
# Flatten the data once again to a 1D array
# ====================================================================

data = filtered_df['Close'].values.reshape(-1, 1)
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)
data_scaled2 = data_scaled.flatten()


In [None]:
def create_sequences(data, seq_length):
    sequences, labels = [], []
    
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length, 0]
        label = data[i + seq_length, 0]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

In [None]:
seq_length = 15
n_samples = len(data_scaled)
sequences, labels = create_sequences(data_scaled, seq_length)
split = int(0.8 * len(sequences))
X_test, y_test = sequences[split:], labels[split:]
X_train, y_train = sequences[:split], labels[:split]

In [None]:
model_lstm = Sequential()
model_lstm.add(LSTM(100, activation='relu', input_shape=(seq_length, 1), return_sequences=True))
model_lstm.add(LSTM(50, activation='relu'))
model_lstm.add(Dense(20))
model_lstm.add(Dense(1))
model_lstm.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [None]:
model_lstm.fit(X_train, y_train, epochs=40, batch_size=32, validation_data=(X_test, y_test))

In [99]:
model_lstm.evaluate(X_test, y_test)

[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 3.3357e-04


0.000445700716227293

In [None]:
model_lstm.save('model_lstm_3.keras')

In [None]:
predictor = load_model('model_lstm_3.keras')
amazn = predictor.predict(X_test)

In [None]:
amazn = scaler.inverse_transform(amazn)
amazn_y = scaler.inverse_transform(y_test.reshape(-1, 1))

In [100]:
dates_test = filtered_df['Date'].iloc[split + seq_length:]
# df_plot_lstm = pd.DataFrame({'Fechas': dates_test, 'Real': y_test_actual_LSTM.flatten(), 'Predict': predictions__LSTM.flatten()})
df_plot_lstm = pd.DataFrame({'Date': dates_test[:len(amazn)], 'Real': amazn_y.flatten(), 'Predict': amazn.flatten()})

df_plot_lstm.tail(50)
# df_plot_lstm.to_csv('AMAZON.csv')

Unnamed: 0,Date,Real,Predict
31906,2023-10-19,128.399994,128.170685
31907,2023-10-20,125.169998,126.980766
31908,2023-10-23,126.559998,124.868042
31909,2023-10-24,128.559998,124.137123
31910,2023-10-25,121.389999,124.899139
31911,2023-10-26,119.57,122.400169
31912,2023-10-27,127.739998,119.630264
31913,2023-10-30,132.710007,121.640236
31914,2023-10-31,133.089996,125.954651
31915,2023-11-01,137.0,129.291306
