# Libs

In [18]:
import os
import joblib
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error, explained_variance_score, root_mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, LSTM, Dropout
from IPython.display import clear_output

# Code

## Config

In [None]:
# yfinance download settings
TICKERS = 'DIS'
START_DATE = '2000-01-01'
END_DATE = '2025-02-01'

# Data
DAYS = 60  # number of consecutive rows in each input window
TEST_SIZE = 0.2  # fraction of the samples held out for testing
SEED = 42  # random seed (reproducibility)

# Model
DROPOUT_RATE = 0.2  # rate shared by both Dropout layers

# Train
EPOCHS = 50
BATCH_SIZE = 32

## Data

In [None]:
# Download the price history for TICKERS over [START_DATE, END_DATE].
# multi_level_index=False asks yfinance for flat column labels, which the
# later column selection by plain names ('Open', 'Close', ...) relies on.
data = yf.download(tickers=TICKERS, start=START_DATE, end=END_DATE, multi_level_index=False)

In [None]:
# Quick sanity check: print a summary of the downloaded data.
data.info()

In [None]:
# Preview the first rows of the downloaded data.
data.head()

## Data Preprocessing

In [None]:
# Keep the input features plus the target. 'Close' must stay LAST: the
# windowing below and the later inverse scaling both address it as column -1.
data = data[['Open', 'High', 'Low', 'Volume', 'Close']]

# Scale every column to [0, 1].
# NOTE(review): the scaler is fitted on the full history, so test-period
# extremes influence the scaling of the training inputs. Consider fitting it
# on the training span only.
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

# One sample per row that has DAYS rows of history before it:
#   x[k] = all columns except Close for rows k .. k + DAYS - 1
#   y[k] = scaled Close of row k + DAYS
# The last valid target is the final row (index len - 1), which gives
# len - DAYS samples; subtracting one more would drop the most recent day.
n_samples = len(data_scaled) - DAYS
x = np.array([data_scaled[i:i + DAYS, :-1] for i in range(n_samples)])
y = np.array([data_scaled[i + DAYS, -1] for i in range(n_samples)])

# Chronological split: consecutive windows overlap in all but one row, so a
# shuffled split would place near-duplicates of test windows in the training
# set and scramble the time order that the evaluation plots assume.
# With shuffle=False the split is deterministic, so no random_state is needed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=TEST_SIZE, shuffle=False)

## Model

In [None]:
# Two stacked LSTM layers, each followed by dropout, feeding a small dense
# head that emits one value per input window.
model = Sequential([
    Input(shape=(DAYS, x_train.shape[2])),
    # return_sequences=True so the second LSTM receives the whole sequence.
    LSTM(units=50, return_sequences=True),
    Dropout(DROPOUT_RATE),
    LSTM(units=50, return_sequences=False),
    Dropout(DROPOUT_RATE),
    Dense(units=25, activation='relu'),
    Dense(units=1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

## Train

In [None]:
# Fit the model. The held-out (x_test, y_test) split is also passed as the
# validation data, so the per-epoch validation loss is computed on the same
# samples that are used for the final predictions.
history = model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(x_test, y_test))

## Predict

In [None]:
# Predict the (still scaled) target value for every test window.
predictions = model.predict(x_test)

In [None]:
# The scaler was fitted on every column of `data`, so inverse_transform needs
# a matrix of that full width. The target occupies the last column; the other
# columns are zero filler whose de-scaled values are discarded.
filler = np.zeros((len(predictions), data.shape[1] - 1))

predictions_aux = np.column_stack((filler, predictions.ravel()))
predictions_real = scaler.inverse_transform(predictions_aux)[:, -1]

y_test_aux = np.column_stack((filler, y_test))
y_test_real = scaler.inverse_transform(y_test_aux)[:, -1]

## Metrics

In [None]:
# Regression metrics computed on the de-scaled values.
mse = mean_squared_error(y_test_real, predictions_real)
mae = mean_absolute_error(y_test_real, predictions_real)
r2 = r2_score(y_test_real, predictions_real)
mape = mean_absolute_percentage_error(y_test_real, predictions_real)
ev_score = explained_variance_score(y_test_real, predictions_real)
rmse = root_mean_squared_error(y_test_real, predictions_real)

# scikit-learn returns MAPE as a fraction (0.05 means 5%), so convert it
# before showing it next to a '%' sign.
mape_percent = mape * 100

print(f'MAE: {mae}')
print(f'MAPE: {mape_percent}%')
print(f'RMSE: {rmse}')
print(f'MSE: {mse}')
print(f'R2: {r2}')
print(f'EV: {ev_score}')

# Create the output folder up front so a fresh checkout does not fail on open().
os.makedirs('metrics', exist_ok=True)

# Written as UTF-8 because the README update step reads this file as UTF-8.
with open('metrics/score.txt', 'w', encoding='utf-8') as file:
    file.write(f'Mean Absolute Error(MAE): {mae}\n')
    file.write(f'Mean Absolute Percentage Error(MAPE): {mape_percent}%\n')
    file.write(f'Root Mean Squared Error(RMSE): {rmse}\n')
    file.write(f'Mean Squared Error(MSE): {mse}\n')
    file.write(f'R2: {r2}\n')
    file.write(f'EV: {ev_score}')

In [None]:
# Line chart of the de-scaled actual values against the model's predictions,
# one point per test sample; the figure is saved and then displayed.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(y_test_real, color='blue', label='real value')
ax.plot(predictions_real, color='red', label='predicted value')
ax.set_title('Predicted vs. real value')
ax.set_xlabel('Days')
ax.set_ylabel('Price')
ax.legend()
fig.savefig('./metrics/real_vs_predicted.png')
plt.show()

In [None]:
# Scatter of actual vs. predicted values. The dashed red diagonal marks a
# perfect prediction (predicted == actual) across the range of actual values.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(y_test_real, predictions_real, alpha=0.5)

lowest, highest = min(y_test_real), max(y_test_real)
ax.plot([lowest, highest], [lowest, highest], color='red', linestyle='--')

ax.set_title('Comparison between Actual Values and Predictions')
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predict')
ax.grid(True)
fig.savefig('./metrics/scatter.png')
plt.show()

## Save model

In [None]:
# Persist the trained model and the fitted scaler side by side, so the same
# scaling can be applied when the saved model is loaded again.
model.save('model.keras')
joblib.dump(scaler, 'scaler.gz')

# Update README

In [None]:
# Embed the saved metrics into the README between two HTML comment markers,
# replacing the previous section if one exists and appending otherwise.
txt_file_path = "metrics/score.txt"
readme_path = "../README.md"

with open(txt_file_path, "r", encoding="utf-8") as file:
    content = file.read()

with open(readme_path, "r", encoding="utf-8") as file:
    readme_content = file.read()

start_marker = "<!-- START_SCORE -->"
end_marker = "<!-- END_SCORE -->"
new_section = f"{start_marker}\n```\n{content}\n```\n{end_marker}"

# Look for the end marker only AFTER the start marker. Searching from the top
# of the file can pick an end marker that precedes the start; the resulting
# empty slice, passed to str.replace, would insert the section between every
# character of the README.
start_idx = readme_content.find(start_marker)
end_idx = -1
if start_idx != -1:
    end_idx = readme_content.find(end_marker, start_idx + len(start_marker))

if end_idx != -1:
    # Splice by index so exactly the located section is replaced.
    updated_readme = (
        readme_content[:start_idx]
        + new_section
        + readme_content[end_idx + len(end_marker):]
    )
else:
    # No well-formed marker pair: append a fresh section at the end.
    updated_readme = readme_content + "\n" + new_section

with open(readme_path, "w", encoding="utf-8") as file:
    file.write(updated_readme)

print("README updated successfully")

In [19]:
# Clear the cell output; wait=True defers the clear until new output is
# available to replace it.
clear_output(wait=True)