Análisis del DataFrame obtenido mediante YahooFinance. 
Objetivo: Identificar las variables más propensas a influir en el valor de la acción.

In [575]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [576]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error

In [577]:
def graph_prediction(real, prediction):
    real_date = real['Date']
    real_close = real['Close']

    plt.plot(real_date[0:len(prediction)], real_close[0:len(prediction)], color='red', label='Valor real de la acción')
    plt.plot(real_date[0:len(prediction)], prediction, color='blue', label='Predicción de la acción')
    
    # Establecer las etiquetas de las fechas en el eje x
    plt.xticks(rotation=45)
    
    plt.ylim(1.1 * np.min(prediction) / 2, 1.1 * np.max(prediction))
    plt.xlabel('Fecha')
    plt.ylabel("Valor de la Acción")
    plt.legend()
    plt.grid(True)
    plt.show()

In [578]:
ticker = yf.Ticker("AAPL")
info = ticker.info
 
data = yf.download("GOOGL", start='2010-07-13', end='2023-10-01')
df = pd.DataFrame(data)
df['SMA_10'] = df['Close'].rolling(window=10).mean()
df.dropna(subset=['SMA_10'], inplace=True)
df.reset_index(inplace=True)

[*********************100%%**********************]  1 of 1 completed


In [None]:
df.tail()

In [579]:
# Extraemos los datos de la media movil hasta el 2022-09-31 para el entrenamiento, y del 2022-10-01 en adelante para la validación
train_set = df[df['Date'] <= '2022-09-30']
validate_set = df[df['Date'] >= '2022-10-01']
# Eliminamos las columnas innecesarias
train_set.drop(columns=["Open", "High", "Low", "Adj Close", "SMA_10"], inplace=True)
validate_set.drop(columns=["Open", "High", "Low", "Adj Close", "SMA_10"], inplace=True)

print(train_set)
print(validate_set)

           Date       Close     Volume
0    2010-07-26   12.236486   79728192
1    2010-07-27   12.328078   97949952
2    2010-07-28   12.120871   99740160
3    2010-07-29   12.136887  106912980
4    2010-07-30   12.133383   85678236
...         ...         ...        ...
3064 2022-09-26   98.169998   27072700
3065 2022-09-27   97.500000   30072800
3066 2022-09-28  100.050003   32466300
3067 2022-09-29   97.419998   31047200
3068 2022-09-30   95.650002   32941500

[3069 rows x 3 columns]
           Date       Close    Volume
3069 2022-10-03   98.639999  27982000
3070 2022-10-04  101.639999  28850800
3071 2022-10-05  101.430000  22176900
3072 2022-10-06  101.419998  22324000
3073 2022-10-07   98.680000  27502800
...         ...         ...       ...
3314 2023-09-25  131.110001  20094600
3315 2023-09-26  128.570007  25718700
3316 2023-09-27  130.539993  22746500
3317 2023-09-28  132.309998  22513100
3318 2023-09-29  130.860001  30848100

[250 rows x 3 columns]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_set.drop(columns=["Open", "High", "Low", "Adj Close", "SMA_10"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  validate_set.drop(columns=["Open", "High", "Low", "Adj Close", "SMA_10"], inplace=True)


In [580]:
# Establecer la columna "Date" como índice
train_set.set_index('Date', inplace=True)
validate_set.set_index('Date', inplace=True)

In [None]:
# Gráfico para observar los datos de entrenamiento y validación.
train_set["Close"].plot(legend=True)
validate_set["Close"].plot(legend=True)
plt.legend(['Train (2015 - 2022/09)', 'Validate (2022/10)'])
plt.show()

In [None]:
# Normalizamos los datos de entrenamiento entre 0-1
sc = MinMaxScaler(feature_range=(0,1))
train_set_normal = sc.fit_transform(train_set)

In [581]:
# La red LSTM tendrá como entrada "time_step" datos consecutivos, y como salida 1 dato (es el resultado de la predicción a partir de esos "time_step" datos). Se conforma de esta manera el set de entrenamiento.
time_step = 60
x_train = []
y_train = []
m = len(train_set)

for i in range(time_step, m):
    # Datos: 0-time_step; 1-time_step+1; 2-time_step+2; etc.
    x_train.append(train_set[i-time_step:i, 0])

    y_train.append(train_set[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
len(x_train)

InvalidIndexError: (slice(0, 60, None), 0)

In [None]:
# Defino el modelo de entrada y salida de la red
dim_entry = (x_train.shape[1], 1)
dim_out = 1
na = 50

In [None]:
# Defino la red secuencial
model = Sequential()
model.add(LSTM(units=na, input_shape=dim_entry, activation="relu"))
model.add(Dense(units=dim_out))
model.compile(optimizer='rmsprop', loss='mse')
model.fit(x_train, y_train, epochs=50, batch_size=32)

In [None]:
# Preparo los datos de prueba
x_test = validate_set.values
x_test = sc.transform(x_test)
len(x_test)

In [None]:
X_test = []
for i in range(len(x_test) - time_step):
    sequence = x_test[i:i + time_step, 0]
    X_test.append(sequence)

validate_set = validate_set.iloc[:-60]

print(len(validate_set))
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Realizo la predicción
prediction = model.predict(X_test)
prediction = sc.inverse_transform(prediction)

In [None]:
graph_prediction(validate_set.reset_index(), prediction)

In [None]:
# Mientras más cercano a 0 mejor
mse = mean_squared_error(validate_set, prediction)
print(mse)

In [None]:
# Mientras más cercano a 1 mejor
from sklearn.metrics import r2_score

r2 = r2_score(validate_set, prediction)
r2

In [None]:
variance_real_data = np.var(validate_set)

# Calcula el porcentaje de efectividad con MSE
effectiveness_mse = 100 * (1 - mse / variance_real_data)

# Calcula el porcentaje de efectividad con R^2
effectiveness_r2 = r2 * 100

print(f"Porcentaje de efectividad (MSE): {effectiveness_mse[0]}%")
print(f"Porcentaje de efectividad (R^2): {effectiveness_r2}%")