In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Single source of truth for the instrument and the date range, so the
# metadata lookup and the price download always refer to the same symbol.
SYMBOL = "GOOGL"
START_DATE = '2020-10-01'
END_DATE = '2023-10-01'

ticker = yf.Ticker(SYMBOL)
info = ticker.info

data = yf.download(SYMBOL, start=START_DATE, end=END_DATE)
if data.empty:
    # Fail here with a clear message: an empty frame would otherwise make
    # every later cell break with a much less obvious error.
    raise ValueError(
        f"No price data returned for {SYMBOL} between {START_DATE} and {END_DATE}"
    )

# Work on a copy so the raw download stays untouched.
df = data.copy()
if isinstance(df.columns, pd.MultiIndex):
    # NOTE(review): some yfinance releases return two-level columns even for
    # a single symbol; this assumes level 0 holds the field names ('Close',
    # 'Open', ...) -- confirm against the installed yfinance version.
    df.columns = df.columns.get_level_values(0)

# 10-row simple moving average of the close.
# NOTE(review): the rolling window includes the current row, so SMA_10 on a
# given day already contains that day's Close (the value predicted later).
df['SMA_10'] = df['Close'].rolling(window=10).mean()

# The first 9 rows have no complete window; drop them, then turn the index
# into a regular column.
# NOTE(review): later cells expect that column to be named 'Date' -- confirm
# the download's index carries that name.
df = df.dropna(subset=['SMA_10']).reset_index()

In [None]:
# Render floating-point values with two decimal places in every output.
pd.set_option('display.float_format', '{:.2f}'.format)
df.head(n=15)

In [None]:
# Pairwise correlation of the numeric columns only. `df` also holds the
# datetime 'Date' column, and how `corr` treats non-numeric columns depends on
# the pandas version, so the numeric subset is selected explicitly.
df.select_dtypes(include='number').corr()

In [None]:
# Model input (SMA_10) and model output (Close). 'Date' is carried along in
# both frames only so the rows can be put back in date order later.
feature_columns = ['Date', 'SMA_10']
target_columns = ['Date', 'Close']
data_in = df[feature_columns]
data_out = df[target_columns]

In [None]:
from sklearn.model_selection import train_test_split

# Split inputs and outputs into train and test sets: 20% of the rows are held
# out, with a fixed seed so the partition is reproducible.
# NOTE(review): train_test_split shuffles rows by default, so train and test
# days are interleaved in time -- confirm this is intended for price data.
split_parts = train_test_split(
    data_in,
    data_out,
    test_size=0.20,
    random_state=1992,
)
X_train, X_test, y_train, y_test = split_parts



In [None]:
# Report how many observations ended up in each split
# (message typo fixed: "DateSet" -> "DataSet").
print(f"El DataSet de entrenamiento cuenta con {len(X_train)} observaciones")
print(f"El DataSet de evaluación cuenta con {len(X_test)} observaciones")

In [None]:
# Put every split back in chronological order. The sorted result is rebound to
# the same name instead of using inplace=True, so the frames returned by
# train_test_split are never mutated.
X_train = X_train.sort_values(by='Date')
X_test = X_test.sort_values(by='Date')
y_train = y_train.sort_values(by='Date')
y_test = y_test.sort_values(by='Date')

In [None]:
# Remove the 'Date' column from every split so only SMA_10 / Close remain.
for frame in (X_train, X_test, y_train):
    del frame['Date']
# pop() removes the column from y_test and returns it; the test dates are kept
# for re-attaching to the predictions later.
date_prediction = y_test.pop('Date')

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Fit a linear regression with an intercept term on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression(fit_intercept=True).fit(X_train, y_train)
model

In [None]:
# Coefficient(s) estimated by model.fit for the input feature(s).
model.coef_

In [None]:
# Intercept estimated by model.fit (the model was built with fit_intercept=True).
model.intercept_

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict the target for the held-out inputs.
y_pred = model.predict(X_test)

# Score against the 'Close' column only. A later cell adds 'Date' back onto
# y_test; selecting the target explicitly keeps this cell re-runnable after
# that has happened, and gives the same numbers on a top-to-bottom run.
y_true = y_test[['Close']]

mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f'MSE: {mse}')
print(f'R2: {r2}')

In [None]:
# Re-attach the saved test dates next to the actual closes. `assign` builds a
# new frame (aligned on the index, like a column assignment) instead of writing
# into the slice produced by train_test_split, which can trigger pandas'
# SettingWithCopyWarning.
y_test = y_test.assign(Date=date_prediction)

# Wrap the raw predictions in a frame with a named column.
y_pred_df = pd.DataFrame(y_pred, columns=['Prediction'])

In [None]:
# One-column frame of the test dates, renumbered 0..n-1 (the previous index is
# kept as an 'index' column) so it lines up positionally with y_pred_df.
date_prediction_index = y_test['Date'].to_frame().reset_index()

In [None]:
# Add the test dates to the predictions, then print predictions and actuals.
y_pred_df = y_pred_df.assign(Date=date_prediction_index['Date'])
print(y_pred_df, y_test, sep='\n')

In [None]:
# Actual closes (blue) against model predictions (red) over the test dates,
# drawn through an explicit Axes object.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test['Date'], y_test['Close'], label='Valores Reales', color='blue')
ax.plot(
    y_pred_df['Date'],
    y_pred_df['Prediction'],
    label='Predicciones',
    color='red',
)
ax.legend()
ax.set_title('Comparación de Valores Reales y Predicciones')
plt.show()

In [None]:
# Number of predictions produced for the test set (rows of y_pred).
y_pred.shape[0]