# Cargar Librerias

In [36]:
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Cargar Datos

In [37]:
df = yf.download("BHP", start="2020-01-01", end="2023-03-30")

[*********************100%***********************]  1 of 1 completed


# Variables

In [38]:
X = df[['Open', 'High', 'Low', 'Volume']]
y = df['Close']

# Crear variables de entrada y salida
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modelos

## Regresion Lineal

In [39]:
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

## Random Forest

In [40]:
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

## KNN

In [41]:
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)

# Comparar

In [42]:
print("R2 score (Linear Regression):", r2_score(y_test, lr_pred))
print("MSE (Linear Regression):", mean_squared_error(y_test, lr_pred))
print("R2 score (Random Forest):", r2_score(y_test, rf_pred))
print("MSE (Random Forest):", mean_squared_error(y_test, rf_pred))
print("R2 score (k-NN):", r2_score(y_test, knn_pred))
print("MSE (k-NN):", mean_squared_error(y_test, knn_pred))

R2 score (Linear Regression): 0.9988639431115465
MSE (Linear Regression): 0.11375060107063384
R2 score (Random Forest): 0.9981917808950749
MSE (Random Forest): 0.1810525618418902
R2 score (k-NN): -0.38211322756399557
MSE (k-NN): 138.38762123707454


# Mejor Modelo

In [43]:
best_model = rf

# Prediccion a 10 dias

In [44]:
last_date = df.index[-1]
date_range = pd.date_range(start=last_date, periods=10, freq='D')
next_prices = []
for date in date_range:
    next_features = [df.iloc[-1]['Open'], df.iloc[-1]['High'], df.iloc[-1]['Low'], df.iloc[-1]['Volume']]
    next_price = best_model.predict([next_features])[0]
    next_prices.append(next_price)
    df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)


  df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)
  df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)
  df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)
  df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)
  df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)
  df = df.append({'Open': next_features[0], 'High': next_features[1], 'Low': next_features[2], 'Close': next_price, 'Volume': next_features[3]}, ignore_index=True)
  df = df.append

In [45]:
print("Predicted prices for the next 10 days:")
print(pd.DataFrame(next_prices, index=date_range))

Predicted prices for the next 10 days:
                   0
2023-03-29  60.79906
2023-03-30  60.79906
2023-03-31  60.79906
2023-04-01  60.79906
2023-04-02  60.79906
2023-04-03  60.79906
2023-04-04  60.79906
2023-04-05  60.79906
2023-04-06  60.79906
2023-04-07  60.79906
