In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint


In [None]:
# Definición de la base de datos
def df_yf_tidy(
    tickers,
    start=None,
    end=None,
    interval="1d",
    auto_adjust=False,  # False keeps "Adj Close" so all six price columns are present
):
    """
    Download price history for several tickers and stack it into one long frame.

    Each ticker is fetched with ``yf.Ticker(t).history(...)``; tickers whose
    history comes back empty are skipped.

    Parameters
    ----------
    tickers : str or iterable of str
        Symbols to download. A single string is treated as one ticker.
    start, end, interval, auto_adjust
        Passed through unchanged to ``Ticker.history``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Open, High, Low, Close, Adj Close, Volume, Ticker``,
        one row per (ticker, timestamp), in the order the tickers were given.
        A price column that the download does not provide is filled with NaN.
        If nothing was downloaded, an empty frame with the same columns.
    """
    cols = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]
    out_cols = ["Date"] + cols + ["Ticker"]

    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]

    frames = []
    for t in tickers:
        h = yf.Ticker(t).history(
            start=start, end=end, interval=interval, auto_adjust=auto_adjust
        )
        if h.empty:
            continue

        # reindex adds any missing price column as float NaN (instead of an
        # object-dtype pd.NA column) and returns a new frame, leaving `h` untouched.
        out = h.reindex(columns=cols)
        out["Ticker"] = t

        # Force the index name so the timestamp column is always called "Date",
        # whatever label the download put on the index.
        frames.append(out.rename_axis("Date").reset_index())

    if not frames:
        return pd.DataFrame(columns=out_cols)

    df = pd.concat(frames, ignore_index=True)
    # Fixed column order.
    return df[out_cols]



# Build the price database for the selected tickers.
tickers = ["AAPL", "MSFT", "AMZN", "GOOGL", "META"]
df = df_yf_tidy(tickers, start="2000-01-01", end="2025-10-25", interval="1d")

# The frame stacks one ticker after another, so period-over-period changes
# must be computed within each ticker. A plain shift/diff over the whole
# column would compare the first row of a ticker with the last price of the
# previous ticker. With the grouping, the first row of every ticker is NaN.
adj_close = df["Adj Close"]
prev_adj_close = adj_close.groupby(df["Ticker"], sort=False).shift(1)

# Relative change: price divided by the previous price of the same ticker.
df["variacion_relativa"] = adj_close / prev_adj_close

# Percentage change (in %), i.e. (price / previous price - 1) * 100.
df["variacion_porcentual"] = (adj_close / prev_adj_close - 1) * 100

# Log change (decimal values): difference of log prices within each ticker.
df["variacion_logaritmica"] = (
    np.log(adj_close).groupby(df["Ticker"], sort=False).diff()
)

df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,variacion_relativa,variacion_porcentual,variacion_logaritmica
28184,2025-10-20 00:00:00-04:00,721.190002,733.77002,720.179993,732.169983,732.169983,8900200,META,1.021272,2.127155,0.021048
28185,2025-10-21 00:00:00-04:00,736.02002,738.5,728.75,733.27002,733.27002,7647300,META,1.001502,0.150243,0.001501
28186,2025-10-22 00:00:00-04:00,733.830017,740.599976,724.030029,733.409973,733.409973,8734500,META,1.000191,0.019086,0.000191
28187,2025-10-23 00:00:00-04:00,734.700012,742.409973,733.099976,734.0,734.0,9856000,META,1.000804,0.08045,0.000804
28188,2025-10-24 00:00:00-04:00,736.789978,741.210022,731.150024,738.359985,738.359985,9151300,META,1.00594,0.594003,0.005922


In [None]:
# Split the data into predictor columns (X) and the target column (y).
# Rows containing any missing value are removed from `df` itself, so every
# later use of `df` sees the cleaned frame.
df.dropna(inplace=True)

feature_columns = [
    'Volume',
    'variacion_relativa',
    'variacion_porcentual',
    'variacion_logaritmica',
]
X = df[feature_columns]
y = df['Adj Close']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so train and test
# mix dates and tickers — confirm this is intended for time-series data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
# Base model; the fixed seed keeps the fitted forests reproducible.
rf = RandomForestRegressor(random_state=0)

# Discrete search space (3 x 3 x 3 = 27 combinations).
param_distributions = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 6, 10],
}

# When every entry is a list, RandomizedSearchCV samples without replacement,
# so it cannot evaluate more candidates than the grid contains. Asking for
# more only triggers a warning and falls back to the full grid. Cap the
# requested budget (30) at the grid size to make that explicit.
# With the current lists the search is therefore exhaustive; lower the budget
# or widen the lists to get a genuinely random subset.
n_candidates = 1
for values in param_distributions.values():
    n_candidates *= len(values)
n_iter = min(30, n_candidates)

random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_distributions,
    n_iter=n_iter,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=0,
    n_jobs=-1,  # run the cross-validation fits in parallel on all cores
    verbose=1,  # summary line only; verbose=2 prints one line per fit
)

# Fit the search and report the best configuration found.
random_search.fit(X_train, y_train)
print("Mejores parámetros:", random_search.best_params_)
print("Mejor puntaje (MSE negativo):", random_search.best_score_)



Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV] END .max_depth=5, min_samples_split=2, n_estimators=100; total time=   5.9s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=100; total time=   6.7s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=100; total time=   5.6s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=100; total time=   6.5s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=100; total time=   5.8s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=200; total time=  12.3s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=200; total time=  12.3s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=200; total time=  12.3s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=200; total time=  12.3s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=200; total time=  12.7s
[CV] END .max_depth=5, min_samples_split=2, n_estimators=300; total time=  19.5s
[CV] END .max_depth=5, min_samples_split=2, n_e