# Proceso Modelamiento ML Ops - Yahoo Finance - Nvidia
## Realizado por Daniel Muñoz Palma

In [1]:
#!pip install yfinance -q

In [2]:
#!pip install prophet -q

## Librerias

In [22]:
import yfinance as yf
import pandas as pd
from prophet import Prophet
from datetime import datetime, timedelta
import numpy as np
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error

## 1. Punto: Extraer datos históricos de la compañía Nvidia y sus componentes desde Yahoo Finance utilizando Python

In [23]:
# Fechas: últimos 10 años
end_date = datetime.today()
start_date = end_date - timedelta(days=365*10)

In [37]:
#1. Descargar datos SIN multiíndice
nvda = yf.download("NVDA", start=start_date.strftime("%Y-%m-%d"), end=end_date.strftime("%Y-%m-%d"), progress=True, group_by=None)
#2. Aplanar columnas si aún vienen con multiíndice

if isinstance(nvda.columns, pd.MultiIndex):
    nvda.columns = nvda.columns.get_level_values(1)

#3. Quitar nombres de columnas y resetear índice
nvda.columns.name = None
nvda.reset_index(inplace=True)

  nvda = yf.download("NVDA", start=start_date.strftime("%Y-%m-%d"), end=end_date.strftime("%Y-%m-%d"), progress=True, group_by=None)
[*********************100%***********************]  1 of 1 completed


In [38]:
nvda.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2015-06-22,0.530248,0.533153,0.519116,0.527102,514236000
1,2015-06-23,0.52807,0.532184,0.509435,0.51355,414064000
2,2015-06-24,0.512824,0.515486,0.506047,0.508468,253368000
3,2015-06-25,0.508467,0.515486,0.508467,0.51234,350460000
4,2015-06-26,0.508225,0.508709,0.499755,0.501933,421296000


In [42]:
# 2. Preparar datos para Prophet
df = hist.reset_index()[['Date', 'Close']]

In [43]:
df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)
df['ds'] = df['ds'].dt.tz_localize(None)

In [44]:
df

Unnamed: 0,ds,y
0,2015-06-19,0.529280
1,2015-06-22,0.527102
2,2015-06-23,0.513549
3,2015-06-24,0.508467
4,2015-06-25,0.512340
...,...,...
2510,2025-06-12,145.000000
2511,2025-06-13,141.970001
2512,2025-06-16,144.690002
2513,2025-06-17,144.119995


In [51]:
# 3. Entrenar modelo Prophet
model = Prophet(daily_seasonality=True)
model.fit(df)

12:14:10 - cmdstanpy - INFO - Chain [1] start processing
12:14:12 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x179a896cd10>

In [52]:
# 4. Predecir para mañana
future = model.make_future_dataframe(periods=1)
forecast = model.predict(future)

In [53]:
# 5. Extraer predicción del día siguiente
prediccion_manana = forecast.iloc[-1][['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

In [54]:
# 6. Calcular errores sobre los datos históricos
forecast_trim = forecast[forecast['ds'].isin(df['ds'])].copy()
df_eval = df.merge(forecast_trim[['ds', 'yhat']], on='ds')
y_true = df_eval['y']
y_pred = df_eval['yhat']

mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


In [55]:
# 7. Crear tabla resumen de errores
errores_df = pd.DataFrame({
    'Métrica': ['MAE', 'MSE', 'RMSE', 'MAPE'],
    'Valor': [mae, mse, rmse, mape]
}).round(2)

In [56]:
#errores_df

Unnamed: 0,Métrica,Valor
0,MAE,4.16
1,MSE,56.36
2,RMSE,7.51
3,MAPE,43.5


In [58]:
# 8. Guardar resultados
hoy = datetime.now().strftime("%Y-%m-%d")
os.makedirs("forecasts", exist_ok=True)

pred_path = f"forecasts/prediccion_{hoy}.csv"
err_path  = f"forecasts/errores_{hoy}.csv"

prediccion_manana.to_frame().T.to_csv(pred_path, index=False)
errores_df.to_csv(err_path, index=False)

In [59]:
# 9. Mostrar en consola
print("✅ Predicción guardada en:", pred_path)
print(prediccion_manana)
print("\n📊 Tabla de errores:")
print(errores_df)

✅ Predicción guardada en: forecasts/prediccion_2025-06-19.csv
ds            2025-06-19 00:00:00
yhat                   155.242048
yhat_lower             145.687696
yhat_upper             164.357993
Name: 2515, dtype: object

📊 Tabla de errores:
  Métrica  Valor
0     MAE   4.16
1     MSE  56.36
2    RMSE   7.51
3    MAPE  43.50


In [67]:
import mlflow
import mlflow.sklearn  # Para modelos de sklearn


import yfinance as yf
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Configuración
TICKER = "NVDA"
YEARS_OF_DATA = 10
FORECAST_DAYS = 30
TEST_DAYS = 30

def calculate_metrics(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return {
        'MAE': round(mae, 4),
        'MSE': round(mse, 4),
        'RMSE': round(rmse, 4),
        'MAPE (%)': round(mape, 4)
    }

def download_stock_data():
    end_date = datetime.now()
    start_date = end_date - timedelta(days=YEARS_OF_DATA * 365)
    data = yf.download(TICKER, start=start_date, end=end_date, progress=False)
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)
    print(f"📄 Columnas descargadas: {data.columns}")
    print(f"🔢 Tamaño del dataset: {data.shape}")
    price_column = 'Close' if 'Close' in data.columns else 'Adj Close'
    return data, price_column

def prepare_data(data, price_column):
    df = data[[price_column]].copy()
    df = df.reset_index()
    df.columns = ['ds', 'y']
    df['ds'] = pd.to_datetime(df['ds'])
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    df.dropna(inplace=True)
    return df

def build_features(df):
    # Convertimos la fecha a ordinal para que RF la entienda
    df['ds_ordinal'] = df['ds'].map(datetime.toordinal)
    return df[['ds', 'ds_ordinal', 'y']]

def forecast_future_dates(df, model):
    last_date = df['ds'].max()
    future_dates = [last_date + timedelta(days=i) for i in range(1, FORECAST_DAYS + 1)]
    future_ordinals = [d.toordinal() for d in future_dates]
    
    X_future = pd.DataFrame({'ds': future_dates, 'ds_ordinal': future_ordinals})
    X_pred = X_future[['ds_ordinal']]
    y_pred = model.predict(X_pred)
    
    future_forecast = X_future.copy()
    future_forecast['yhat'] = y_pred
    return future_forecast

def save_results(forecast, metrics):
    os.makedirs("forecasts", exist_ok=True)
    today = datetime.now().strftime("%Y%m%d")
    
    forecast.to_csv(f"forecasts/nvda_forecast_rf_{today}.csv", index=False)
    pd.DataFrame([metrics]).to_csv(f"forecasts/nvda_metrics_rf_{today}.csv", index=False)
    with open(f"forecasts/metrics_log_rf_{today}.txt", "w") as f:
        for k, v in metrics.items():
            f.write(f"{k}: {v}\n")

if __name__ == "__main__":
    try:
        print("🔍 Descargando datos...")
        data, price_column = download_stock_data()
        print(f"📊 Usando columna de precios: {price_column}")
        
        df = prepare_data(data, price_column)
        df = build_features(df)

        print("🧪 Separando en entrenamiento y prueba...")
        train = df[:-TEST_DAYS]
        test = df[-TEST_DAYS:]

        X_train = train[['ds_ordinal']]
        y_train = train['y']
        X_test = test[['ds_ordinal']]
        y_test = test['y']

        print("🌲 Entrenando modelo Random Forest...")
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        print("🔮 Generando predicciones...")
        y_pred_test = model.predict(X_test)
        metrics = calculate_metrics(y_test, y_pred_test)

        # Forecast de los próximos 30 días
        future_forecast = forecast_future_dates(df, model)

        print("\n📈 Métricas de Error:")
        for k, v in metrics.items():
            print(f"{k}: {v}")

        save_results(future_forecast, metrics)
        print("\n✅ Resultados guardados en /forecasts/")

    except Exception as e:
        print(f"\n❌ Error: {str(e)}")

🔍 Descargando datos...


  data = yf.download(TICKER, start=start_date, end=end_date, progress=False)


📄 Columnas descargadas: Index(['Close', 'High', 'Low', 'Open', 'Volume'], dtype='object', name='Price')
🔢 Tamaño del dataset: (2514, 5)
📊 Usando columna de precios: Close
🧪 Separando en entrenamiento y prueba...
🌲 Entrenando modelo Random Forest...
🔮 Generando predicciones...

📈 Métricas de Error:
MAE: 22.2236
MSE: 559.4666
RMSE: 23.653
MAPE (%): 16.0492

✅ Resultados guardados en /forecasts/


In [68]:
def append_to_list(value, my_list=[]):
    my_list.append(value)
return my_list

In [70]:
print(append_to_list(1))
print(append_to_list(2))
print(append_to_list(3))

[1]
[1, 2]
[1, 2, 3]


In [71]:
import threading

counter = 0

def increment():
    global counter
    for _ in range(1000000):
        counter += 1
        t1 = threading.Thread(target=increment)
        t2 = threading.Thread(target=increment)
        t1.start()
        t2.start()
        t1.join()
        t2.join()
print(counter)

0
