# Proceso Modelamiento ML Ops - Yahoo Finance - Nvidia
## Realizado por Daniel Mu√±oz Palma

In [1]:
#!pip install yfinance -q

In [2]:
#!pip install prophet -q

## Librerias

In [22]:
import yfinance as yf
import pandas as pd
from prophet import Prophet
from datetime import datetime, timedelta
import numpy as np
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error

## 1. Punto: Extraer datos hist√≥ricos de la compa√±√≠a Nvidia y sus componentes desde Yahoo Finance utilizando Python

In [23]:
# Fechas: √∫ltimos 10 a√±os
end_date = datetime.today()
start_date = end_date - timedelta(days=365*10)

In [37]:
#1. Descargar datos SIN multi√≠ndice
nvda = yf.download("NVDA", start=start_date.strftime("%Y-%m-%d"), end=end_date.strftime("%Y-%m-%d"), progress=True, group_by=None)
#2. Aplanar columnas si a√∫n vienen con multi√≠ndice

if isinstance(nvda.columns, pd.MultiIndex):
    nvda.columns = nvda.columns.get_level_values(1)

#3. Quitar nombres de columnas y resetear √≠ndice
nvda.columns.name = None
nvda.reset_index(inplace=True)

  nvda = yf.download("NVDA", start=start_date.strftime("%Y-%m-%d"), end=end_date.strftime("%Y-%m-%d"), progress=True, group_by=None)
[*********************100%***********************]  1 of 1 completed


In [38]:
nvda.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2015-06-22,0.530248,0.533153,0.519116,0.527102,514236000
1,2015-06-23,0.52807,0.532184,0.509435,0.51355,414064000
2,2015-06-24,0.512824,0.515486,0.506047,0.508468,253368000
3,2015-06-25,0.508467,0.515486,0.508467,0.51234,350460000
4,2015-06-26,0.508225,0.508709,0.499755,0.501933,421296000


In [42]:
# 2. Preparar datos para Prophet
df = hist.reset_index()[['Date', 'Close']]

In [43]:
df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)
df['ds'] = df['ds'].dt.tz_localize(None)

In [44]:
df

Unnamed: 0,ds,y
0,2015-06-19,0.529280
1,2015-06-22,0.527102
2,2015-06-23,0.513549
3,2015-06-24,0.508467
4,2015-06-25,0.512340
...,...,...
2510,2025-06-12,145.000000
2511,2025-06-13,141.970001
2512,2025-06-16,144.690002
2513,2025-06-17,144.119995


In [51]:
# 3. Entrenar modelo Prophet
model = Prophet(daily_seasonality=True)
model.fit(df)

12:14:10 - cmdstanpy - INFO - Chain [1] start processing
12:14:12 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x179a896cd10>

In [52]:
# 4. Predecir para ma√±ana
future = model.make_future_dataframe(periods=1)
forecast = model.predict(future)

In [53]:
# 5. Extraer predicci√≥n del d√≠a siguiente
prediccion_manana = forecast.iloc[-1][['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

In [54]:
# 6. Calcular errores sobre los datos hist√≥ricos
forecast_trim = forecast[forecast['ds'].isin(df['ds'])].copy()
df_eval = df.merge(forecast_trim[['ds', 'yhat']], on='ds')
y_true = df_eval['y']
y_pred = df_eval['yhat']

mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100


In [55]:
# 7. Crear tabla resumen de errores
errores_df = pd.DataFrame({
    'M√©trica': ['MAE', 'MSE', 'RMSE', 'MAPE'],
    'Valor': [mae, mse, rmse, mape]
}).round(2)

In [56]:
#errores_df

Unnamed: 0,M√©trica,Valor
0,MAE,4.16
1,MSE,56.36
2,RMSE,7.51
3,MAPE,43.5


In [58]:
# 8. Guardar resultados
hoy = datetime.now().strftime("%Y-%m-%d")
os.makedirs("forecasts", exist_ok=True)

pred_path = f"forecasts/prediccion_{hoy}.csv"
err_path  = f"forecasts/errores_{hoy}.csv"

prediccion_manana.to_frame().T.to_csv(pred_path, index=False)
errores_df.to_csv(err_path, index=False)

In [59]:
# 9. Mostrar en consola
print("‚úÖ Predicci√≥n guardada en:", pred_path)
print(prediccion_manana)
print("\nüìä Tabla de errores:")
print(errores_df)

‚úÖ Predicci√≥n guardada en: forecasts/prediccion_2025-06-19.csv
ds            2025-06-19 00:00:00
yhat                   155.242048
yhat_lower             145.687696
yhat_upper             164.357993
Name: 2515, dtype: object

üìä Tabla de errores:
  M√©trica  Valor
0     MAE   4.16
1     MSE  56.36
2    RMSE   7.51
3    MAPE  43.50


In [66]:
import yfinance as yf
from prophet import Prophet
import pandas as pd
from datetime import datetime, timedelta
import os
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Configuraci√≥n
TICKER = "NVDA"
YEARS_OF_DATA = 10
FORECAST_DAYS = 30
TEST_DAYS = 30  # D√≠as para evaluaci√≥n

def calculate_metrics(y_true, y_pred):
    """Calcula m√©tricas de error"""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
    return {
        'MAE': round(mae, 4),
        'MSE': round(mse, 4),
        'RMSE': round(rmse, 4),
        'MAPE (%)': round(mape, 4)
    }

def download_stock_data():
    """Descarga datos con manejo de nombres de columnas"""
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365 * YEARS_OF_DATA)
    
    data = yf.download(
        TICKER,
        start=start_date.strftime("%Y-%m-%d"),
        end=end_date.strftime("%Y-%m-%d"),
        progress=False
    )
    
    # Verificar nombres de columnas (yFinance puede cambiarlos)
    price_column = 'Close' if 'Close' in data.columns else 'Adj Close'
    return data, price_column

def prepare_data(data, price_column):
    """Prepara datos para Prophet"""
    df = data.reset_index()[['Date', price_column]].copy()
    df.columns = ['ds', 'y']  # Prophet requiere estas columnas
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    return df.dropna()

def train_test_split(df):
    """Separa datos en train y test"""
    train = df[:-TEST_DAYS]
    test = df[-TEST_DAYS:]
    return train, test

def save_results(forecast, metrics):
    """Guarda resultados en CSV"""
    os.makedirs("forecasts", exist_ok=True)
    today = datetime.now().strftime("%Y%m%d")
    
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].to_csv(
        f"forecasts/nvda_forecast_{today}.csv", index=False)
    
    pd.DataFrame([metrics]).to_csv(
        f"forecasts/nvda_metrics_{today}.csv", index=False)

if __name__ == "__main__":
    try:
        print("üîç Descargando datos...")
        data, price_column = download_stock_data()
        print(f"üìä Usando columna de precios: {price_column}")
        
        df = prepare_data(data, price_column)
        train, test = train_test_split(df)
        
        print("üß† Entrenando modelo...")
        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=False,
            daily_seasonality=False
        )
        model.fit(train)
        
        print("üîÆ Generando predicciones...")
        future = model.make_future_dataframe(periods=TEST_DAYS)
        forecast = model.predict(future)
        
        # Evaluaci√≥n
        test_forecast = forecast.iloc[-TEST_DAYS:].set_index('ds')['yhat']
        test_true = test.set_index('ds')['y']
        
        metrics = calculate_metrics(test_true, test_forecast)
        
        print("\n‚≠ê M√©tricas de Error:")
        for k, v in metrics.items():
            print(f"{k}: {v}")
        
        save_results(forecast, metrics)
        print(f"\n‚úÖ Resultados guardados en /forecasts/")

    except Exception as e:
        print(f"\n‚ùå Error: {str(e)}")
        raise

üîç Descargando datos...
üìä Usando columna de precios: Close
üß† Entrenando modelo...


  data = yf.download(
12:26:45 - cmdstanpy - INFO - Chain [1] start processing
12:26:45 - cmdstanpy - INFO - Chain [1] done processing


üîÆ Generando predicciones...

‚≠ê M√©tricas de Error:
MAE: 16.0128
MSE: 286.1353
RMSE: 16.9155
MAPE (%): 12.0922

‚úÖ Resultados guardados en /forecasts/
