In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
import mlflow
import mlflow.pytorch
import logging
from pathlib import Path
from dash import Dash, html, dcc, dash_table
import plotly.express as px
from joblib import Parallel, delayed

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def calculate_metrics(y_true, y_pred):
    """Calculate RMSE, MAE, and R²."""
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return {'rmse': rmse, 'mae': mae, 'r2': r2}

class StockPriceNN(nn.Module):
    """Feed-forward neural network for stock price forecasting."""
    def __init__(self, input_size, hidden_size=64):
        super(StockPriceNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class StockPriceLSTM(nn.Module):
    """LSTM network for stock price forecasting."""
    def __init__(self, input_size, hidden_size=64, num_layers=2):
        super(StockPriceLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

def train_pytorch_forecast(data_path, target, selected_features, horizon=1, params=None, use_lstm=False):
    """Train PyTorch model for h-month-ahead forecasting."""
    print("Test: Entering train_pytorch_forecast")
    if params is None:
        params = {'hidden_size': 64, 'epochs': 100, 'learning_rate': 0.001, 'batch_size': 16}

    try:
        df = pd.read_parquet(data_path)
        logger.info(f"Loaded data for {target} with shape {df.shape}")
        if target not in df.columns:
            logger.error(f"Target column {target} not found in data.")
            return None, None, None

        df[target] = pd.to_numeric(df[target], errors='coerce')
        if df[target].isna().any():
            logger.warning(f"Target {target} contains {df[target].isna().sum()} NaN values after conversion.")
            df = df.dropna(subset=[target])
            if df.empty:
                logger.error(f"No valid data for {target} after numeric conversion.")
                return None, None, None
        logger.info(f"Target {target} dtype: {df[target].dtype}")

        if len(df) < horizon:
            logger.error(f"Dataset too short ({len(df)} rows) for horizon {horizon}.")
            return None, None, None

        df[target + f'_t{horizon}'] = df[target].shift(-horizon)
        df = df.dropna(subset=selected_features + [target + f'_t{horizon}'])
        logger.info(f"Data shape after NaN handling: {df.shape}")
        if df.empty:
            logger.error(f"No valid data for {target} after NaN handling.")
            return None, None, None

        X = df[selected_features].fillna(method="ffill").fillna(method="bfill")
        y = df[target + f'_t{horizon}']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
        logger.info(f"Train indices: {X_train.index[:5].tolist()}, Test indices: {X_test.index[:5].tolist()}")
        y_train = pd.to_numeric(y_train, errors='coerce')
        y_test = pd.to_numeric(y_test, errors='coerce')
        non_nan_train = ~y_train.isna()
        non_nan_test = ~y_test.isna()
        X_train = X_train[non_nan_train]
        y_train = y_train[non_nan_train]
        X_test = X_test[non_nan_test]
        y_test = y_test[non_nan_test]
        logger.info(f"Train size after NaN filter: {len(X_train)}, Test size after NaN filter: {len(X_test)}")
        if len(X_train) == 0 or len(X_test) == 0:
            logger.error(f"No valid train/test data for {target} after NaN handling.")
            return None, None, None

        print("Test2: Before scaling")
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        if use_lstm:
            # Reshape for LSTM [samples, timesteps, features]
            X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
            X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

        X_train_tensor = torch.FloatTensor(X_train_scaled)
        y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1)
        X_test_tensor = torch.FloatTensor(X_test_scaled)
        y_test_tensor = torch.FloatTensor(y_test.values).reshape(-1, 1)

        input_size = X.shape[1]
        model = StockPriceLSTM(input_size, params['hidden_size']) if use_lstm else StockPriceNN(input_size, params['hidden_size'])
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

        mlflow.set_experiment("stock_price_forecasting")
        with mlflow.start_run(run_name=f"{target}_horizon{horizon}"):
            mlflow.set_tag("ticker", target.split('_')[0])
            mlflow.set_tag("horizon", horizon)
            mlflow.set_tag("model_type", "LSTM" if use_lstm else "NN")
            model.train()
            for epoch in range(params['epochs']):
                for i in range(0, len(X_train_tensor), params['batch_size']):
                    batch_X = X_train_tensor[i:i + params['batch_size']]
                    batch_y = y_train_tensor[i:i + params['batch_size']]
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()
                if (epoch + 1) % 10 == 0:
                    logger.info(f"Epoch {epoch+1}/{params['epochs']} - Loss: {loss.item():.4f}")

            model.eval()
            with torch.no_grad():
                y_pred = model(X_test_tensor).numpy().flatten()
            # Bias correction
            with torch.no_grad():
                y_train_pred = model(X_train_tensor).numpy().flatten()
            bias = np.mean(y_train_pred - y_train)
            y_pred += bias
            metrics = calculate_metrics(y_test, y_pred)
            logger.info(f"Metrics for {target} (horizon {horizon}): {metrics}")

            current_prices = df.loc[y_test.index, target]
            logger.info(f"Current prices shape: {current_prices.shape}, y_test shape: {y_test.shape}")
            price_change = (y_pred - current_prices) / current_prices * 100
            signals = np.where(price_change > 5, "Buy", np.where(price_change < -5, "Sell", "Hold"))
            backtest_df = pd.DataFrame({
                "Date": y_test.index,
                "Actual": y_test,
                "Predicted": y_pred,
                "Current_Price": current_prices,
                "Signal": signals
            })
            backtest_df.to_csv(f"backtest_{target}_horizon{horizon}.csv")
            mlflow.log_params(params)
            mlflow.log_metrics(metrics)
            mlflow.log_param("features", ", ".join(selected_features))
            mlflow.log_artifact(f"backtest_{target}_horizon{horizon}.csv")
            mlflow.pytorch.log_model(model, "pytorch_model", input_example=X_train_scaled[:1])
            model_uri = f"runs:/{mlflow.active_run().info.run_id}/pytorch_model"
            registered_model = mlflow.register_model(model_uri, f"StockPricePyTorch_Horizon{horizon}")
            logger.info(f"Model registered: {registered_model.name} version {registered_model.version}")

        return model, metrics, backtest_df
    except Exception as e:
        print(f"Error in train_pytorch_forecast for {target} horizon {horizon}: {str(e)}")
        logger.error(f"Error in train_pytorch_forecast for {target} horizon {horizon}: {str(e)}")
        return None, None, None

def getStockFeatures(ticker="AAPL"):
    print("Test: Entering getStockFeatures")
    """Get top 10 features for a stock using Random Forest."""
    data_path = Path(f"C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/data/combined/{ticker}_combined.parquet")
    if not data_path.exists():
        logger.error(f"Data file for {ticker} not found at {data_path}.")
        return []
    df = pd.read_parquet(data_path)
    target = f"{ticker}_Close"
    if target not in df.columns:
        logger.error(f"Target column {target} not found in data for {ticker}.")
        return []
    df[target] = pd.to_numeric(df[target], errors='coerce')
    if df[target].isna().any():
        logger.warning(f"Target {target} contains {df[target].isna().sum()} NaN values after conversion.")
        df = df.dropna(subset=[target])
    features = [col for col in df.columns if col != target and not col.startswith(target + '_t')]
    logger.info(f"Found {len(features)} features for {ticker}: {features}")
    if not features:
        logger.error(f"No valid features found for {ticker}.")
        return []

    df = df.dropna(subset=features + [target])
    logger.info(f"Data shape after NaN handling for {ticker}: {df.shape}")
    if df.empty:
        logger.error(f"No valid data for {ticker} after NaN handling.")
        return []

    X = df[features]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
    cv_scores = cross_val_score(rf, X_train_scaled, y_train, cv=5, scoring="r2")
    logger.info(f"Cross-validated R-squared for {ticker}: {cv_scores.mean():.3f} (±{cv_scores.std():.3f})")
    rf.fit(X_train_scaled, y_train)
    importances = rf.feature_importances_
    importance_df = pd.DataFrame({
        "Feature": features,
        "Importance": importances
    }).sort_values(by="Importance", ascending=False)
    return importance_df.head(10)['Feature'].tolist()

def process_stock(ticker, horizons=[1, 3]):
    print("Test: Entering process_stock")
    """Process a single stock."""
    try:
        logger.info(f"Starting processing for {ticker}")
        top_features = getStockFeatures(ticker)
        if not top_features:
            logger.error(f"No features returned for {ticker}.")
            return None
        results = []
        model_path = f"C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/data/combined/{ticker}_combined.parquet"
        target_col = f"{ticker}_Close"
        for horizon in horizons:
            logger.info(f"Training model for {ticker} horizon {horizon}")
            use_lstm = (horizon == 3)  # Use LSTM for 3-month horizon
            model, metrics, backtest_df = train_pytorch_forecast(model_path, target_col, top_features, horizon, use_lstm=use_lstm)
            if metrics is None:
                logger.error(f"Failed to train model for {ticker} horizon {horizon}")
                continue
            results.append({
                'ticker': ticker,
                'horizon': horizon,
                'rmse': metrics['rmse'],
                'mae': metrics['mae'],
                'r2': metrics['r2'],
                'top_features': top_features,
                'backtest_df': backtest_df
            })
        logger.info(f"Completed processing for {ticker} with {len(results)} results")
        return results
    except Exception as e:
        logger.error(f"Error processing {ticker}: {str(e)}")
        return None

def create_dashboard(results):
    """Create a Dash dashboard for buy/sell signals."""
    app = Dash(__name__)
    children = [
        html.H1("Stock Price Prediction Dashboard"),
        html.H2("Top Stocks by R²")
    ]
    
    for result in results:
        ticker = result['ticker']
        horizon = result['horizon']
        backtest_df = result['backtest_df']
        fig = px.line(backtest_df, x="Date", y=["Actual", "Predicted"], title=f"{ticker} {horizon}-Month Forecast")
        fig.add_scatter(x=backtest_df[backtest_df["Signal"] == "Buy"]["Date"],
                        y=backtest_df[backtest_df["Signal"] == "Buy"]["Predicted"],
                        mode="markers", name="Buy", marker=dict(symbol="triangle-up", size=10, color="green"))
        fig.add_scatter(x=backtest_df[backtest_df["Signal"] == "Sell"]["Date"],
                        y=backtest_df[backtest_df["Signal"] == "Sell"]["Predicted"],
                        mode="markers", name="Sell", marker=dict(symbol="triangle-down", size=10, color="red"))
        children.append(html.H3(f"{ticker} (Horizon: {horizon} months, R²: {result['r2']:.3f})"))
        children.append(dcc.Graph(figure=fig))
        children.append(dash_table.DataTable(
            data=backtest_df.to_dict('records'),
            columns=[{"name": i, "id": i} for i in backtest_df.columns],
            style_table={'overflowX': 'auto'},
            page_size=10
        ))
        children.append(html.Hr())

    app.layout = html.Div(children)
    app.run(debug=True)

if __name__ == "__main__":
    print("Test: Main block")
    tickers = pd.read_csv("C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/equities.csv")['Symbol'].tolist()
    logger.info(f"Processing {len(tickers)} tickers: {tickers}")
    # Process a single ticker for testing
    results = process_stock("AAPL")
    print(f"Results: {results}")
    
    if results:
        # Create dashboard for AAPL
        create_dashboard(results)
        
        # Process all tickers
        results = Parallel(n_jobs=-1)(delayed(process_stock)(ticker) for ticker in tickers)  # Limit for demo
        results = [r for r in sum([r if r else [] for r in results], []) if r is not None]
        results_df = pd.DataFrame(results)
        if not results_df.empty:
            logger.info(f"Results DataFrame shape: {results_df.shape}")
            top_10 = results_df.sort_values(by='r2', ascending=False).head(10)
            logger.info(f"Top 10 models by R²:\n{top_10[['ticker', 'horizon', 'r2', 'rmse', 'mae']]}")
            top_10.to_csv("top_10_forecasts_by_r2.csv", index=False)

            with mlflow.start_run(run_name="Top_10_Forecasts_Summary"):
                mlflow.log_artifact("top_10_forecasts_by_r2.csv")
                for i, row in top_10.iterrows():
                    mlflow.log_metric(f"{row['ticker']}_horizon{row['horizon']}_r2", row['r2'])
                    mlflow.log_metric(f"{row['ticker']}_horizon{row['horizon']}_rmse", row['rmse'])
                    mlflow.log_metric(f"{row['ticker']}_horizon{row['horizon']}_mae", row['mae'])

            create_dashboard(results)  # Dashboard for all tickers

Gte
Test Process_Stock
Test
Test
Test2


ERROR:__main__:Error processing AAPL: cannot unpack non-iterable NoneType object


Error in train_pytorch_forecast for AAPL_Close horizon 1: positional indexers are out-of-bounds


In [11]:
results

In [13]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
import mlflow
import mlflow.pytorch
import logging
from pathlib import Path
from joblib import Parallel, delayed

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def calculate_metrics(y_true, y_pred):
    """Calculate RMSE, MAE, and R²."""
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return {'rmse': rmse, 'mae': mae, 'r2': r2}

class StockPriceNN(nn.Module):
    """Feed-forward neural network for stock price forecasting."""
    def __init__(self, input_size, hidden_size=64):
        super(StockPriceNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

def train_pytorch_forecast(data_path, target, selected_features, horizon=1, params=None):
    """Train PyTorch model for h-month-ahead forecasting."""
    print("Test: Entering train_pytorch_forecast")
    if params is None:
        params = {'hidden_size': 64, 'epochs': 100, 'learning_rate': 0.001, 'batch_size': 16}

    try:
        df = pd.read_parquet(data_path)
        logger.info(f"Loaded data for {target} with shape {df.shape}")
        if target not in df.columns:
            logger.error(f"Target column {target} not found in data.")
            return None, None, None

        # Convert target to numeric, handling strings
        df[target] = pd.to_numeric(df[target], errors='coerce')
        if df[target].isna().any():
            logger.warning(f"Target {target} contains {df[target].isna().sum()} NaN values after conversion.")
            df = df.dropna(subset=[target])
            if df.empty:
                logger.error(f"No valid data for {target} after numeric conversion.")
                return None, None, None
        logger.info(f"Target {target} dtype after conversion: {df[target].dtype}")

        if len(df) < horizon:
            logger.error(f"Dataset too short ({len(df)} rows) for horizon {horizon}.")
            return None, None, None

        df[target + f'_t{horizon}'] = df[target].shift(-horizon)
        df = df.dropna(subset=selected_features + [target + f'_t{horizon}'])
        logger.info(f"Data shape after NaN handling: {df.shape}")
        if df.empty:
            logger.error(f"No valid data for {target} after NaN handling.")
            return None, None, None

        X = df[selected_features].fillna(method="ffill").fillna(method="bfill")
        y = df[target + f'_t{horizon}']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
        logger.info(f"Train indices: {X_train.index[:5].tolist()}, Test indices: {X_test.index[:5].tolist()}")
        y_train = pd.to_numeric(y_train, errors='coerce')
        y_test = pd.to_numeric(y_test, errors='coerce')
        non_nan_train = ~y_train.isna()
        non_nan_test = ~y_test.isna()
        X_train = X_train[non_nan_train]
        y_train = y_train[non_nan_train]
        X_test = X_test[non_nan_test]
        y_test = y_test[non_nan_test]
        logger.info(f"Train size after NaN filter: {len(X_train)}, Test size after NaN filter: {len(X_test)}")
        if len(X_train) == 0 or len(X_test) == 0:
            logger.error(f"No valid train/test data for {target} after NaN handling.")
            return None, None, None

        print("Test2: Before scaling")
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        X_train_tensor = torch.FloatTensor(X_train_scaled)
        y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1)
        X_test_tensor = torch.FloatTensor(X_test_scaled)
        y_test_tensor = torch.FloatTensor(y_test.values).reshape(-1, 1)

        input_size = X.shape[1]
        model = StockPriceNN(input_size, params['hidden_size'])
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

        mlflow.set_experiment("stock_price_forecasting")
        with mlflow.start_run(run_name=f"{target}_horizon{horizon}"):
            mlflow.set_tag("ticker", target.split('_')[0])
            mlflow.set_tag("horizon", horizon)
            model.train()
            for epoch in range(params['epochs']):
                for i in range(0, len(X_train_tensor), params['batch_size']):
                    batch_X = X_train_tensor[i:i + params['batch_size']]
                    batch_y = y_train_tensor[i:i + params['batch_size']]
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()
                if (epoch + 1) % 10 == 0:
                    logger.info(f"Epoch {epoch+1}/{params['epochs']} - Loss: {loss.item():.4f}")

            model.eval()
            with torch.no_grad():
                y_pred = model(X_test_tensor).numpy().flatten()
            metrics = calculate_metrics(y_test, y_pred)
            logger.info(f"Metrics for {target} (horizon {horizon}): {metrics}")

            current_prices = df.loc[y_test.index, target]
            logger.info(f"Current prices shape: {current_prices.shape}, y_test shape: {y_test.shape}")
            price_change = (y_pred - current_prices) / current_prices * 100
            signals = np.where(price_change > 5, "Buy", np.where(price_change < -5, "Sell", "Hold"))
            backtest_df = pd.DataFrame({
                "Date": y_test.index,
                "Actual": y_test,
                "Predicted": y_pred,
                "Current_Price": current_prices,
                "Signal": signals
            })
            backtest_df.to_csv(f"backtest_{target}_horizon{horizon}.csv")
            mlflow.log_params(params)
            mlflow.log_metrics(metrics)
            mlflow.log_param("features", ", ".join(selected_features))
            mlflow.log_artifact(f"backtest_{target}_horizon{horizon}.csv")
            mlflow.pytorch.log_model(model, "pytorch_model")
            model_uri = f"runs:/{mlflow.active_run().info.run_id}/pytorch_model"
            registered_model = mlflow.register_model(model_uri, f"StockPricePyTorch_Horizon{horizon}")
            logger.info(f"Model registered: {registered_model.name} version {registered_model.version}")

        return model, metrics, backtest_df
    except Exception as e:
        print(f"Error in train_pytorch_forecast for {target} horizon {horizon}: {str(e)}")
        logger.error(f"Error in train_pytorch_forecast for {target} horizon {horizon}: {str(e)}")
        return None, None, None

def getStockFeatures(ticker="AAPL"):
    print("Test: Entering getStockFeatures")
    """Get top 10 features for a stock using Random Forest."""
    data_path = Path(f"C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/data/combined/{ticker}_combined.parquet")
    if not data_path.exists():
        logger.error(f"Data file for {ticker} not found at {data_path}.")
        return []
    df = pd.read_parquet(data_path)
    target = f"{ticker}_Close"
    if target not in df.columns:
        logger.error(f"Target column {target} not found in data for {ticker}.")
        return []
    # Convert target to numeric in feature selection
    df[target] = pd.to_numeric(df[target], errors='coerce')
    if df[target].isna().any():
        logger.warning(f"Target {target} contains {df[target].isna().sum()} NaN values after conversion.")
        df = df.dropna(subset=[target])
    features = [col for col in df.columns if col != target and not col.startswith(target + '_t')]
    logger.info(f"Found {len(features)} features for {ticker}: {features}")
    if not features:
        logger.error(f"No valid features found for {ticker}.")
        return []

    df = df.dropna(subset=features + [target])
    logger.info(f"Data shape after NaN handling for {ticker}: {df.shape}")
    if df.empty:
        logger.error(f"No valid data for {ticker} after NaN handling.")
        return []

    X = df[features]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
    cv_scores = cross_val_score(rf, X_train_scaled, y_train, cv=5, scoring="r2")
    logger.info(f"Cross-validated R-squared for {ticker}: {cv_scores.mean():.3f} (±{cv_scores.std():.3f})")
    rf.fit(X_train_scaled, y_train)
    importances = rf.feature_importances_
    importance_df = pd.DataFrame({
        "Feature": features,
        "Importance": importances
    }).sort_values(by="Importance", ascending=False)
    return importance_df.head(10)['Feature'].tolist()

def process_stock(ticker, horizons=[1, 3]):
    print("Test: Entering process_stock")
    """Process a single stock."""
    try:
        logger.info(f"Starting processing for {ticker}")
        top_features = getStockFeatures(ticker)
        if not top_features:
            logger.error(f"No features returned for {ticker}.")
            return None
        results = []
        model_path = f"C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/data/combined/{ticker}_combined.parquet"
        target_col = f"{ticker}_Close"
        for horizon in horizons:
            logger.info(f"Training model for {ticker} horizon {horizon}")
            model, metrics, backtest_df = train_pytorch_forecast(model_path, target_col, top_features, horizon)
            if metrics is None:
                logger.error(f"Failed to train model for {ticker} horizon {horizon}")
                continue
            results.append({
                'ticker': ticker,
                'horizon': horizon,
                'rmse': metrics['rmse'],
                'mae': metrics['mae'],
                'r2': metrics['r2'],
                'top_features': top_features,
                'backtest_df': backtest_df
            })
        logger.info(f"Completed processing for {ticker} with {len(results)} results")
        return results
    except Exception as e:
        logger.error(f"Error processing {ticker}: {str(e)}")
        return None

if __name__ == "__main__":
    print("Test: Main block")
    tickers = pd.read_csv("C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/equities.csv")['Symbol'].tolist()
    logger.info(f"Processing {len(tickers)} tickers: {tickers}")
    results = process_stock("AAPL")
    print(f"Results: {results}")

Test: Main block
Test: Entering process_stock
Test: Entering getStockFeatures
Test: Entering train_pytorch_forecast
Test2: Before scaling


Successfully registered model 'StockPricePyTorch_Horizon1'.
Created version '1' of model 'StockPricePyTorch_Horizon1'.


Test: Entering train_pytorch_forecast
Test2: Before scaling




Results: [{'ticker': 'AAPL', 'horizon': 1, 'rmse': 27.225903644664307, 'mae': 23.55388609599129, 'r2': 0.5352345684363038, 'top_features': ['AAPL_Close_ma3', 'SRVPRD_lag1', 'AAPL_Close_lag1', 'PAYEMS_lag1', 'BUSINV_lag3', 'SRVPRD_lag3', 'CPIAUCSL_lag6', 'CSUSHPINSA', 'PI_lag1', 'CIVPART'], 'backtest_df':                  Date    Actual   Predicted  Current_Price Signal
2020-04-01 2020-04-01   77.0563   65.145096        71.2057   Sell
2020-05-01 2020-05-01   88.6527   71.925064        77.0563   Sell
2020-06-01 2020-06-01  103.2921   74.301880        88.6527   Sell
2020-07-01 2020-07-01  125.4358   83.765152       103.2921   Sell
2020-08-01 2020-08-01  112.7783   93.105011       125.4358   Sell
...               ...       ...         ...            ...    ...
2024-12-01 2024-12-01  235.4321  203.603836       249.8174   Sell
2025-01-01 2025-01-01  241.2580  208.146225       235.4321   Sell
2025-02-01 2025-02-01  221.8391  206.212189       241.2580   Sell
2025-03-01 2025-03-01  212.2217  2

Successfully registered model 'StockPricePyTorch_Horizon3'.
Created version '1' of model 'StockPricePyTorch_Horizon3'.


In [17]:
results[1]['backtest_df']['CorrectMove'] = np.where(
    results[1]['backtest_df']['Actual'] > results[1]['backtest_df']['Current_Price'],
    'Buy',
    'Sell'
)

In [18]:
results[1]['backtest_df']

Unnamed: 0,Date,Actual,Predicted,Current_Price,Signal,CorrectMove
2020-02-01,2020-02-01,77.0563,71.386497,66.0950,Buy,Buy
2020-03-01,2020-03-01,88.6527,68.621162,61.6300,Buy,Buy
2020-04-01,2020-04-01,103.2921,64.996696,71.2057,Sell,Buy
2020-05-01,2020-05-01,125.4358,61.402420,77.0563,Sell,Buy
2020-06-01,2020-06-01,112.7783,64.082947,88.6527,Sell,Buy
...,...,...,...,...,...,...
2024-10-01,2024-10-01,235.4321,189.971603,225.1187,Sell,Buy
2024-11-01,2024-11-01,241.2580,189.289734,236.4987,Sell,Buy
2024-12-01,2024-12-01,221.8391,194.085815,249.8174,Sell,Sell
2025-01-01,2025-01-01,212.2217,198.701111,235.4321,Sell,Sell


In [22]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
import mlflow
import mlflow.pytorch
import logging
from pathlib import Path
from dash import Dash, html, dcc, dash_table
import plotly.express as px
from joblib import Parallel, delayed

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def calculate_metrics(y_true, y_pred):
    """Calculate RMSE, MAE, and R²."""
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return {'rmse': rmse, 'mae': mae, 'r2': r2}

class StockPriceNN(nn.Module):
    """Feed-forward neural network for stock price forecasting."""
    def __init__(self, input_size, hidden_size=64):
        super(StockPriceNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class StockPriceLSTM(nn.Module):
    """LSTM network for stock price forecasting."""
    def __init__(self, input_size, hidden_size=64, num_layers=2):
        super(StockPriceLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

def train_pytorch_forecast(data_path, target, selected_features, horizon=1, params=None, use_lstm=False):
    """Train PyTorch model for h-month-ahead forecasting."""
    print("Test: Entering train_pytorch_forecast")
    if params is None:
        params = {'hidden_size': 64, 'epochs': 100, 'learning_rate': 0.001, 'batch_size': 16}

    try:
        df = pd.read_parquet(data_path)
        logger.info(f"Loaded data for {target} with shape {df.shape}")
        if target not in df.columns:
            logger.error(f"Target column {target} not found in data.")
            return None, None, None

        df[target] = pd.to_numeric(df[target], errors='coerce')
        if df[target].isna().any():
            logger.warning(f"Target {target} contains {df[target].isna().sum()} NaN values after conversion.")
            df = df.dropna(subset=[target])
            if df.empty:
                logger.error(f"No valid data for {target} after numeric conversion.")
                return None, None, None
        logger.info(f"Target {target} dtype: {df[target].dtype}")

        if len(df) < horizon:
            logger.error(f"Dataset too short ({len(df)} rows) for horizon {horizon}.")
            return None, None, None

        df[target + f'_t{horizon}'] = df[target].shift(-horizon)
        df = df.dropna(subset=selected_features + [target + f'_t{horizon}'])
        logger.info(f"Data shape after NaN handling: {df.shape}")
        if df.empty:
            logger.error(f"No valid data for {target} after NaN handling.")
            return None, None, None

        X = df[selected_features].fillna(method="ffill").fillna(method="bfill")
        y = df[target + f'_t{horizon}']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
        logger.info(f"Train indices: {X_train.index[:5].tolist()}, Test indices: {X_test.index[:5].tolist()}")
        y_train = pd.to_numeric(y_train, errors='coerce')
        y_test = pd.to_numeric(y_test, errors='coerce')
        non_nan_train = ~y_train.isna()
        non_nan_test = ~y_test.isna()
        X_train = X_train[non_nan_train]
        y_train = y_train[non_nan_train]
        X_test = X_test[non_nan_test]
        y_test = y_test[non_nan_test]
        logger.info(f"Train size after NaN filter: {len(X_train)}, Test size after NaN filter: {len(X_test)}")
        if len(X_train) == 0 or len(X_test) == 0:
            logger.error(f"No valid train/test data for {target} after NaN handling.")
            return None, None, None

        print("Test2: Before scaling")
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        if use_lstm:
            # Reshape for LSTM [samples, timesteps, features]
            X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
            X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

        X_train_tensor = torch.FloatTensor(X_train_scaled)
        y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1, 1)
        X_test_tensor = torch.FloatTensor(X_test_scaled)
        y_test_tensor = torch.FloatTensor(y_test.values).reshape(-1, 1)

        input_size = X.shape[1]
        model = StockPriceLSTM(input_size, params['hidden_size']) if use_lstm else StockPriceNN(input_size, params['hidden_size'])
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

        mlflow.set_experiment("stock_price_forecasting")
        with mlflow.start_run(run_name=f"{target}_horizon{horizon}"):
            mlflow.set_tag("ticker", target.split('_')[0])
            mlflow.set_tag("horizon", horizon)
            mlflow.set_tag("model_type", "LSTM" if use_lstm else "NN")
            model.train()
            for epoch in range(params['epochs']):
                for i in range(0, len(X_train_tensor), params['batch_size']):
                    batch_X = X_train_tensor[i:i + params['batch_size']]
                    batch_y = y_train_tensor[i:i + params['batch_size']]
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()
                if (epoch + 1) % 10 == 0:
                    logger.info(f"Epoch {epoch+1}/{params['epochs']} - Loss: {loss.item():.4f}")

            model.eval()
            with torch.no_grad():
                y_pred = model(X_test_tensor).numpy().flatten()
            # Bias correction
            with torch.no_grad():
                y_train_pred = model(X_train_tensor).numpy().flatten()
            bias = np.mean(y_train_pred - y_train)
            y_pred += bias
            metrics = calculate_metrics(y_test, y_pred)
            logger.info(f"Metrics for {target} (horizon {horizon}): {metrics}")

            current_prices = df.loc[y_test.index, target]
            logger.info(f"Current prices shape: {current_prices.shape}, y_test shape: {y_test.shape}")
            price_change = (y_pred - current_prices) / current_prices * 100
            signals = np.where(price_change > 5, "Buy", np.where(price_change < -5, "Sell", "Hold"))
            backtest_df = pd.DataFrame({
                "Date": y_test.index,
                "Actual": y_test,
                "Predicted": y_pred,
                "Current_Price": current_prices,
                "Signal": signals
            })
            backtest_df.to_csv(f"backtest_{target}_horizon{horizon}.csv")
            mlflow.log_params(params)
            mlflow.log_metrics(metrics)
            mlflow.log_param("features", ", ".join(selected_features))
            mlflow.log_artifact(f"backtest_{target}_horizon{horizon}.csv")
            mlflow.pytorch.log_model(model, "pytorch_model", input_example=X_train_scaled[:1])
            model_uri = f"runs:/{mlflow.active_run().info.run_id}/pytorch_model"
            registered_model = mlflow.register_model(model_uri, f"StockPricePyTorch_Horizon{horizon}")
            logger.info(f"Model registered: {registered_model.name} version {registered_model.version}")

        return model, metrics, backtest_df
    except Exception as e:
        print(f"Error in train_pytorch_forecast for {target} horizon {horizon}: {str(e)}")
        logger.error(f"Error in train_pytorch_forecast for {target} horizon {horizon}: {str(e)}")
        return None, None, None

def getStockFeatures(ticker="AAPL"):
    print("Test: Entering getStockFeatures")
    """Get top 10 features for a stock using Random Forest."""
    data_path = Path(f"C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/data/combined/{ticker}_combined.parquet")
    if not data_path.exists():
        logger.error(f"Data file for {ticker} not found at {data_path}.")
        return []
    df = pd.read_parquet(data_path)
    target = f"{ticker}_Close"
    if target not in df.columns:
        logger.error(f"Target column {target} not found in data for {ticker}.")
        return []
    df[target] = pd.to_numeric(df[target], errors='coerce')
    if df[target].isna().any():
        logger.warning(f"Target {target} contains {df[target].isna().sum()} NaN values after conversion.")
        df = df.dropna(subset=[target])
    features = [col for col in df.columns if col != target and not col.startswith(target + '_t')]
    logger.info(f"Found {len(features)} features for {ticker}: {features}")
    if not features:
        logger.error(f"No valid features found for {ticker}.")
        return []

    df = df.dropna(subset=features + [target])
    logger.info(f"Data shape after NaN handling for {ticker}: {df.shape}")
    if df.empty:
        logger.error(f"No valid data for {ticker} after NaN handling.")
        return []

    X = df[features]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
    cv_scores = cross_val_score(rf, X_train_scaled, y_train, cv=5, scoring="r2")
    logger.info(f"Cross-validated R-squared for {ticker}: {cv_scores.mean():.3f} (±{cv_scores.std():.3f})")
    rf.fit(X_train_scaled, y_train)
    importances = rf.feature_importances_
    importance_df = pd.DataFrame({
        "Feature": features,
        "Importance": importances
    }).sort_values(by="Importance", ascending=False)
    return importance_df.head(10)['Feature'].tolist()

def process_stock(ticker, horizons=[1, 3]):
    print("Test: Entering process_stock")
    """Process a single stock."""
    try:
        logger.info(f"Starting processing for {ticker}")
        top_features = getStockFeatures(ticker)
        if not top_features:
            logger.error(f"No features returned for {ticker}.")
            return None
        results = []
        model_path = f"C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/data/combined/{ticker}_combined.parquet"
        target_col = f"{ticker}_Close"
        for horizon in horizons:
            logger.info(f"Training model for {ticker} horizon {horizon}")
            use_lstm = (horizon == 3)  # Use LSTM for 3-month horizon
            model, metrics, backtest_df = train_pytorch_forecast(model_path, target_col, top_features, horizon, use_lstm=use_lstm)
            if metrics is None:
                logger.error(f"Failed to train model for {ticker} horizon {horizon}")
                continue
            results.append({
                'ticker': ticker,
                'horizon': horizon,
                'rmse': metrics['rmse'],
                'mae': metrics['mae'],
                'r2': metrics['r2'],
                'top_features': top_features,
                'backtest_df': backtest_df
            })
        logger.info(f"Completed processing for {ticker} with {len(results)} results")
        return results
    except Exception as e:
        logger.error(f"Error processing {ticker}: {str(e)}")
        return None

def create_dashboard(results):
    """Create a Dash dashboard for buy/sell signals."""
    app = Dash(__name__)
    children = [
        html.H1("Stock Price Prediction Dashboard"),
        html.H2("Top Stocks by R²")
    ]
    
    for result in results:
        ticker = result['ticker']
        horizon = result['horizon']
        backtest_df = result['backtest_df']
        fig = px.line(backtest_df, x="Date", y=["Actual", "Predicted"], title=f"{ticker} {horizon}-Month Forecast")
        fig.add_scatter(x=backtest_df[backtest_df["Signal"] == "Buy"]["Date"],
                        y=backtest_df[backtest_df["Signal"] == "Buy"]["Predicted"],
                        mode="markers", name="Buy", marker=dict(symbol="triangle-up", size=10, color="green"))
        fig.add_scatter(x=backtest_df[backtest_df["Signal"] == "Sell"]["Date"],
                        y=backtest_df[backtest_df["Signal"] == "Sell"]["Predicted"],
                        mode="markers", name="Sell", marker=dict(symbol="triangle-down", size=10, color="red"))
        children.append(html.H3(f"{ticker} (Horizon: {horizon} months, R²: {result['r2']:.3f})"))
        children.append(dcc.Graph(figure=fig))
        children.append(dash_table.DataTable(
            data=backtest_df.to_dict('records'),
            columns=[{"name": i, "id": i} for i in backtest_df.columns],
            style_table={'overflowX': 'auto'},
            page_size=10
        ))
        children.append(html.Hr())

    app.layout = html.Div(children)
    app.run(debug=True)

if __name__ == "__main__":
    print("Test: Main block")
    tickers = pd.read_csv("C:/Users/Steel/Desktop/Projects/intel-sweep/intel-sweep/src/data/equities.csv")['Symbol'].tolist()
    logger.info(f"Processing {len(tickers)} tickers: {tickers}")
    # Process a single ticker for testing
    results = process_stock("AAPL")
    print(f"Results: {results}")
    
    if results:
        # Create dashboard for AAPL
        create_dashboard(results)
        
        # Process all tickers
        results = Parallel(n_jobs=-1)(delayed(process_stock)(ticker) for ticker in tickers)  # Limit for demo
        results = [r for r in sum([r if r else [] for r in results], []) if r is not None]
        results_df = pd.DataFrame(results)
        if not results_df.empty:
            logger.info(f"Results DataFrame shape: {results_df.shape}")
            top_10 = results_df.sort_values(by='r2', ascending=False).head(10)
            logger.info(f"Top 10 models by R²:\n{top_10[['ticker', 'horizon', 'r2', 'rmse', 'mae']]}")
            top_10.to_csv("top_10_forecasts_by_r2.csv", index=False)

            with mlflow.start_run(run_name="Top_10_Forecasts_Summary"):
                mlflow.log_artifact("top_10_forecasts_by_r2.csv")
                for i, row in top_10.iterrows():
                    mlflow.log_metric(f"{row['ticker']}_horizon{row['horizon']}_r2", row['r2'])
                    mlflow.log_metric(f"{row['ticker']}_horizon{row['horizon']}_rmse", row['rmse'])
                    mlflow.log_metric(f"{row['ticker']}_horizon{row['horizon']}_mae", row['mae'])

            create_dashboard(results)  # Dashboard for all tickers

Test: Main block
Test: Entering process_stock
Test: Entering getStockFeatures
Test: Entering train_pytorch_forecast
Test2: Before scaling




Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  "inputs": [
    [
      -0.8105476339471039,
      -1.4510651868949207,
      -0.8141476593511862,
      -0.93939120974452,
      -1.270227241772032,
      -1.4367792847944094,
      -1.7236754234136695,
      -2.0716649718066393,
      -1.5348917661101735,
      1.6610900059066525
    ]
  ]
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: mat1 and mat2 must have the same dtype, but got Double and Float
Registered model 'StockPricePyTorch_Horizon1' already exists. Creating a new version of this model...
Created version '13' of model 'StockPricePyTorch_Horizon1'.


Test: Entering train_pytorch_forecast
Test2: Before scaling




Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  "inputs": [
    [
      [
        -0.8195741568753288,
        -1.455247777830105,
        -0.8248169877221491,
        -0.9379360211673813,
        -1.2680000427246219,
        -1.4407257544237047,
        -1.7225549365198587,
        -2.0792044439367303,
        -1.5388311510898904,
        1.6537812645384349
      ]
    ]
  ]
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: input must have the type torch.float32, got type torch.float64
Registered model 'StockPricePyTorch_Horizon3' already exists. Creating a new version of this model...
Created version '13' of model 'StockPricePyTorch_Horizon3'.


Results: [{'ticker': 'AAPL', 'horizon': 1, 'rmse': 27.32069282759836, 'mae': 23.51427765653016, 'r2': 0.5319926980359542, 'top_features': ['AAPL_Close_ma3', 'SRVPRD_lag1', 'AAPL_Close_lag1', 'PAYEMS_lag1', 'BUSINV_lag3', 'SRVPRD_lag3', 'CPIAUCSL_lag6', 'CSUSHPINSA', 'PI_lag1', 'CIVPART'], 'backtest_df':                  Date    Actual   Predicted  Current_Price Signal
2020-04-01 2020-04-01   77.0563   65.406296        71.2057   Sell
2020-05-01 2020-05-01   88.6527   77.694702        77.0563   Hold
2020-06-01 2020-06-01  103.2921   79.363571        88.6527   Sell
2020-07-01 2020-07-01  125.4358   84.844360       103.2921   Sell
2020-08-01 2020-08-01  112.7783   93.349678       125.4358   Sell
...               ...       ...         ...            ...    ...
2024-12-01 2024-12-01  235.4321  203.717346       249.8174   Sell
2025-01-01 2025-01-01  241.2580  209.173309       235.4321   Sell
2025-02-01 2025-02-01  221.8391  205.809631       241.2580   Sell
2025-03-01 2025-03-01  212.2217  20


A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.


A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.


A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.


A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.


A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.



In [21]:
results_df

Unnamed: 0,ticker,horizon,rmse,mae,r2,top_features,backtest_df
0,NVDA,1,23.682996,15.688522,0.693331,"[SP500_lag3, NVDA_Close_ma3, PAYEMS_lag1, RSXF...",Date Actual Predicted Cu...
1,NVDA,3,60.131066,42.047603,-0.976946,"[SP500_lag3, NVDA_Close_ma3, PAYEMS_lag1, RSXF...",Date Actual Predicted Cu...
2,MSFT,1,38.066642,31.475942,0.761796,"[CSUSHPINSA_lag6, CSUSHPINSA_lag3, CSUSHPINSA_...",Date Actual Predicted C...
3,MSFT,3,284.814116,273.926725,-12.334717,"[CSUSHPINSA_lag6, CSUSHPINSA_lag3, CSUSHPINSA_...",Date Actual Predicted Cu...
4,AAPL,1,26.516358,22.643422,0.559144,"[AAPL_Close_ma3, SRVPRD_lag1, AAPL_Close_lag1,...",Date Actual Predicted C...
5,AAPL,3,135.395763,129.373778,-10.494216,"[AAPL_Close_ma3, SRVPRD_lag1, AAPL_Close_lag1,...",Date Actual Predicted Cu...
6,AMZN,1,60.468072,55.942654,-2.348495,"[ICSA_lag6, SP500_lag1, SP500, SP500_lag3, AMZ...",Date Actual Predicted C...
7,AMZN,3,125.39982,120.908018,-13.400953,"[ICSA_lag6, SP500_lag1, SP500, SP500_lag3, AMZ...",Date Actual Predicted Cu...
8,GOOG,1,37.567129,33.386094,-0.851867,"[GOOG_Close_ma3, BUSINV_lag3, CSUSHPINSA, CSUS...",Date Actual Predicted C...
9,GOOG,3,106.920773,103.295563,-14.000921,"[GOOG_Close_ma3, BUSINV_lag3, CSUSHPINSA, CSUS...",Date Actual Predicted Cu...
