In [None]:
from bsf_env import init_spark, init_mariadb_engine,set_spark_verbosity
from pyspark.sql.functions import lit, current_timestamp
import pandas as pd
import numpy as np
from pyspark.sql.types import *
from tqdm import tqdm
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from IPython.display import display, HTML
from pyspark.sql import functions as F
from pyspark.sql.window import Window
import joblib
import tempfile
import os

# Start the Spark session and the MariaDB engine through the project helpers
# imported from bsf_env.
spark = init_spark(
    "bsf_candidates_analysis",
    log_level="WARN",
    show_progress=False,
    enable_ui=True,
    priority=False,
)
engine = init_mariadb_engine()

# Timestamp taken from Spark itself (single row, single column).
ingest_ts = spark.sql("SELECT current_timestamp()").collect()[0][0]

# pandas display preferences: all columns, wider console, at most 20 rows.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.width", 200),
    ("display.max_rows", 20),
):
    pd.set_option(option_name, option_value)

# List every table in the bsf schema ...
tables_df = spark.sql("SHOW TABLES IN bsf")
tables_df.show(truncate=False)

# ... and print a row count for each one. A failing count is reported inline
# instead of aborting the inventory.
for table_row in tables_df.collect():
    full_name = f"bsf.{table_row['tableName']}"
    try:
        count = spark.table(full_name).count()
    except Exception as e:
        count = f"Error: {e}"
    print(f"Table: {full_name} | Rows: {count}")


# Source tables: the latest-signal snapshot (all columns) and the full
# signal history. The stray trailing space in the first table name was removed.
df_last = spark.table("bsf.daily_signals_last_allcol")
df_all = spark.table("bsf.daily_signals")

# Overall distribution of actions, most frequent first.
df_all.groupBy("Action").count().orderBy(F.desc("count")).show(truncate=False)

# Distribution of actions per timeframe.
# DataFrame.show() prints the table itself and returns None, so it must not be
# wrapped in print() (that emitted an extra "None" line).
(
    df_all.groupBy("TimeFrame", "Action")
    .count()
    .orderBy("TimeFrame", F.desc("count"))
    .show(truncate=False)
)

# Cache the snapshot: it is aggregated below and then joined back against
# that aggregate, so it is read more than once.
df = df_last.cache()

# -----------------------------
# Aggregate Buy/Sell/Hold counts per company per timeframe
# -----------------------------
# The summed return is aliased "TotalReturn" (it used to be "Return"), because
# the join further down brings the per-row "Return" column back in and two
# columns with the same name are ambiguous in Spark and duplicated in pandas.
df_counts = df.groupBy("CompanyId", "TimeFrame").agg(
    F.sum(F.when(F.col("Action") == "Buy", 1).otherwise(0)).alias("BuyCount"),
    F.sum(F.when(F.col("Action") == "Sell", 1).otherwise(0)).alias("SellCount"),
    F.sum(F.when(F.col("Action") == "Hold", 1).otherwise(0)).alias("HoldCount"),
    F.sum("Return").alias("TotalReturn"),
)

# -----------------------------
# Define windows partitioned by timeframe
# -----------------------------
# CompanyId is a secondary sort key so row_number() is deterministic when many
# companies share the same count; without it the rank-1 company among ties
# could change between runs.
w_buy = Window.partitionBy("TimeFrame").orderBy(F.desc("BuyCount"), F.asc("CompanyId"))
w_sell = Window.partitionBy("TimeFrame").orderBy(F.desc("SellCount"), F.asc("CompanyId"))
w_hold = Window.partitionBy("TimeFrame").orderBy(F.desc("HoldCount"), F.asc("CompanyId"))

# -----------------------------
# Add separate rank columns (1 = highest count within the timeframe)
# -----------------------------
df_ranked = (
    df_counts
    .withColumn("BuyRank", F.row_number().over(w_buy))
    .withColumn("SellRank", F.row_number().over(w_sell))
    .withColumn("HoldRank", F.row_number().over(w_hold))
)

# -----------------------------
# Keep the aggregate and rank columns that get joined back
# -----------------------------
ranked_companies = df_ranked.select(
    "CompanyId", "TimeFrame", "BuyCount", "SellCount", "HoldCount",
    "TotalReturn", "BuyRank", "SellRank", "HoldRank"
)

# -----------------------------
# Join back to the snapshot so every original row carries its counts and ranks
# -----------------------------
ranked_rows = df.join(ranked_companies, on=["CompanyId", "TimeFrame"], how="inner")

# Show the rows of the top Buy-ranked company in each timeframe.
display(ranked_rows.filter(F.col("BuyRank") <= 1).orderBy("TimeFrame", "BuyRank").toPandas())


# -----------------------------
# Split the ranked snapshot and the full history by timeframe
# -----------------------------
timeframes = ["Short", "Swing", "Long", "Daily"]

# pandas snapshots published as module-level names pdf_<tf> / pdf_<tf>_all
# (e.g. pdf_short, pdf_short_all). They are kept only so existing references
# to those names keep working.
# NOTE(review): each *_all conversion collects a full timeframe of
# bsf.daily_signals onto the driver and nothing later in this section reads
# these frames -- consider dropping them if no other cell needs them.
for tf in timeframes:
    tf_key = tf.lower()
    globals()[f"pdf_{tf_key}"] = ranked_rows.filter(F.col("TimeFrame") == tf).toPandas()
    globals()[f"pdf_{tf_key}_all"] = df_all.filter(F.col("TimeFrame") == tf).toPandas()

# Per-timeframe Spark DataFrames (lazy; no toPandas here). These are the
# dictionaries the modelling stages iterate over. The earlier pandas-valued
# versions of these two dicts were overwritten immediately and have been
# removed, together with the duplicate `timeframes` definition.
timeframe_dfs = {tf: ranked_rows.filter(F.col("TimeFrame") == tf) for tf in timeframes}
timeframe_dfs_all = {tf: df_all.filter(F.col("TimeFrame") == tf) for tf in timeframes}


#full
# -------------------------
# Full Stage 1 → Stage 2 Pipeline
# -------------------------
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pycaret.regression import setup, compare_models, predict_model, finalize_model

# -------------------------
# Stage 1: Predict TomorrowClose
# -------------------------
target_stage1 = "TomorrowClose"
epsilon = 1e-6                 # stand-in for zero prices before taking logs
all_stage1_predictions = []    # one DataFrame of predicted rows per company/timeframe
top_n = 5
# forecast steps per timeframe for Stage 2
forecast_steps_map = {
    "Daily": 1,
    "Short": 3,
    "Swing": 5,
    "Long": 10
}
threshold = 0.03  # minimal absolute correlation with the target for a feature to be kept

# Columns that must never be used as features: the target itself (its
# correlation with itself is 1.0, so it always passed the threshold) and
# TomorrowReturn, the Stage 2 target. Both are missing in exactly the rows
# being predicted, where fillna(0) would turn them into a constant 0.
leaky_cols = {target_stage1, "TomorrowReturn"}

# Loop over timeframes
for tf, sdf_tf in timeframe_dfs_all.items():
    pdf_tf = sdf_tf.toPandas()
    print(f"\n=== Phase 1 - Processing timeframe: {tf} ===")

    # One pass over the frame instead of a full boolean scan per company.
    # sort=False keeps companies in order of first appearance.
    for cid, df_c in pdf_tf.groupby("CompanyId", sort=False):
        df_c = df_c.copy()

        # -------------------------------
        # Log-transform OHLC to normalize scale
        # -------------------------------
        for col in ["Open", "High", "Low", "Close"]:
            df_c[f"log_{col}"] = np.log(df_c[col].replace(0, epsilon))

        # -------------------------------
        # Split: rows with a known target train the models,
        # rows with a missing target are the ones to predict
        # -------------------------------
        has_target = df_c[target_stage1].notna()
        train_df = df_c[has_target].copy()
        future_df = df_c[~has_target].copy()
        if train_df.empty or future_df.empty:
            # Nothing to learn from, or nothing to predict: skip the fits.
            continue

        # -------------------------------
        # Feature selection: numeric columns correlated with target
        # -------------------------------
        numeric_cols = [
            c for c in train_df.select_dtypes(include=[np.number]).columns
            if c not in leaky_cols
        ]
        corr = train_df[numeric_cols + [target_stage1]].corr()[target_stage1].abs()
        corr = corr.drop(labels=[target_stage1])
        # NaN correlations (e.g. constant columns) compare False and are dropped.
        good_features = corr[corr >= threshold].index.tolist()
        if not good_features:
            # The estimators cannot be fitted on zero features.
            continue

        X_train = train_df[good_features].fillna(0)
        y_train = train_df[target_stage1]

        # -------------------------------
        # Stage 1 models
        # -------------------------------
        lr_model = LinearRegression().fit(X_train, y_train)
        lasso_model = Lasso(alpha=0.01).fit(X_train, y_train)
        ridge_model = Ridge(alpha=1.0, solver="svd").fit(X_train, y_train)

        # -------------------------------
        # Predict future rows (target is NaN)
        # -------------------------------
        X_future = future_df[good_features].fillna(0)
        future_df["Pred_Linear"] = lr_model.predict(X_future)
        future_df["Pred_Lasso"] = lasso_model.predict(X_future)
        future_df["Pred_Ridge"] = ridge_model.predict(X_future)
        future_df["TimeFrame"] = tf
        future_df["CompanyId"] = cid
        all_stage1_predictions.append(future_df)

# -------------------------------
# Combine Stage 1 predictions
# -------------------------------
if all_stage1_predictions:
    stage1_df = pd.concat(all_stage1_predictions, ignore_index=True)
else:
    stage1_df = pd.DataFrame()
    print("No predictions generated.")

# -------------------------------
# Top-N selection per timeframe (using average of Linear/Lasso/Ridge)
# -------------------------------
if not stage1_df.empty:
    stage1_df["PredictedTomorrowClose"] = stage1_df[["Pred_Linear","Pred_Lasso","Pred_Ridge"]].mean(axis=1)

    top_list = []
    for tf in stage1_df["TimeFrame"].unique():
        tf_df = stage1_df[stage1_df["TimeFrame"] == tf].sort_values(
            "PredictedTomorrowClose", ascending=False
        )
        # Keep only the best row per company. The source can hold several rows
        # for the same company/timeframe, and without this one company could
        # fill more than one of the top_n slots.
        tf_df = tf_df.drop_duplicates(subset="CompanyId", keep="first")
        top_list.append(tf_df.head(top_n))

    stage1_top_df = pd.concat(top_list, ignore_index=True)

    print("\n=== Stage 1 Top Predictions per Timeframe ===")
    print(stage1_top_df[["TimeFrame", "CompanyId", "PredictedTomorrowClose"]])
else:
    stage1_top_df = pd.DataFrame()

# -------------------------
# Phase 2: SARIMAX + PyCaret (optimized)
# -------------------------
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pycaret.regression import setup, create_model, tune_model, finalize_model, predict_model

# Stage 2 targets
# From here on target_stage1 names the Stage 1 *output* column.
target_stage1 = "PredictedTomorrowClose"
target_stage2 = "TomorrowReturn"

# Forecast steps per timeframe
forecast_steps_map = {
    "Daily": 1,
    "Short": 3,
    "Swing": 5,
    "Long": 10
}

# Candidate set for Stage 2: only the top-N per timeframe.
# To run Stage 2 on every Stage 1 prediction instead, swap in:
#     combined_top_df_clean = stage1_df.fillna(0)
# (That assignment used to run unconditionally and was overwritten by the
# line below, copying the full prediction frame for nothing.)
combined_top_df_clean = stage1_top_df.fillna(0)

# Numeric features (exclude targets)
stage2_features = [c for c in combined_top_df_clean.select_dtypes(include=[np.number]).columns
                   if c not in [target_stage1, target_stage2]]

# -------------------------
# Phase 2: SARIMAX + PyCaret (optimized)
# -------------------------
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import mlflow
import mlflow.sklearn
from pycaret.regression import setup, create_model, tune_model, finalize_model, predict_model

# MLflow tracking server and experiment used by every Stage 2 run below.
mlflow.set_tracking_uri("http://localhost:8001")
mlflow.set_experiment("Stage2_SARIMAX_PyCaret")

# Accumulators: one dict per (CompanyId, TimeFrame) for each model family.
sarimax_results = []
pycaret_results = []

# with memory issues may want to limit
pycaret_models = ["lr", "lasso", "ridge", "en"]
#pycaret_models = ["lr"]

# An earlier draft of the SARIMAX loop used to live here inside a module-level
# string literal (it logged the fitted model as a joblib pickle artifact).
# It duplicated the live loop that follows and was never executed, so it has
# been removed; the live loop logs the model through the MLflow statsmodels
# flavor instead.
# -------------------------
# Stage 2 loop: for every Stage 1 candidate, fit a SARIMAX forecast of
# TomorrowReturn and a set of PyCaret regressors, logging each fit to MLflow.
# -------------------------
import mlflow.sklearn
import mlflow.statsmodels  # hoisted: was re-imported for every company

# Example fixed order (replace with auto_arima search results if you have them)
order = (1, 1, 1)
seasonal_order = (0, 1, 1, 7)

for tf, steps in forecast_steps_map.items():
    # An empty candidate frame has no TimeFrame column to filter on.
    if combined_top_df_clean.empty:
        top_companies = []
    else:
        top_companies = combined_top_df_clean.loc[
            combined_top_df_clean["TimeFrame"] == tf, "CompanyId"
        ].unique().tolist()
    if not top_companies:
        continue

    # Filter in Spark so only the candidate companies' history is collected
    # to the driver, not the whole timeframe.
    df_tf = (
        timeframe_dfs_all[tf]
        .filter(F.col("CompanyId").isin(top_companies))
        .toPandas()
    )

    for cid in top_companies:
        df_c = df_tf[df_tf["CompanyId"] == cid].dropna(subset=[target_stage2])

        # Rows collected from Spark carry no ordering guarantee, but SARIMAX
        # treats the series as chronological.
        # NOTE(review): assumes StockDate is the chronological key -- confirm.
        if "StockDate" in df_c.columns:
            df_c = df_c.sort_values("StockDate")
        df_c = df_c.reset_index(drop=True)

        if len(df_c) < 120:
            print(f"⏭️ Skipping {cid}-{tf} (not enough data)")
            continue

        ts = df_c[target_stage2]

        # -------------------------
        # SARIMAX
        # -------------------------
        try:
            with mlflow.start_run(run_name=f"SARIMAX_{cid}_{tf}"):
                sarimax_model = SARIMAX(
                    ts, order=order, seasonal_order=seasonal_order,
                    enforce_stationarity=False, enforce_invertibility=False
                )
                sarimax_fit = sarimax_model.fit(disp=False)

                # Mean of the forecast over the timeframe's horizon.
                forecast = sarimax_fit.get_forecast(steps=steps)
                mean_pred = float(forecast.predicted_mean.mean())

                # Log metric and params
                mlflow.log_metric("MeanPred_SARIMAX", mean_pred)
                mlflow.log_params({"order": order, "seasonal_order": seasonal_order})

                # Log SARIMAX as proper MLflow model
                mlflow.statsmodels.log_model(
                    sarimax_fit,
                    name="SARIMAX_model",
                    input_example=ts.head(5).to_frame()
                )

            # Recorded only when the whole run succeeded.
            sarimax_results.append({
                "CompanyId": cid,
                "TimeFrame": tf,
                "Pred_SARIMAX": mean_pred,
                "order": str(order),
                "seasonal_order": str(seasonal_order)
            })

        except Exception as e:
            print(f"❌ SARIMAX failed for {cid}-{tf}: {e}")

        # -------------------------
        # PyCaret
        # -------------------------
        # NOTE(review): df_c still contains TomorrowClose, from which
        # TomorrowReturn is presumably derived -- consider passing it through
        # setup(ignore_features=...) to avoid target leakage.
        try:
            stage2_features_c = [
                c for c in df_c.select_dtypes(include=[np.number]).columns
                if c not in [target_stage1, target_stage2]
            ]

            s = setup(
                data=df_c,
                target=target_stage2,
                numeric_features=stage2_features_c,
                session_id=42,
                log_experiment=False,   # manual MLflow control
                html=False
            )
        except Exception as e:
            print(f"❌ PyCaret failed for {cid}-{tf}: {e}")
            continue

        pycaret_entry = {"CompanyId": cid, "TimeFrame": tf}

        # If memory becomes a problem: shorten pycaret_models, lower `fold`,
        # or skip tune_model and finalize the untuned model directly.
        for model_name in pycaret_models:
            # Each model is isolated so one failure does not discard the
            # predictions of the other models for this company.
            try:
                with mlflow.start_run(run_name=f"PyCaret_{model_name}_{cid}_{tf}"):
                    model = create_model(model_name, fold=3)
                    try:
                        tuned = tune_model(model, fold=3, optimize="MAE")
                        final = finalize_model(tuned)
                    except Exception as e:
                        print(f"⚠️ Tuning failed for {model_name} {cid}-{tf}: {e}")
                        # fallback to untuned (model is guaranteed bound here)
                        final = finalize_model(model)

                    preds = predict_model(final, data=df_c)
                    # The prediction column name differs between PyCaret versions.
                    pred_col = next(
                        (c for c in ["Label", "prediction_label", "prediction"] if c in preds.columns),
                        None
                    )
                    if pred_col is None:
                        raise KeyError("no prediction column in predict_model output")

                    mean_pred = float(preds[pred_col].mean())
                    pycaret_entry[f"Pred_{model_name}_PyCaret"] = mean_pred

                    mlflow.log_metric(f"MeanPred_{model_name}", mean_pred)

                    # Log the finalized model (the object fitted above) with
                    # the sklearn flavor this file imports.
                    mlflow.sklearn.log_model(
                        final,
                        name=f"{model_name}_model",
                        input_example=df_c.head(1)  # just one row is enough
                    )
            except Exception as e:
                print(f"❌ PyCaret {model_name} failed for {cid}-{tf}: {e}")

        # Keep the entry if at least one model produced a prediction.
        if len(pycaret_entry) > 2:
            pycaret_results.append(pycaret_entry)


# Debug aid: show which fields each SARIMAX result carries.
for entry in sarimax_results:
    print(entry.keys())


# Convert results.
# The column lists are given explicitly so the frames always expose the merge
# keys and prediction columns, even when a result list is empty. A column-less
# empty frame would make the merges below raise KeyError.
key_cols = ['CompanyId', 'TimeFrame']
sarimax_cols = key_cols + ["Pred_SARIMAX", "order", "seasonal_order"]
pycaret_cols = key_cols + [f"Pred_{m}_PyCaret" for m in pycaret_models]

sarimax_df = pd.DataFrame(sarimax_results, columns=sarimax_cols)
pycaret_df = pd.DataFrame(pycaret_results, columns=pycaret_cols)

# Merge SARIMAX + PyCaret outputs onto the Stage 1 candidates
if set(key_cols).issubset(combined_top_df_clean.columns):
    final_df = combined_top_df_clean.merge(sarimax_df, on=key_cols, how='left')
    final_df = final_df.merge(pycaret_df, on=key_cols, how='left')
else:
    # Stage 1 produced no candidates, so there is nothing to left-join onto.
    print("⚠️ No Stage 1 candidates to merge; final_df holds model outputs only")
    final_df = sarimax_df.merge(pycaret_df, on=key_cols, how='outer')

# -------------------------
# Select Top N per timeframe
# -------------------------
top_n = 5

def select_top_n(df, pred_col, n=5):
    """Return the top-``n`` rows of each TimeFrame, ranked by ``pred_col``.

    Args:
        df: DataFrame with a ``TimeFrame`` column.
        pred_col: Name of the column to rank by (descending).
        n: Maximum number of rows kept per timeframe.

    Returns:
        A new DataFrame with a fresh 0..k-1 index, timeframes in order of
        first appearance. If ``pred_col`` is missing (a warning is printed)
        or ``df`` has no rows, an empty frame with ``df``'s columns is
        returned so callers can still select or merge on them.
    """
    if pred_col not in df.columns:
        print(f"⚠️ Column {pred_col} not found in DataFrame")
        return df.head(0).copy()

    top_list = []
    for tf in df['TimeFrame'].unique():
        tf_df = df[df['TimeFrame'] == tf].sort_values(pred_col, ascending=False)
        top_list.append(tf_df.head(n))

    # pd.concat raises ValueError on an empty list (df without rows).
    if not top_list:
        return df.head(0).copy()
    return pd.concat(top_list, ignore_index=True)

# Top `top_n` rows per timeframe, ranked by the SARIMAX mean forecast.
top_sarimax_df = select_top_n(final_df, pred_col='Pred_SARIMAX', n=top_n)

# Guarded variant of select_top_n: warns and returns an empty DataFrame when pred_col is missing
def select_top_n(df, pred_col, n=5):
    """Pick the ``n`` highest-``pred_col`` rows within each TimeFrame.

    Prints a warning and returns an empty, column-less DataFrame when
    ``pred_col`` is not a column of ``df``. The same empty frame is returned
    when ``df`` contains no timeframes at all. Otherwise the per-timeframe
    slices are concatenated in order of first appearance with a fresh index.
    """
    if pred_col not in df.columns:
        print(f"⚠️ Column {pred_col} not found in DataFrame")
        return pd.DataFrame()  # return empty

    best_per_timeframe = [
        df[df['TimeFrame'] == label]
        .copy()
        .sort_values(pred_col, ascending=False)
        .head(n)
        for label in df['TimeFrame'].unique()
    ]

    if not best_per_timeframe:
        return pd.DataFrame()
    return pd.concat(best_per_timeframe, ignore_index=True)


# Merge for comparison: SARIMAX top-N rows next to every PyCaret result.
# NOTE(review): top_sarimax_df is derived from final_df, which already holds
# the PyCaret prediction columns, so this outer merge repeats them with the
# _SARIMAX / _PyCaret suffixes and adds rows for companies outside the top-N.
# Confirm that this is the comparison layout you want.
top_combined_df = top_sarimax_df.merge(
    pycaret_df,
    on=['CompanyId','TimeFrame'],
    how='outer',
    suffixes=('_SARIMAX','_PyCaret')
)

print("Top N companies per timeframe (combined SARIMAX + PyCaret):")
print(top_combined_df)

# to_csv does not create missing directories; make sure the target exists so
# the export cannot fail after all the modelling work is done.
os.makedirs("cvs", exist_ok=True)
top_combined_df.to_csv("cvs/final_top_combined_df.csv", index=False)




def select_top_n_final(df, pred_col="PredictedTomorrowClose", n=5):
    """Select the top-``n`` rows per TimeFrame by a prediction column.

    Args:
        df: DataFrame with a ``TimeFrame`` column and a ``pred_col`` column.
        pred_col: Column to rank by, highest first.
        n: Maximum number of rows kept per timeframe.

    Returns:
        A new DataFrame with a fresh index, timeframes in order of first
        appearance. An input without rows yields an empty frame that keeps
        ``df``'s columns (previously ``pd.concat`` raised ValueError).

    Raises:
        KeyError: if ``TimeFrame`` or ``pred_col`` is not a column of ``df``.
    """
    top_list = []
    for tf in df['TimeFrame'].unique():
        tf_df = df[df['TimeFrame'] == tf].sort_values(pred_col, ascending=False)
        top_list.append(tf_df.head(n))

    if not top_list:
        # Nothing to rank: keep the schema so callers can still pick columns.
        return df.head(0).copy()
    return pd.concat(top_list, ignore_index=True)

# Rank the merged candidates by the Stage 1 ensemble prediction, 5 per timeframe.
top_candidates = select_top_n_final(final_df, pred_col="PredictedTomorrowClose", n=5)

# Narrow to the columns needed for the DB write.
db_columns = ["CompanyId", "TimeFrame", "PredictedTomorrowClose"]
top_out = top_candidates[db_columns]

print(top_out)

# NOTE(review): this writes top_combined_df again, not top_out -- confirm
# which frame this second CSV is meant to contain.
top_combined_df.to_csv("cvs/final_top_combined_1_df.csv", index=False)


# Release the Spark session now that all outputs are written.
spark.stop()

:: loading settings :: url = jar:file:/home/jupyter/.venv/python3.9_bsf/lib/python3.9/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/jupyter/.ivy2/cache
The jars for the packages stored in: /home/jupyter/.ivy2/jars
io.delta#delta-spark_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-09b1fdb9-5cae-4725-b416-c4cbacec0df9;1.0
	confs: [default]
	found io.delta#delta-spark_2.12;3.0.0rc1 in spark-list
	found io.delta#delta-storage;3.0.0rc1 in spark-list
	found org.antlr#antlr4-runtime;4.9.3 in spark-list
:: resolution report :: resolve 852ms :: artifacts dl 42ms
	:: modules in use:
	io.delta#delta-spark_2.12;3.0.0rc1 from spark-list in [default]
	io.delta#delta-storage;3.0.0rc1 from spark-list in [default]
	org.antlr#antlr4-runtime;4.9.3 from spark-list in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	

[Spark] Started 'bsf_candidates_analysis' log_level=WARN (effective=WARN), progress=False


25/09/12 13:00:10 WARN TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources
25/09/12 13:00:28 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist
25/09/12 13:00:28 WARN HiveConf: HiveConf of name hive.metastore.client.connect.timeout does not exist
25/09/12 13:00:28 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist
25/09/12 13:00:31 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist
25/09/12 13:00:31 WARN HiveConf: HiveConf of name hive.metastore.client.connect.timeout does not exist
25/09/12 13:00:31 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist
25/09/12 13:00:35 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist
25/09/12 13:00:35 WARN HiveConf: HiveConf of name hive.metastore.client.connect.timeout does not exist
25/09/12 13:00:35 WARN HiveConf: HiveConf of name hive.stats.retries.w

+---------+-------------------------+-----------+
|namespace|tableName                |isTemporary|
+---------+-------------------------+-----------+
|bsf      |company                  |false      |
|bsf      |companystockhistory      |false      |
|bsf      |daily_signals            |false      |
|bsf      |daily_signals_allcol     |false      |
|bsf      |daily_signals_last       |false      |
|bsf      |daily_signals_last_allcol|false      |
+---------+-------------------------+-----------+

Table: bsf.company | Rows: 30949
Table: bsf.companystockhistory | Rows: 461636
Table: bsf.daily_signals | Rows: 1867416
Table: bsf.daily_signals_allcol | Rows: 1867416
Table: bsf.daily_signals_last | Rows: 7440
Table: bsf.daily_signals_last_allcol | Rows: 7440
+------+-------+
|Action|count  |
+------+-------+
|Hold  |1327332|
|Buy   |299551 |
|Sell  |240533 |
+------+-------+

+---------+------+------+
|TimeFrame|Action|count |
+---------+------+------+
|Daily    |Hold  |328176|
|Daily    |Buy

Unnamed: 0,CompanyId,TimeFrame,StockDate,Open,High,Low,Close,Doji,Hammer,HangingMan,InvertedHammer,ShootingStar,BullishMarubozu,BearishMarubozu,SuspiciousCandle,BullishEngulfing,BearishEngulfing,BullishHarami,BearishHarami,HaramiCross,PiercingLine,DarkCloudCover,MorningStar,EveningStar,ThreeWhiteSoldiers,ThreeBlackCrows,TweezerTop,TweezerBottom,InsideBar,OutsideBar,NearHigh,NearLow,PatternCount,PatternType,MA,MA_slope,UpTrend_MA,DownTrend_MA,RecentReturn,UpTrend_Return,DownTrend_Return,Volatility,LowVolatility,HighVolatility,ROC,MomentumUp,MomentumDown,ConfirmedUpTrend,ConfirmedDownTrend,ValidHammer,ValidBullishEngulfing,ValidPiercingLine,ValidMorningStar,ValidThreeWhiteSoldiers,ValidBullishMarubozu,ValidTweezerBottom,ValidShootingStar,ValidBearishEngulfing,ValidDarkCloud,ValidEveningStar,ValidThreeBlackCrows,ValidBearishMarubozu,ValidTweezerTop,ValidHaramiCross,ValidBullishHarami,ValidBearishHarami,ValidInsideBar,ValidOutsideBar,TomorrowClose,TomorrowReturn,Return,AvgReturn,MomentumZ,BuyThresh,SellThresh,MomentumAction,BullScore,BearScore,PatternScore,PatternScoreNorm,PatternAction,CandleAction,CandidateAction,Action,TomorrowAction,TomorrowActionSource,SignalStrengthHybrid,ActionConfidence,BullishStrengthHybrid,BearishStrengthHybrid,SignalDuration,ValidAction,HasValidSignal,SignalStrength,BatchId,IngestedAt,BuyCount,SellCount,HoldCount,Return.1,BuyRank,SellRank,HoldRank
0,78827,Daily,2025-09-10,0.0199,0.0249,0.0141,0.0244,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,3,ThreeWhiteSoldiers,0.019321,-0.171268,False,True,0.016667,True,False,0.23579,False,True,0.226131,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,,,0.016667,0.033135,-0.069845,0.032404,-0.025646,Sell,1.0,1.0,0.0,0.0,Hold,Buy,Buy,Buy,Hold,LastRowHold,0.512664,0.512664,0.512664,0.012664,188.0,True,True,2,78827_20250911_183012,20250911_183012,1,0,0,0.016667,1,240,1550
1,34244,Long,2025-09-10,0.08,0.08,0.08,0.08,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,3,ThreeWhiteSoldiers,0.071782,0.02928,True,False,0.19403,True,False,0.065586,True,False,0.19403,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,,,0.054018,0.019757,0.522387,0.050225,-0.023302,Buy,9.0,13.0,-4.0,-0.4,Sell,Hold,Buy,Buy,Hold,LastRowHold,0.142402,0.142402,0.142402,0.142402,145.0,True,True,1,34244_20250911_174749,20250911_174749,1,0,0,0.054018,1,208,1574
2,68192,Short,2025-08-29,0.01952,0.01952,0.01952,0.01952,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,3,BullishMarubozu,0.01952,0.079646,True,False,0.0,False,False,0.213703,False,True,0.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,,,0.0,0.04153,-0.194336,-0.006376,-0.021905,Sell,6.0,0.0,6.0,2.0,Buy,Hold,Buy,Buy,Hold,LastRowHold,0.687142,0.687142,0.687142,0.687142,60.0,True,True,2,68192_20250911_164110,20250911_164110,2,0,0,0.0,1,461,1855
3,68192,Short,2025-08-29,0.01952,0.01952,0.01952,0.01952,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,3,BullishMarubozu,0.01952,0.079646,True,False,0.0,False,False,0.213703,False,True,0.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,,,0.0,0.04153,-0.194336,-0.006427,-0.021994,Sell,6.0,0.0,6.0,2.0,Buy,Hold,Buy,Buy,Hold,LastRowHold,0.687142,0.687142,0.687142,0.687142,60.0,True,True,2,68192_20250912_164101,20250912_164101,2,0,0,0.0,1,461,1855
4,41183,Swing,2025-09-10,0.009,0.009,0.0055,0.008,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,True,3,ThreeWhiteSoldiers,0.00544,0.387755,True,False,5.666667,True,False,1.296583,False,True,5.666667,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,,,0.142857,0.432333,-0.22326,0.006763,-0.02781,Sell,6.0,3.0,3.0,0.6,Buy,Hold,Buy,Buy,Hold,LastRowHold,0.203429,0.203429,0.203429,0.203429,107.0,True,True,1,41183_20250911_171408,20250911_171408,1,0,0,0.142857,1,249,1563



=== Phase 1 - Processing timeframe: Short ===

=== Phase 1 - Processing timeframe: Swing ===

=== Phase 1 - Processing timeframe: Long ===

=== Phase 1 - Processing timeframe: Daily ===

=== Stage 1 Top Predictions per Timeframe ===
   TimeFrame  CompanyId  PredictedTomorrowClose
0      Short      34193                0.296681
1      Short      34193                0.296681
2      Short      59373                0.114487
3      Short      59373                0.114487
4      Short     292453                0.093368
5      Swing      34193                0.128725
6      Swing      53152                0.086709
7      Swing     246418                0.075724
8      Swing     103533                0.074580
9      Swing       1219                0.074220
10      Long      34193                0.148601
11      Long      53152                0.087359
12      Long       1219                0.075289
13      Long     246418                0.074968
14      Long     103533                0.07419

                                                         

         MAE       MSE     RMSE           R2   RMSLE     MAPE
Fold                                                         
0     6.4267  324.4494  18.0125 -367726.2587  1.5850  19.5846
1     2.0402   47.4770   6.8904    -543.6407  0.7871  24.0991
2     0.9712   22.4706   4.7403       0.0647  0.4872   0.9483
Mean  3.1461  131.4657   9.8811 -122756.6116  0.9531  14.8773
Std   2.3605  136.8415   5.8164  173219.8409  0.4633  10.0203


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 2 candidates, totalling 6 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE       MSE     RMSE           R2   RMSLE     MAPE
Fold                                                         
0     6.4267  324.4494  18.0125 -367726.2587  1.5850  19.5846
1     2.0402   47.4770   6.8904    -543.6407  0.7871  24.0991
2     0.9712   22.4706   4.7403       0.0647  0.4872   0.9483
Mean  3.1461  131.4657   9.8811 -122756.6116  0.9531  14.8773
Std   2.3605  136.8415   5.8164  173219.8409  0.4633  10.0203
               Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Linear Regression  0.6675  3.0714  1.7525  0.4634  0.4562  2.9854




🏃 View run PyCaret_lr_34193_Daily at: http://localhost:8001/#/experiments/3/runs/a85180598d5f4b91af4c4f6249f3a333
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE        R2   RMSLE    MAPE
Fold                                                   
0     0.5385   0.3860  0.6213 -436.4400  0.4478  2.7995
1     0.6661   0.6301  0.7938   -6.2288  0.4901  1.1295
2     1.0195  24.7034  4.9703   -0.0283  0.6286  1.0339
Mean  0.7414   8.5732  2.1284 -147.5657  0.5222  1.6543
Std   0.2034  11.4063  2.0107  204.2807  0.0772  0.8107


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE        R2   RMSLE    MAPE
Fold                                                   
0     0.4663   0.2183  0.4673 -246.4598  0.3824  1.8990
1     0.4537   0.2151  0.4638   -1.4674  0.3650  0.5923
2     1.0193  24.7038  4.9703   -0.0283  0.6287  1.0337
Mean  0.6464   8.3791  1.9671  -82.6519  0.4587  1.1750
Std   0.2637  11.5433  2.1236  115.8312  0.1204  0.5427
              Model    MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Lasso Regression  0.454  5.7239  2.3925 -0.0  0.3503  0.8777




🏃 View run PyCaret_lasso_34193_Daily at: http://localhost:8001/#/experiments/3/runs/7f40f845d69245db9d428d8089ac4ecf
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE         R2   RMSLE     MAPE
Fold                                                     
0     1.0904   1.9444  1.3944 -2202.7665  0.7560  15.2662
1     1.6586  17.3058  4.1600  -197.5263  0.7601  14.4482
2     1.0215  23.3015  4.8272     0.0301  0.5042   1.5282
Mean  1.2568  14.1839  3.4605  -800.0876  0.6734  10.4142
Std   0.2855   8.9941  1.4861   995.1175  0.1197   6.2922


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE     MSE    RMSE         R2   RMSLE     MAPE
Fold                                                    
0     1.0185   1.481  1.2170 -1677.5843  0.7142  14.8196
1     1.0527   2.335  1.5281   -25.7858  0.6661   4.3709
2     1.0378  24.108  4.9100    -0.0035  0.5674   1.2646
Mean  1.0363   9.308  2.5517  -567.7912  0.6492   6.8184
Std   0.0140  10.471  1.6724   784.8128  0.0611   5.7981
              Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Ridge Regression  0.6727  4.2588  2.0637  0.2559  0.4213  4.5248




🏃 View run PyCaret_ridge_34193_Daily at: http://localhost:8001/#/experiments/3/runs/a50cf3b9e39744cb81b1069e8c03c351
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE        R2   RMSLE    MAPE
Fold                                                   
0     0.5857   0.4844  0.6960 -548.0320  0.4808  3.9606
1     0.7358   0.7883  0.8879   -8.0435  0.5265  1.2421
2     1.0195  24.7034  4.9703   -0.0283  0.6286  1.0339
Mean  0.7803   8.6587  2.1847 -185.3679  0.5453  2.0789
Std   0.1798  11.3460  1.9712  256.4631  0.0618  1.3333


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE        R2   RMSLE    MAPE
Fold                                                   
0     0.4663   0.2183  0.4673 -246.4627  0.3824  1.8990
1     0.4537   0.2151  0.4638   -1.4674  0.3650  0.5923
2     1.0193  24.7038  4.9703   -0.0283  0.6287  1.0337
Mean  0.6464   8.3791  1.9671  -82.6528  0.4587  1.1750
Std   0.2637  11.5433  2.1236  115.8326  0.1204  0.5427
         Model    MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Elastic Net  0.454  5.7239  2.3925 -0.0  0.3503  0.8777




🏃 View run PyCaret_en_34193_Daily at: http://localhost:8001/#/experiments/3/runs/734ac531529c45aa9b47c4cdb0027b6e
🧪 View experiment at: http://localhost:8001/#/experiments/3
🏃 View run SARIMAX_53152_Daily at: http://localhost:8001/#/experiments/3/runs/b39631ccbfee4f49817c2462332f08ca
🧪 View experiment at: http://localhost:8001/#/experiments/3
                    Description             Value
0                    Session id                42
1                        Target    TomorrowReturn
2                   Target type        Regression
3           Original data shape         (282, 59)
4        Transformed data shape         (282, 73)
5   Transformed train set shape         (197, 73)
6    Transformed test set shape          (85, 73)
7              Numeric features                19
8          Categorical features                11
9      Rows with missing values              4.6%
10                   Preprocess              True
11              Imputation type            simple
12   

                                                         

         MAE       MSE     RMSE        R2   RMSLE     MAPE
Fold                                                      
0     4.0459  166.7596  12.9135 -872.8738  1.2085  36.6964
1     2.3193    8.4142   2.9007  -14.3270  0.9867  29.3535
2     1.2861   28.4019   5.3293    0.0182  0.6093   6.3261
Mean  2.5504   67.8586   7.0479 -295.7275  0.9348  24.1253
Std   1.1384   70.4080   4.2645  408.1461  0.2473  12.9380


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 2 candidates, totalling 6 fits


                                                         

         MAE       MSE     RMSE        R2   RMSLE     MAPE
Fold                                                      
0     4.0459  166.7596  12.9135 -872.8738  1.2085  36.6964
1     2.1906    7.6437   2.7647  -12.9235  0.9555  33.1644
2     0.9979   28.4807   5.3367    0.0155  0.4885   4.6491
Mean  2.4115   67.6280   7.0050 -295.2606  0.8841  24.8366
Std   1.2541   70.6109   4.3079  408.4684  0.2982  14.3474
               Model    MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Linear Regression  0.752  5.9356  2.4363  0.1557  0.4539  7.8163




🏃 View run PyCaret_lr_53152_Daily at: http://localhost:8001/#/experiments/3/runs/641287e3fde9435e82ab2ee1e5026079
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.4767   0.3712  0.6093 -0.9452  0.2867  3.5277
1     0.6861   0.8314  0.9118 -0.5145  0.3845  8.9216
2     0.9833  29.2772  5.4108 -0.0120  0.5387  1.8396
Mean  0.7154  10.1599  2.3106 -0.4906  0.4033  4.7630
Std   0.2078  13.5193  2.1957  0.3813  0.1037  3.0203


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.4676   0.3227  0.5681 -0.6911  0.2648  3.5406
1     0.6116   0.7035  0.8387 -0.2815  0.3459  8.7728
2     0.9833  29.2772  5.4108 -0.0120  0.5387  1.8396
Mean  0.6875  10.1011  2.2726 -0.3282  0.3831  4.7177
Std   0.2172  13.5604  2.2219  0.2792  0.1149  2.9503
              Model     MAE     MSE    RMSE     R2  RMSLE    MAPE
0  Lasso Regression  0.6085  7.0023  2.6462  0.004  0.351  4.5404




🏃 View run PyCaret_lasso_53152_Daily at: http://localhost:8001/#/experiments/3/runs/c278e86b1c4a41dabe06700bf483d6ca
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE       MSE     RMSE        R2   RMSLE     MAPE
Fold                                                      
0     3.1254  131.8516  11.4827 -689.9447  0.9957  14.1207
1     1.5189    4.5098   2.1236   -7.2149  0.7214  12.5884
2     0.9958   28.7492   5.3618    0.0062  0.4878   2.5487
Mean  1.8800   55.0369   6.3227 -232.3844  0.7350   9.7526
Std   0.9061   55.2103   3.8808  323.5574  0.2076   5.1322


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE        R2   RMSLE     MAPE
Fold                                                    
0     2.0272  59.4177  7.7083 -310.3678  0.7908  10.4250
1     1.2122   2.5908  1.6096   -3.7194  0.6107   8.6480
2     0.9792  28.6730  5.3547    0.0089  0.4871   2.7686
Mean  1.4062  30.2272  4.8909 -104.6928  0.6295   7.2805
Std   0.4493  23.2255  2.5113  145.4422  0.1247   3.2719
              Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Ridge Regression  0.7008  6.3298  2.5159  0.0996  0.4142  6.8236




🏃 View run PyCaret_ridge_53152_Daily at: http://localhost:8001/#/experiments/3/runs/99681b4ed10b4e9eb5ca6ce811567246
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.4924   0.4196  0.6478 -1.1990  0.2892  3.7072
1     0.6963   0.8517  0.9229 -0.5514  0.3891  8.8948
2     0.9833  29.2772  5.4108 -0.0120  0.5387  1.8396
Mean  0.7240  10.1828  2.3272 -0.5875  0.4057  4.8139
Std   0.2013  13.5029  2.1834  0.4852  0.1025  2.9847


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.4657   0.3291  0.5737 -0.7247  0.2689  3.5080
1     0.6223   0.7230  0.8503 -0.3170  0.3532  8.7726
2     0.9832  29.2774  5.4109 -0.0120  0.5388  1.8379
Mean  0.6904  10.1098  2.2783 -0.3512  0.3870  4.7062
Std   0.2167  13.5545  2.2179  0.2919  0.1127  2.9551
         Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Elastic Net  0.6049  6.9939  2.6446  0.0052  0.3491  4.4672




🏃 View run PyCaret_en_53152_Daily at: http://localhost:8001/#/experiments/3/runs/d8762d5b76fc4829888c008c287a008f
🧪 View experiment at: http://localhost:8001/#/experiments/3
🏃 View run SARIMAX_246418_Daily at: http://localhost:8001/#/experiments/3/runs/cb7b580848584fd8a70f309cef20f142
🧪 View experiment at: http://localhost:8001/#/experiments/3
                    Description             Value
0                    Session id                42
1                        Target    TomorrowReturn
2                   Target type        Regression
3           Original data shape         (164, 59)
4        Transformed data shape         (164, 73)
5   Transformed train set shape         (114, 73)
6    Transformed test set shape          (50, 73)
7              Numeric features                19
8          Categorical features                11
9      Rows with missing values              7.9%
10                   Preprocess              True
11              Imputation type            simple
12  

                                                         

               MAE           MSE          RMSE            R2   RMSLE          MAPE
Fold                                                                              
0     1.126669e+06  3.622831e+13  6.018995e+06 -1.673115e+14  4.4079  7.252198e+08
1     4.921819e+05  4.602609e+12  2.145369e+06 -5.885661e+13  3.6826  1.239858e+07
2     7.527000e+00  8.329560e+01  9.126600e+00 -2.094713e+03  1.9965  5.555606e+02
Mean  5.396195e+05  1.361030e+13  2.721458e+06 -7.538938e+13  3.3624  2.458730e+08
Std   4.611792e+05  1.610334e+13  2.490777e+06  6.929784e+13  1.0101  3.389872e+08


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 2 candidates, totalling 6 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
               MAE           MSE          RMSE            R2   RMSLE          MAPE
Fold                                                                              
0     1.126669e+06  3.622831e+13  6.018995e+06 -1.673115e+14  4.4079  7.252198e+08
1     4.921819e+05  4.602609e+12  2.145369e+06 -5.885661e+13  3.6826  1.239858e+07
2     7.527000e+00  8.329560e+01  9.126600e+00 -2.094713e+03  1.9965  5.555606e+02
Mean  5.396195e+05  1.361030e+13  2.721458e+06 -7.538938e+13  3.3624  2.458730e+08
Std   4.611792e+05  1.610334e+13  2.490777e+06  6.929784e+13  1.0101  3.389872e+08
               Model    MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Linear Regression  0.124  0.0473  0.2175  0.5752  0.1285  7.8111




🏃 View run PyCaret_lr_246418_Daily at: http://localhost:8001/#/experiments/3/runs/a3e8e99d0f4c47188f2db564029fb14f
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.2018  0.2167  0.4655 -0.0007  0.2550  3.5500
1     0.1504  0.0782  0.2797 -0.0003  0.1849  4.6519
2     0.1221  0.0400  0.2001 -0.0074  0.1434  4.2400
Mean  0.1581  0.1117  0.3151 -0.0028  0.1945  4.1473
Std   0.0330  0.0759  0.1112  0.0033  0.0461  0.4546


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.2018  0.2167  0.4655 -0.0007  0.2551  3.5478
1     0.1504  0.0782  0.2797 -0.0003  0.1850  4.6491
2     0.1221  0.0400  0.2001 -0.0074  0.1434  4.2377
Mean  0.1581  0.1117  0.3151 -0.0028  0.1945  4.1449
Std   0.0330  0.0759  0.1112  0.0032  0.0461  0.4544
              Model     MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Lasso Regression  0.1601  0.1114  0.3337 -0.0  0.2024  3.7839




🏃 View run PyCaret_lasso_246418_Daily at: http://localhost:8001/#/experiments/3/runs/126ca935f99e49b0b271709b6fe4a837
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE     MAPE
Fold                                                 
0     0.2543  0.2350  0.4847 -0.0851  0.2511  19.7043
1     0.2609  0.1325  0.3640 -0.6939  0.2139  17.2027
2     0.2483  0.1184  0.3440 -1.9779  0.2241  10.2804
Mean  0.2545  0.1619  0.3976 -0.9190  0.2297  15.7292
Std   0.0052  0.0520  0.0622  0.7889  0.0157   3.9859


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.2041  0.2172  0.4661 -0.0031  0.2500  9.3151
1     0.2087  0.1072  0.3274 -0.3710  0.1870  9.9385
2     0.1772  0.0662  0.2573 -0.6655  0.1712  6.2185
Mean  0.1967  0.1302  0.3503 -0.3465  0.2027  8.4907
Std   0.0139  0.0638  0.0867  0.2710  0.0341  1.6267
              Model     MAE     MSE    RMSE      R2   RMSLE   MAPE
0  Ridge Regression  0.1521  0.0826  0.2874  0.2583  0.1682  6.077




🏃 View run PyCaret_ridge_246418_Daily at: http://localhost:8001/#/experiments/3/runs/b54aa8e3552e48ff878ce6b13c052700
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.2018  0.2167  0.4655 -0.0007  0.2550  3.5500
1     0.1536  0.0786  0.2803 -0.0048  0.1857  4.5847
2     0.1221  0.0400  0.2001 -0.0074  0.1434  4.2400
Mean  0.1592  0.1118  0.3153 -0.0043  0.1947  4.1249
Std   0.0328  0.0758  0.1111  0.0027  0.0460  0.4302


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.2018  0.2167  0.4655 -0.0007  0.2551  3.5478
1     0.1504  0.0782  0.2797 -0.0003  0.1850  4.6492
2     0.1221  0.0400  0.2001 -0.0074  0.1434  4.2377
Mean  0.1581  0.1117  0.3151 -0.0028  0.1945  4.1449
Std   0.0330  0.0759  0.1112  0.0032  0.0461  0.4544
         Model     MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Elastic Net  0.1601  0.1114  0.3337 -0.0  0.2024  3.7818




🏃 View run PyCaret_en_246418_Daily at: http://localhost:8001/#/experiments/3/runs/3af51a797a06435f832f20bfaaccaa94
🧪 View experiment at: http://localhost:8001/#/experiments/3
🏃 View run SARIMAX_103533_Daily at: http://localhost:8001/#/experiments/3/runs/9d087c1d7af94902898ba511795a4d93
🧪 View experiment at: http://localhost:8001/#/experiments/3
                    Description             Value
0                    Session id                42
1                        Target    TomorrowReturn
2                   Target type        Regression
3           Original data shape         (225, 59)
4        Transformed data shape         (225, 73)
5   Transformed train set shape         (157, 73)
6    Transformed test set shape          (68, 73)
7              Numeric features                19
8          Categorical features                11
9      Rows with missing values              5.8%
10                   Preprocess              True
11              Imputation type            simple
12 

                                                         

               MAE           MSE          RMSE            R2   RMSLE     MAPE
Fold                                                                         
0     1.773000e-01  4.326000e-01  6.577000e-01  9.390000e-02  0.2694   0.4882
1     7.098079e+07  1.027628e+17  3.205663e+08 -2.829504e+18  5.1328  10.6496
2     4.540000e-02  1.770000e-02  1.329000e-01 -9.210000e-02  0.1102   1.0000
Mean  2.366026e+07  3.425425e+16  1.068554e+08 -9.431681e+17  1.8375   4.0459
Std   3.346067e+07  4.844283e+16  1.511164e+08  1.333841e+18  2.3311   4.6742


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 2 candidates, totalling 6 fits


                                                         

             MAE           MSE         RMSE            R2   RMSLE        MAPE
Fold                                                                         
0     48091.9326  8.038752e+10  283526.9251 -1.683911e+11  2.9919      0.4882
1     11637.7538  2.762120e+09   52555.8765 -7.605315e+10  2.9436      8.8850
2      4360.1202  3.683850e+08   19193.3571 -2.276259e+10  2.9072  99323.8505
Mean  21363.2689  2.783934e+10  118425.3862 -8.906895e+10  2.9476  33111.0746
Std   19132.1211  3.717002e+10  117536.2441  6.016075e+10  0.0347  46819.5030
               Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Linear Regression  0.1333  0.1027  0.3204  0.6308  0.1522  1.4058




🏃 View run PyCaret_lr_103533_Daily at: http://localhost:8001/#/experiments/3/runs/a3db7e61e1e64a2c80e5d41ddf48b587
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.1681  0.4935  0.7025 -0.0337  0.3045  1.0057
1     0.0813  0.0362  0.1904  0.0021  0.1322  0.8557
2     0.1075  0.0283  0.1682 -0.7479  0.1061  0.9832
Mean  0.1190  0.1860  0.3537 -0.2598  0.1809  0.9482
Std   0.0364  0.2174  0.2468  0.3454  0.0881  0.0661


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.1681  0.4935  0.7025 -0.0337  0.3046  1.0057
1     0.0838  0.0360  0.1897  0.0089  0.1338  0.7538
2     0.1019  0.0293  0.1711 -0.8081  0.1088  1.0239
Mean  0.1180  0.1862  0.3544 -0.2777  0.1824  0.9278
Std   0.0362  0.2173  0.2462  0.3755  0.0870  0.1233
              Model    MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Lasso Regression  0.154  0.2782  0.5274  0.0  0.2241  1.0345




🏃 View run PyCaret_lasso_103533_Daily at: http://localhost:8001/#/experiments/3/runs/99f4b0b2fd074a74bf7874f5ddbe01bb
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.1910  0.4963  0.7045 -0.0396  0.3017  0.8798
1     0.2807  0.2409  0.4908 -5.6333  0.2696  3.2991
2     0.1441  0.0468  0.2164 -1.8927  0.1561  2.3365
Mean  0.2053  0.2613  0.4706 -2.5219  0.2425  2.1718
Std   0.0567  0.1841  0.1998  2.3266  0.0625  0.9945


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.1862  0.4930  0.7021 -0.0327  0.3042  0.9156
1     0.2300  0.1295  0.3599 -2.5670  0.2282  2.2669
2     0.1311  0.0372  0.1928 -1.2965  0.1248  1.7268
Mean  0.1824  0.2199  0.4183 -1.2987  0.2191  1.6364
Std   0.0404  0.1968  0.2120  1.0346  0.0735  0.5553




              Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Ridge Regression  0.1733  0.2131  0.4617  0.2338  0.1992  1.6051




🏃 View run PyCaret_ridge_103533_Daily at: http://localhost:8001/#/experiments/3/runs/af1668cef34045569df89965b72e95d0
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.1681  0.4935  0.7025 -0.0337  0.3045  1.0057
1     0.0874  0.0362  0.1902  0.0040  0.1350  0.7253
2     0.1049  0.0298  0.1726 -0.8411  0.1107  1.0351
Mean  0.1201  0.1865  0.3551 -0.2903  0.1834  0.9220
Std   0.0347  0.2171  0.2457  0.3898  0.0862  0.1396


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.1681  0.4935  0.7025 -0.0337  0.3046  1.0057
1     0.0813  0.0364  0.1907 -0.0011  0.1323  0.8677
2     0.1082  0.0283  0.1681 -0.7466  0.1061  0.9785
Mean  0.1192  0.1860  0.3538 -0.2605  0.1810  0.9506
Std   0.0363  0.2174  0.2468  0.3440  0.0880  0.0597
         Model    MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Elastic Net  0.154  0.2782  0.5274  0.0  0.2241  1.0345




🏃 View run PyCaret_en_103533_Daily at: http://localhost:8001/#/experiments/3/runs/6511447f07564349af2845854e8ce801
🧪 View experiment at: http://localhost:8001/#/experiments/3
⏭️ Skipping 1219-Daily (not enough data)
🏃 View run SARIMAX_34193_Short at: http://localhost:8001/#/experiments/3/runs/70c295e049dc44ceb57747bb00f27928
🧪 View experiment at: http://localhost:8001/#/experiments/3
                    Description             Value
0                    Session id                42
1                        Target    TomorrowReturn
2                   Target type        Regression
3           Original data shape         (249, 59)
4        Transformed data shape         (249, 71)
5   Transformed train set shape         (174, 71)
6    Transformed test set shape          (75, 71)
7              Ordinal features                 2
8              Numeric features                19
9          Categorical features                11
10     Rows with missing values              2.4%
11           

25/09/12 15:32:05 WARN HeartbeatReceiver: Removing executor 0 with no recent heartbeats: 155907 ms exceeds timeout 120000 ms
25/09/12 15:33:15 ERROR TaskSchedulerImpl: Lost executor 0 on 10.20.10.19: Executor heartbeat timed out after 155907 ms
25/09/12 15:33:56 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_52_43 !
25/09/12 15:33:57 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_211_228 !
25/09/12 15:33:57 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_124_3 !
25/09/12 15:33:57 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_56_13 !
25/09/12 15:33:57 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_20_41 !
25/09/12 15:33:57 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_56_10 !
25/09/12 15:33:57 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_198_33 !
25/09/12 15:33:58 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_211_219 

         MAE      MSE    RMSE         R2   RMSLE     MAPE
Fold                                                     
0     0.5132  12.4037  3.5219    -0.0159  0.4608   1.0000
1     2.5475  64.7223  8.0450 -2517.1991  0.9967  32.2882
2     0.5056  12.4129  3.5232    -0.0183  0.4604   1.0000
Mean  1.1887  29.8463  5.0300  -839.0778  0.6393  11.4294
Std   0.9608  24.6611  2.1319  1186.6110  0.2527  14.7494


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 2 candidates, totalling 6 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE      MSE    RMSE         R2   RMSLE     MAPE
Fold                                                     
0     0.5132  12.4037  3.5219    -0.0159  0.4608   1.0000
1     2.5475  64.7223  8.0450 -2517.1991  0.9967  32.2882
2     0.5056  12.4129  3.5232    -0.0183  0.4604   1.0000
Mean  1.1887  29.8463  5.0300  -839.0778  0.6393  11.4294
Std   0.9608  24.6611  2.1319  1186.6110  0.2527  14.7494
               Model     MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Linear Regression  0.0017  0.0001  0.0098  1.0  0.0093  0.0019




🏃 View run PyCaret_lr_34193_Short at: http://localhost:8001/#/experiments/3/runs/8dfe321af9824c24b073af39694c0dbf
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.7098  12.0319  3.4687  0.0146  0.4471  1.1622
1     0.2941   0.1006  0.3171 -2.9134  0.2411  1.0799
2     0.7588  11.9793  3.4611  0.0172  0.4655  1.1309
Mean  0.5876   8.0372  2.4157 -0.9605  0.3846  1.1243
Std   0.2085   5.6121  1.4839  1.3809  0.1017  0.0339


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.7321  12.2520  3.5003 -0.0034  0.4655  1.1613
1     0.3475   0.1339  0.3660 -4.2111  0.2791  1.1983
2     0.7024  12.2538  3.5005 -0.0053  0.4655  0.7900
Mean  0.5940   8.2133  2.4556 -1.4066  0.4034  1.0498
Std   0.1747   5.7129  1.4776  1.9830  0.0879  0.1844
              Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Lasso Regression  0.0794  0.1742  0.4174  0.9697  0.0448  0.1518




🏃 View run PyCaret_lasso_34193_Short at: http://localhost:8001/#/experiments/3/runs/c58d6345a5bc4584916e20c2bdcc9255
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.8439  6.5736  2.5639  0.4616  0.4579  2.6798
1     0.2376  0.0690  0.2628 -1.6865  0.2222  0.7237
2     0.8706  6.2657  2.5031  0.4860  0.4736  2.5535
Mean  0.6507  4.3028  1.7766 -0.2463  0.3846  1.9857
Std   0.2923  2.9963  1.0707  1.0184  0.1150  0.8938


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE     MSE    RMSE      R2   RMSLE    MAPE
Fold                                                
0     0.8245  6.3123  2.5124  0.4830  0.4556  2.5051
1     0.2365  0.0672  0.2592 -1.6137  0.2225  0.7018
2     0.9081  5.9020  2.4294  0.5158  0.4970  3.1190
Mean  0.6564  4.0938  1.7337 -0.2050  0.3917  2.1086
Std   0.2989  2.8522  1.0432  0.9962  0.1208  1.0259
              Model     MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Ridge Regression  0.0024  0.0001  0.0105  1.0  0.0097  0.0038




🏃 View run PyCaret_ridge_34193_Short at: http://localhost:8001/#/experiments/3/runs/ad4b320ccd6a4a63b6cdc5aeaac72fd9
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.7606  11.8251  3.4388  0.0315  0.4525  1.1631
1     0.2935   0.1002  0.3166 -2.8998  0.2406  1.0785
2     0.8421  11.8165  3.4375  0.0306  0.4810  1.4250
Mean  0.6321   7.9139  2.3976 -0.9459  0.3914  1.2222
Std   0.2417   5.5251  1.4715  1.3816  0.1072  0.1475


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.7112  12.0592  3.4726  0.0124  0.4481  1.1621
1     0.2987   0.1032  0.3213 -3.0155  0.2443  1.0901
2     0.7508  12.0032  3.4646  0.0153  0.4647  1.0927
Mean  0.5869   8.0552  2.4195 -0.9960  0.3857  1.1149
Std   0.2044   5.6230  1.4837  1.4280  0.1002  0.0333
         Model     MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Elastic Net  0.0994  0.2731  0.5226  0.9525  0.0556  0.1905




🏃 View run PyCaret_en_34193_Short at: http://localhost:8001/#/experiments/3/runs/d7334c2df56c4e03b26a8b6f545f8bca
🧪 View experiment at: http://localhost:8001/#/experiments/3
🏃 View run SARIMAX_59373_Short at: http://localhost:8001/#/experiments/3/runs/c03ef2be93204ea0946f19cec3397ade
🧪 View experiment at: http://localhost:8001/#/experiments/3
                    Description             Value
0                    Session id                42
1                        Target    TomorrowReturn
2                   Target type        Regression
3           Original data shape         (173, 59)
4        Transformed data shape         (173, 71)
5   Transformed train set shape         (121, 71)
6    Transformed test set shape          (52, 71)
7              Ordinal features                 2
8              Numeric features                19
9          Categorical features                11
10     Rows with missing values              3.5%
11                   Preprocess              True
12   

                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.0430   0.0378  0.1945 -0.7828  0.1395  0.0000
1     0.0002   0.0000  0.0011  1.0000  0.0007  0.0031
2     0.5956  12.9723  3.6017  0.0793  0.5099  0.0000
Mean  0.2129   4.3367  1.2658  0.0989  0.2167  0.0010
Std   0.2712   6.1063  1.6536  0.7279  0.2149  0.0015


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 2 candidates, totalling 6 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.0430   0.0378  0.1945 -0.7828  0.1395  0.0000
1     0.0002   0.0000  0.0011  1.0000  0.0007  0.0031
2     0.5956  12.9723  3.6017  0.0793  0.5099  0.0000
Mean  0.2129   4.3367  1.2658  0.0989  0.2167  0.0010
Std   0.2712   6.1063  1.6536  0.7279  0.2149  0.0015
               Model  MAE  MSE  RMSE   R2  RMSLE  MAPE
0  Linear Regression  0.0  0.0   0.0  1.0    0.0   0.0




🏃 View run PyCaret_lr_59373_Short at: http://localhost:8001/#/experiments/3/runs/06afedd5dcdc4529af2a08028434634f
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.2667   0.0924  0.3039 -3.3522  0.2264  1.2580
1     0.8924  14.0553  3.7490  0.0026  0.5265  1.1057
2     0.8846  14.0537  3.7488  0.0026  0.5214  1.1515
Mean  0.6813   9.4004  2.6006 -1.1157  0.4248  1.1718
Std   0.2931   6.5818  1.6240  1.5814  0.1403  0.0638


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.2567   0.0871  0.2951 -3.1035  0.2192  1.2474
1     0.9119  13.8186  3.7173  0.0193  0.5344  1.0395
2     0.8883  13.8273  3.7185  0.0186  0.5185  1.1773
Mean  0.6856   9.2443  2.5770 -1.0218  0.4240  1.1547
Std   0.3035   6.4751  1.6135  1.4720  0.1450  0.0864
              Model    MAE     MSE    RMSE      R2   RMSLE    MAPE
0  Lasso Regression  0.083  0.1516  0.3893  0.9771  0.0457  0.1768




🏃 View run PyCaret_lasso_59373_Short at: http://localhost:8001/#/experiments/3/runs/170b82f4491d473a8b590c32a7344c36
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.2248   0.0607  0.2464 -1.8605  0.2122  0.3049
1     0.7246   2.6269  1.6208  0.8136  0.4312  2.8727
2     1.1504  18.4093  4.2906 -0.3065  0.5792  1.6254
Mean  0.6999   7.0323  2.0526 -0.4512  0.4075  1.6010
Std   0.3783   8.1127  1.6790  1.0965  0.1508  1.0484


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.2220   0.0599  0.2447 -1.8211  0.2111  0.2477
1     0.6106   1.6789  1.2957  0.8809  0.3787  2.5879
2     1.1959  27.0147  5.1976 -0.9173  0.5915  1.2363
Mean  0.6762   9.5845  2.2460 -0.6192  0.3938  1.3573
Std   0.4003  12.3427  2.1307  1.1230  0.1557  0.9592
              Model     MAE     MSE    RMSE   R2   RMSLE    MAPE
0  Ridge Regression  0.0032  0.0002  0.0138  1.0  0.0129  0.0033




🏃 View run PyCaret_ridge_59373_Short at: http://localhost:8001/#/experiments/3/runs/514750242fcc4ef5a2bdffc035180ab6
🧪 View experiment at: http://localhost:8001/#/experiments/3


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.2659   0.0920  0.3032 -3.3322  0.2259  1.2572
1     0.9149  13.7863  3.7130  0.0216  0.5375  1.0292
2     0.8889  13.7955  3.7142  0.0209  0.5197  1.1814
Mean  0.6899   9.2246  2.5768 -1.0966  0.4277  1.1559
Std   0.3000   6.4577  1.6077  1.5808  0.1429  0.0948


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 3 folds for each of 10 candidates, totalling 30 fits


                                                         

         MAE      MSE    RMSE      R2   RMSLE    MAPE
Fold                                                 
0     0.2742   0.0964  0.3105 -3.5418  0.2318  1.2659
1     0.8899  14.0898  3.7536  0.0001  0.5271  1.1143
2     0.8841  14.0865  3.7532  0.0003  0.5233  1.1482
Mean  0.6827   9.4242  2.6058 -1.1805  0.4274  1.1761
Std   0.2889   6.5958  1.6230  1.6697  0.1383  0.0650




         Model     MAE     MSE    RMSE      R2   RMSLE   MAPE
0  Elastic Net  0.1046  0.2403  0.4902  0.9637  0.0564  0.223




🏃 View run PyCaret_en_59373_Short at: http://localhost:8001/#/experiments/3/runs/b5809f6a3c1e46d89c9926c49db3cd43
🧪 View experiment at: http://localhost:8001/#/experiments/3
⏭️ Skipping 292453-Short (not enough data)
