<a href="https://colab.research.google.com/github/Beavis1986/Pulse-bot-/blob/main/ColExpert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

from datetime import datetime

# Stamp the run with the current calendar date (ISO format), evaluated at execution time
end_date = f"{datetime.now():%Y-%m-%d}"
print(f"📆 Auto-set end_date: {end_date}")

📆 Auto-set end_date: 2025-07-14


In [2]:

# --- STREAMLIT DASHBOARD EXPORT ---
!pip install streamlit --quiet

with open("stock_dashboard.py", "w") as f:
    f.write('''
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

st.title("Colexpert Strategy Dashboard")

log_df = pd.read_csv("signal_log.csv", parse_dates=["Date"])
log_df.set_index("Date", inplace=True)

st.line_chart(log_df[["Cumulative Return", "Cumulative Vol-Weighted", "Cumulative Parlay"]])

st.dataframe(log_df.sort_values("Return", ascending=False).head(15)[["Return", "Confidence", "Top (Buy)", "Bottom (Short)"]])

st.write("Latest Signal:")
latest = log_df.iloc[-1]
st.success(f"{latest.name.date()} | Confidence: {latest['Confidence']} | BUY: {latest['Top (Buy)']} | SHORT: {latest['Bottom (Short)']} | Return: {round(latest['Return']*100, 2)}%")
''')

# Save log
log_df.reset_index().to_csv("signal_log.csv", index=False)
print("✅ Dashboard code exported as stock_dashboard.py — run with: streamlit run stock_dashboard.py")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m91.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h

NameError: name 'log_df' is not defined

In [None]:

# --- SETUP ---
!pip install yfinance pandas numpy matplotlib --quiet

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- CONFIG ---
from datetime import datetime

# Asset universe and start of the history window.
# NOTE(review): `tickers` and `start_date` were referenced below but never defined
# anywhere in the notebook, so a fresh kernel failed with a NameError. The values
# here are placeholder defaults — replace them with the intended universe/window.
# "AAPL" is kept in the list because the correlation-filter cell uses it as its benchmark.
tickers = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META",
    "TSLA", "JPM", "XOM", "JNJ", "PG", "KO",
]
start_date = "2022-01-01"

# Automatically set today's date when the notebook runs
end_date = datetime.now().strftime("%Y-%m-%d")
print(f"📆 Auto-set end_date: {end_date}")

# Toggle modules
USE_STRATEGY_A = True
USE_STRATEGY_B = True
USE_STRATEGY_C = True
USE_PARLAY_MODE = True
USE_WEIGHTED_ARF_MIL = True
USE_SIGNAL_PRINT = True

# --- FETCH DATA ---
# Closing prices, one column per ticker; rows with any missing price are dropped.
data = yf.download(tickers, start=start_date, end=end_date)["Close"].dropna()
if data.empty:
    # Fail here with a clear message rather than later with an obscure indexing error.
    raise ValueError(
        f"No price data returned for {tickers} between {start_date} and {end_date}."
    )
# Simple period-over-period returns; the first row is NaN and is dropped.
returns = data.pct_change().dropna()

# --- STRATEGIES ---
def strategy_a(df):
    """Score each column by its 5-period rolling mean."""
    five_period = df.rolling(window=5)
    return five_period.mean()
def strategy_b(df):
    """Score each column by the negated 10-period rolling standard deviation."""
    ten_period_std = df.rolling(window=10).std()
    return -ten_period_std
def strategy_c(df):
    """Score each column as 3-period rolling mean over (5-period rolling std + 1e-6)."""
    short_mean = df.rolling(window=3).mean()
    # The small constant keeps the denominator away from zero.
    denominator = df.rolling(window=5).std() + 1e-6
    return short_mean / denominator

def compute_arf_mil_weights(df):
    """Return a per-row weight 1 / (1 + H), with H = -sum(x * log(x + 1e-9)) across columns."""
    log_terms = np.log(df + 1e-9)
    # Summed along axis 1, i.e. across the columns of each row.
    entropy = -(df * log_terms).sum(axis=1)
    return 1 / (1 + entropy)

def compute_confidence_score(row):
    """Bucket a row's "Return" into "HIGH" (> 0.09), "MEDIUM" (> 0.05) or "LOW"."""
    if row["Return"] > 0.09:
        return "HIGH"
    return "MEDIUM" if row["Return"] > 0.05 else "LOW"

# --- EXECUTE ENGINE ---
LOOKBACK_DAYS = 20  # trailing rows of returns scored for each signal
N_PICKS = 2         # assets taken on each side (long / short)

# Strategy functions switched on by the USE_STRATEGY_* toggles, keyed by label.
active_strategies = {
    label: scorer
    for label, scorer, enabled in [
        ("A", strategy_a, USE_STRATEGY_A),
        ("B", strategy_b, USE_STRATEGY_B),
        ("C", strategy_c, USE_STRATEGY_C),
    ]
    if enabled
}
if not active_strategies:
    # With every toggle off, the score average below would divide by zero.
    raise ValueError("Enable at least one of USE_STRATEGY_A / USE_STRATEGY_B / USE_STRATEGY_C.")
if len(returns) <= LOOKBACK_DAYS:
    # Otherwise the loop never runs and the log frame has no "Return" column.
    raise ValueError(
        f"Need more than {LOOKBACK_DAYS} rows of returns to generate signals; got {len(returns)}."
    )

log = []
for i in range(LOOKBACK_DAYS, len(returns)):
    today = returns.index[i]
    # The window ends at row i-1, so scores are built from rows before `today` only.
    subset = returns.iloc[i - LOOKBACK_DAYS:i]
    # Each strategy yields one score per asset: the last row of its rolling output.
    scores = {label: scorer(subset).iloc[-1] for label, scorer in active_strategies.items()}

    # Equal-weight average of the enabled strategies' scores.
    combined_score = sum(scores.values()) / len(scores)
    top_assets = combined_score.sort_values(ascending=False).head(N_PICKS).index.tolist()
    bottom_assets = combined_score.sort_values().head(N_PICKS).index.tolist()

    # Mean return of the top picks minus mean return of the bottom picks on `today`.
    daily_return = returns.iloc[i][top_assets].mean() - returns.iloc[i][bottom_assets].mean()

    log.append({
        "Date": today,
        "Return": daily_return,
        "Top (Buy)": ", ".join(top_assets),
        "Bottom (Short)": ", ".join(bottom_assets),
    })

log_df = pd.DataFrame(log)
log_df["Confidence"] = log_df.apply(compute_confidence_score, axis=1)

# Parlay mode
if USE_PARLAY_MODE:
    # Compound each day with the previous one: (1 + r_{t-1}) * (1 + r_t) - 1.
    # Multiplying the raw returns instead (r_{t-1} * r_t - 1) gives roughly -100%
    # every day, which flattens the cumulative parlay curve to zero.
    log_df["Parlay Return"] = (1 + log_df["Return"]).rolling(2).apply(np.prod, raw=True) - 1
else:
    log_df["Parlay Return"] = 0

# Weighting
if USE_WEIGHTED_ARF_MIL:
    # Weights are computed from the 3-period rolling mean of asset returns, aligned
    # to the signal dates; dates with no weight fall back to 1 (unweighted).
    weights = compute_arf_mil_weights(returns.rolling(3).mean().dropna())
    weights = weights.reindex(log_df["Date"]).fillna(1)
    # `.values` multiplies positionally: log_df is still on its default integer
    # index here, while `weights` is indexed by date.
    log_df["Weighted Return"] = log_df["Return"] * weights.values
else:
    log_df["Weighted Return"] = log_df["Return"]

# Cumulative plots
log_df.set_index("Date", inplace=True)

# Per-period return series feeding each growth curve; missing parlay values count as 0.
growth_inputs = {
    "Cumulative Return": log_df["Return"],
    "Cumulative Weighted Return": log_df["Weighted Return"],
    "Cumulative Parlay": log_df["Parlay Return"].fillna(0),
}
for curve_name, period_returns in growth_inputs.items():
    log_df[curve_name] = (1 + period_returns).cumprod()

performance_ax = log_df[list(growth_inputs)].plot(
    title="Colexpert Strategy Performance", figsize=(12, 6))
performance_ax.set_ylabel("Growth")
performance_ax.grid(True)
plt.tight_layout()
plt.show()

# Signal output
if USE_SIGNAL_PRINT:
    signal_columns = ["Return", "Confidence", "Top (Buy)", "Bottom (Short)"]
    best_days = log_df.sort_values("Return", ascending=False).head(10)

    print("\n🔥 Top Ranked Trade Days (by raw return):")
    display(best_days[signal_columns])

    # The most recent row of the log is reported as the current signal.
    print("\n📈 Today's Signal:")
    latest = log_df.iloc[-1]
    print(f"{latest.name.date()} | Confidence: {latest['Confidence']} | BUY: {latest['Top (Buy)']} | SHORT: {latest['Bottom (Short)']} | Return: {round(latest['Return']*100, 2)}%")

In [None]:

# --- BACKTEST METRICS LAYER ---
def backtest_metrics(df):
    """Summarise a strategy log as CAGR, Sharpe ratio and maximum drawdown.

    Parameters
    ----------
    df : DataFrame
        Must have a "Return" column (per-period returns), a "Cumulative Return"
        column (growth factor) and an index whose first/last difference exposes
        `.days` (e.g. a DatetimeIndex).

    Returns
    -------
    dict
        "CAGR" and "Max Drawdown" in percent, "Sharpe Ratio" annualised with
        sqrt(252); all rounded to 2 decimals. CAGR is NaN when the index spans
        zero days, and the Sharpe ratio is NaN when the return standard
        deviation is zero or undefined.
    """
    equity = df["Cumulative Return"]
    total_return = equity.iloc[-1]
    num_years = (df.index[-1] - df.index[0]).days / 365.25
    # A zero-length span has no annualised growth rate; avoid dividing by zero.
    cagr = total_return ** (1 / num_years) - 1 if num_years > 0 else float("nan")

    return_std = df["Return"].std()
    # The comparison is False for NaN (single-row input), so that case yields NaN too.
    sharpe = df["Return"].mean() / return_std * np.sqrt(252) if return_std > 0 else float("nan")

    # Drawdown is measured relative to the running peak. The plain difference of
    # growth factors would overstate it whenever the peak is above 1.
    running_peak = equity.cummax()
    max_drawdown = ((running_peak - equity) / running_peak).max()
    return {
        "CAGR": round(cagr * 100, 2),
        "Sharpe Ratio": round(sharpe, 2),
        "Max Drawdown": round(max_drawdown * 100, 2)
    }

# Summarise the main strategy log and print one metric per line
metrics = backtest_metrics(log_df)
print("📊 Backtest Performance:")
for metric_name in metrics:
    print(f"{metric_name}: {metrics[metric_name]}")

In [None]:

# --- FIXED VOLATILITY WEIGHTING LAYER ---
def apply_volatility_weights(df, returns, lookback=5):
    """Add volatility-scaled return columns to `df` (modified in place) and return it.

    The weight for each date is 1 / (1 + mean across assets of the rolling
    `lookback`-period std of `returns`), aligned to `df.index`; missing leading
    weights are back-filled from the next available date.
    """
    cross_asset_vol = returns.rolling(window=lookback).std().mean(axis=1)
    scale = (1 / (1 + cross_asset_vol)).reindex(df.index).bfill()

    df["Volatility Weighted Return"] = df["Return"] * scale
    growth_steps = 1 + df["Volatility Weighted Return"]
    df["Cumulative Volatility Weighted"] = growth_steps.cumprod()
    return df

# Add "Volatility Weighted Return" and "Cumulative Volatility Weighted" to the strategy log
# (default 5-period lookback).
log_df = apply_volatility_weights(log_df, returns)

In [None]:

# --- FACTOR FILTER (STATIC SNAPSHOT) ---
# You can expand this using real financials via APIs later
# Rank assets by the standard deviation of their returns over the whole sample
# and keep the ten least volatile.
per_asset_vol = returns.std()
calmest_first = per_asset_vol.sort_values()
low_vol_names = calmest_first.index[:10].tolist()
print("🧠 Conservative Strategy Top Picks (Low Volatility):")
print(low_vol_names)

In [None]:

# --- CORRELATION FILTER ---
# Keep only tickers whose correlation to the benchmark is below the threshold
# (i.e. drop anything correlated at or above it, including the benchmark itself).
BENCHMARK_TICKER = "AAPL"   # substitute for SPY
MAX_BENCHMARK_CORR = 0.85

if BENCHMARK_TICKER not in returns.columns:
    # Report the available universe instead of a bare KeyError on the column lookup.
    raise KeyError(
        f"Benchmark {BENCHMARK_TICKER!r} is not in the downloaded universe: {list(returns.columns)}"
    )

cor_matrix = returns.corr()
corr_to_spy = cor_matrix[BENCHMARK_TICKER]
low_corr_assets = corr_to_spy[corr_to_spy < MAX_BENCHMARK_CORR].index.tolist()
print("🧊 Low Correlation Assets Selected:")
print(low_corr_assets)

In [None]:

# --- CONFIDENCE SCORING ENHANCER ---
# Scale each period's return by a weight tied to its confidence bucket, then compound.
bucket_weight = {"LOW": 0.3, "MEDIUM": 0.6, "HIGH": 1.0}
confidence_weights = log_df["Confidence"].map(bucket_weight)
log_df["Confidence Weighted Return"] = log_df["Return"].mul(confidence_weights)
log_df["Cumulative Confidence Weighted"] = log_df["Confidence Weighted Return"].add(1).cumprod()

In [None]:

# --- MACRO FILTER LAYER ---
# Volatility proxy: 5-period rolling mean of the cross-sectional std of asset returns.
# NOTE(review): the threshold is the median over the whole sample and the proxy
# includes the same day's returns, so the regime flag uses information not
# available at signal time — confirm this is intended for the backtest.
cross_sectional_dispersion = returns.std(axis=1)
vix_proxy = cross_sectional_dispersion.rolling(5).mean()
calm_threshold = vix_proxy.median()
macro_filter = vix_proxy < calm_threshold

# Dates with no regime flag are treated as "not safe" (False).
log_df["Macro Regime"] = macro_filter.reindex(log_df.index).fillna(False)
# Multiplying by the boolean flag zeroes the return on filtered-out dates.
log_df["Macro Filtered Return"] = log_df["Return"].mul(log_df["Macro Regime"])
log_df["Cumulative Macro Filtered"] = log_df["Macro Filtered Return"].add(1).cumprod()

In [None]:

# --- MASTER COMPARISON PLOT (SAFE) ---
# Candidate curves in legend order; any that were not computed are skipped.
columns_to_plot = [
    "Cumulative Return", "Cumulative Weighted Return",
    "Cumulative Volatility Weighted", "Cumulative Confidence Weighted",
    "Cumulative Macro Filtered", "Cumulative Parlay",
]

computed_columns = set(log_df.columns)
available = [name for name in columns_to_plot if name in computed_columns]

comparison_ax = log_df[available].plot(title="📊 Colexpert Strategy Curve Comparison", figsize=(14, 7))
comparison_ax.set_ylabel("Growth")
comparison_ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:

# --- COLEXPERT MACHINE LEARNING UPGRADE ---

!pip install xgboost scikit-learn seaborn --quiet

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# --- 1. FEATURE ENGINEERING ---
log_df["DayOfWeek"] = log_df.index.dayofweek
# "Macro Regime" exists only if the macro-filter cell has run. The old
# `log_df.get("Macro Regime", False).astype(int)` crashed on the bare `False`
# fallback, so default the feature to 0 explicitly when the column is missing.
if "Macro Regime" in log_df.columns:
    log_df["IsMacroSafe"] = log_df["Macro Regime"].astype(int)
else:
    log_df["IsMacroSafe"] = 0
# Mean across assets of the 5-period rolling std, aligned to signal dates (missing -> 0).
log_df["Volatility"] = returns.rolling(5).std().mean(axis=1).reindex(log_df.index).fillna(0)

# Confidence encoding
conf_map = {"LOW": 0, "MEDIUM": 1, "HIGH": 2}
log_df["ConfidenceCode"] = log_df["Confidence"].map(conf_map).fillna(0)

# --- 2. LABELING ---
# NOTE(review): "Confidence" is itself derived from "Return" (see
# compute_confidence_score), so ConfidenceCode leaks the label — confirm intended.
log_df["GoodTrade"] = (log_df["Return"] > 0.01).astype(int)

# --- 3. TRAIN TEST SPLIT ---
features = ["ConfidenceCode", "IsMacroSafe", "DayOfWeek", "Volatility"]
X = log_df[features]
y = log_df["GoodTrade"]

# Rows are time-ordered: hold out the most recent 25% instead of shuffling, so the
# model is never trained on dates later than the ones it is evaluated on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

# --- 4. MODEL TRAINING ---
# `use_label_encoder` is deprecated in current XGBoost releases and is not needed
# for integer 0/1 labels; a fixed seed keeps runs reproducible.
model = XGBClassifier(eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

# --- 5. EVALUATION ---
y_pred = model.predict(X_test)  # predict once, reuse for report and matrix
print("🔍 Model Evaluation:")
print(classification_report(y_test, y_pred))

# Fix the label order so the matrix is always 2x2 and matches the tick labels,
# even if one class is absent from the hold-out set.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=["Bad", "Good"], yticklabels=["Bad", "Good"])
plt.title("🧠 Colexpert ML Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# --- 6. APPLY TO TODAY'S SIGNAL ---
today_row = log_df.iloc[-1]
# Reuse the last row of the feature frame so the model sees the same column
# names, order and dtypes it was trained on (rather than an unnamed list).
today_features = X.iloc[[-1]]

prediction = model.predict(today_features)[0]
proba = model.predict_proba(today_features)[0][1]

# --- 7. DECISION ASSIST ---
print(f"📈 Today's ML Prediction: {'✅ GOOD' if prediction else '❌ BAD'} | Confidence Score: {round(proba * 100, 1)}%")
print(f"Buy: {today_row['Top (Buy)']} | Short: {today_row['Bottom (Short)']} | Return: {round(today_row['Return']*100,2)}%")

In [None]:

# 📘 ColExpert AutoLogger v1.0 — Logs Every Prediction Run

import os
import pandas as pd
from datetime import datetime

# --- CONFIG ---
LOG_PATH = "colexpert_run_log.csv"
MODEL_VERSION = "v1.0"


def _joined_labels(labels):
    """Render labels as "A, B"; accepts None, a single string, or an iterable."""
    if labels is None:
        return ""
    if isinstance(labels, str):
        # Joining a bare string would split it into characters ("A, A, P, L").
        return labels
    return ", ".join(str(label) for label in labels)


def _rounded(value, digits=3):
    """Round a numeric value, treating None like a missing key (0)."""
    return round(0 if value is None else value, digits)


def log_run(summary_dict):
    """Appends a log row to CSV after each prediction.

    Reads the optional keys "num_assets", "top_buys", "top_shorts",
    "signal_strength", "confidence_score" and "accuracy" from `summary_dict`,
    stamps the row with the current local time and MODEL_VERSION, and appends
    it to LOG_PATH. The header is written when the file is missing or empty.
    """
    log_row = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "model_version": MODEL_VERSION,
        "num_assets": summary_dict.get("num_assets"),
        "top_buys": _joined_labels(summary_dict.get("top_buys", [])),
        "top_shorts": _joined_labels(summary_dict.get("top_shorts", [])),
        "signal_strength": summary_dict.get("signal_strength", "-"),
        "confidence_score": _rounded(summary_dict.get("confidence_score", 0)),
        "model_accuracy": _rounded(summary_dict.get("accuracy", 0))
    }

    row_frame = pd.DataFrame([log_row])
    # An existing but empty file still needs the header line.
    needs_header = not os.path.exists(LOG_PATH) or os.path.getsize(LOG_PATH) == 0
    row_frame.to_csv(LOG_PATH, mode="a", header=needs_header, index=False)
    print(f"📝 ColExpert Run Logged to → {LOG_PATH}")