![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

In [24]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
# from ipywidgets import interact, FloatSlider

# Handle used by the cells below to subscribe to equities and request
# fundamental data. QuantBook is not imported anywhere in this notebook, so it
# is presumably injected by the QuantConnect research kernel — TODO confirm.
qb = QuantBook()


In [25]:
# STEP 1: Hand-picked universe of tech/growth tickers
tickers = [
    "AAPL", "MSFT", "NVDA", "GOOG", "META", "TSLA",
    "AMZN", "AVGO", "ADBE", "AMD", "ORCL", "INTC",
    "CRM", "CSCO", "QCOM", "TXN", "NFLX", "MU",
    "ADI", "IBM", "AMAT", "LRCX", "NXPI", "KLAC",
    "PANW", "CDNS", "SNPS", "ANET", "WDAY", "ZS",
]

# Subscribe to each equity through the QuantBook and keep only the `.value`
# of the resulting symbol, so `symbols` ends up holding those values rather
# than the symbol objects themselves.
symbols = [qb.add_equity(ticker).symbol.value for ticker in tickers]

# Peek at the first few entries
symbols[:5]

In [26]:
# STEP 2: Request fundamentals for every symbol in the universe
fundamentals = qb.get_fundamental(symbols)

# Quick sanity check on what came back: its length and the first few keys.
n_received = len(fundamentals)
sample_keys = list(fundamentals.keys())[:5]
print(f"✅ Total fundamentals received: {n_received}")
print("Example symbols loaded:", sample_keys)


# Helper to avoid NaNs
def safe(val, default=0.0):
    """Return ``val`` unless it is missing, in which case return ``default``.

    A value counts as missing when it is ``None`` or when ``np.isnan``
    reports it as NaN. Any other value is passed through unchanged.
    """
    if val is None:
        return default
    if np.isnan(val):
        return default
    return val


In [27]:
# fundamentals.values

In [28]:
# STEP 3: Filter based on fundamentals
# Screening thresholds, named here so they can be tuned in one place.
MIN_MARKET_CAP = 1e9       # entries with a smaller MarketCap are dropped
MIN_PE = 5                 # P/E must be strictly greater than this
MAX_DEBT_TO_EQUITY = 1.0   # total debt / total equity may not exceed this
MIN_ROE = 0.10             # floor for one-year ROE
MIN_REVENUE_GROWTH = 0     # floor for one-year revenue growth
MIN_CURRENT_RATIO = 1.0    # floor for current assets / current liabilities

filtered = []
skipped_errors = 0  # entries that raised while being inspected

# Assumes `fundamentals` is DataFrame-like, so that `.values` yields one
# iterable of Fundamental objects per row — TODO confirm against the return
# type of get_fundamental.
for fund_list in fundamentals.values:
    for f in fund_list:  # Now f is a Fundamental object
        try:
            if f.MarketCap < MIN_MARKET_CAP:
                continue
            # A missing P/E becomes 0.0 via safe() and is therefore rejected here.
            pe = safe(f.ValuationRatios.PERatio)
            if pe <= MIN_PE:
                continue
            equity = safe(f.FinancialStatements.BalanceSheet.TotalEquity.Value)
            debt = safe(f.FinancialStatements.BalanceSheet.TotalDebt.Value)
            # Non-positive equity would make the ratio meaningless (or divide by zero).
            if equity <= 0:
                continue
            d2e = debt / equity
            if d2e > MAX_DEBT_TO_EQUITY:
                continue
            roe = safe(f.OperationRatios.ROE.OneYear)
            if roe < MIN_ROE:
                continue
            rev_growth = safe(f.OperationRatios.RevenueGrowth.OneYear)
            if rev_growth < MIN_REVENUE_GROWTH:
                continue
            liabilities = safe(f.FinancialStatements.BalanceSheet.CurrentLiabilities.Value)
            assets = safe(f.FinancialStatements.BalanceSheet.CurrentAssets.Value)
            if liabilities <= 0 or (assets / liabilities) < MIN_CURRENT_RATIO:
                continue
            filtered.append(f)
        except Exception:
            # Best-effort screen: an entry that cannot be evaluated is still
            # dropped, but it is counted so the loss is visible, not silent.
            skipped_errors += 1

if skipped_errors:
    print(f"⚠️ Skipped {skipped_errors} entries that raised while being screened")
# filtered

In [29]:
# Inspect the screened data: flatten every Fundamental that passed the filter
# into a plain record and view the records as a DataFrame.
rows = []
for f in filtered:
    try:
        sheet = f.FinancialStatements.BalanceSheet
        total_debt = safe(sheet.TotalDebt.Value)
        total_equity = safe(sheet.TotalEquity.Value)
        current_assets = safe(sheet.CurrentAssets.Value)
        current_liabilities = safe(sheet.CurrentLiabilities.Value)

        record = {
            "Symbol": f.Symbol.Value,
            "Date": f.EndTime.strftime("%Y-%m-%d"),
            "MarketCap": f.MarketCap,
            "PE": safe(f.ValuationRatios.PERatio),
            "ROE": safe(f.OperationRatios.ROE.OneYear),
            # Denominators are floored at 1 so these ratios never divide by zero.
            "DebtToEquity": total_debt / max(total_equity, 1),
            "RevGrowth": safe(f.OperationRatios.RevenueGrowth.OneYear),
            "CurrentRatio": current_assets / max(current_liabilities, 1),
        }
    except Exception:
        # Records that cannot be flattened are left out of the table.
        continue
    rows.append(record)

df = pd.DataFrame(rows)

# Ten largest entries by market cap
df.sort_values("MarketCap", ascending=False).head(10)


In [30]:
# Collapse the screened entries to one per symbol: the one with the greatest EndTime.
latest_fundamentals = {}
for f in filtered:
    symbol = f.Symbol.Value
    current = latest_fundamentals.get(symbol)
    # First sighting of the symbol, or a strictly later entry than the one stored.
    if current is None or f.EndTime > current.EndTime:
        latest_fundamentals[symbol] = f

print(f"✅ Unique symbols with latest fundamentals: {len(latest_fundamentals)}")

In [31]:
# Build the feature matrix: one row of seven features per symbol.
data = []   # feature rows
names = []  # symbol for each row, in the same order as `data`

for f in latest_fundamentals.values():
    try:
        # Read every raw input first; missing values become 0.0 via safe().
        roe = safe(getattr(f.OperationRatios.ROE, 'OneYear', None))
        pe = safe(f.ValuationRatios.PERatio)
        rev_growth = safe(getattr(f.OperationRatios.RevenueGrowth, 'OneYear', None))
        equity = safe(f.FinancialStatements.BalanceSheet.TotalEquity.Value)
        debt = safe(f.FinancialStatements.BalanceSheet.TotalDebt.Value)
        gross_margin = safe(getattr(f.OperationRatios.GrossMargin, 'Value', None))
        rd = safe(f.FinancialStatements.IncomeStatement.ResearchAndDevelopment.Value)
        revenue = safe(f.FinancialStatements.IncomeStatement.TotalRevenue.Value)

        # 1 / pe below needs a non-zero P/E, and a zero revenue gives no usable R&D ratio.
        if pe == 0 or revenue == 0:
            print(f"⚠️ Skipping {f.Symbol.Value} due to zero PE or revenue")
            continue

        # Derived ratios; debt-to-equity falls back to 1 when equity is not positive.
        if equity > 0:
            d2e = debt / equity
        else:
            d2e = 1
        rd_ratio = rd / revenue if revenue > 0 else 0

        data.append([
            roe,
            rev_growth,
            1 / pe,                    # earnings yield
            1 / (1 + d2e),             # shrinks as leverage grows
            np.log(f.MarketCap + 1),   # log-compressed size
            gross_margin,
            rd_ratio,
        ])
        names.append(f.Symbol.Value)
    except Exception as e:
        print(f"❌ Failed on {f.Symbol.Value}: {e}")
        continue

print(f"✅ Final data rows: {len(data)}")
print(f"✅ Unique symbols: {names}")



In [32]:

# STEP 5: Normalize and rank
# Min-max scale the feature columns so the weights act on comparable ranges.
scaler = MinMaxScaler()
X = scaler.fit_transform(data)

# Feature labels and their weights, both in the column order of `data`.
columns = ["ROE", "RevGrowth", "1/PE", "1/(1+D/E)", "Log(MarketCap)", "GrossMargin", "RD/Revenue"]
weights = [0.2, 0.25, 0.15, 0.05, 0.1, 0.05, 0.2]

# Composite score: weighted sum of the scaled features of each row.
scores = np.dot(X, weights)

ranked = sorted(zip(names, scores), key=lambda pair: pair[1], reverse=True)

# STEP 6: Show top 20
# The table keeps the unscaled feature values alongside the composite score.
df2 = pd.DataFrame(data, index=names, columns=columns).assign(Score=scores)

top_df = (
    df2.sort_values("Score", ascending=False)
    .head(20)
    .reset_index()
    .rename(columns={"index": "Symbol"})
)

# Put Symbol and Score first, followed by the feature columns in their existing order.
leading = ["Symbol", "Score"]
cols = leading + [col for col in top_df.columns if col not in leading]
top_df = top_df[cols]

top_df

In [33]:
# Seaborn look for this figure
sns.set(style="whitegrid", context="notebook")

# One scatter panel per feature: composite score against that feature,
# drawn from the rows of top_df.
features = ["ROE", "RevGrowth", "1/PE", "1/(1+D/E)", "Log(MarketCap)", "GrossMargin", "RD/Revenue"]

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 12))
axes = axes.flatten()

for ax, feature in zip(axes, features):
    sns.scatterplot(
        x=top_df[feature],
        y=top_df["Score"],
        ax=ax,
        color='steelblue'
    )
    ax.set(title=f"Score vs {feature}", xlabel=feature, ylabel="Score")

# The 3x3 grid has more panels than there are features; drop the leftovers.
for unused_ax in axes[len(features):]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()


In [34]:

# def update_weights(w1=0.25, w2=0.25, w3=0.1, w4=0.1, w5=0.1, w6=0.1, w7=0.1):
#     weights = np.array([w1, w2, w3, w4, w5, w6, w7])
#     weights = weights / weights.sum()  # Normalize weights
#     new_scores = X.dot(weights)
    
#     # Update df
#     df["Score"] = new_scores
#     top_df = df.sort_values("Score", ascending=False).head(20).reset_index()
#     top_df = top_df.rename(columns={"index": "Symbol"})
#     cols = ["Symbol", "Score"] + [col for col in top_df.columns if col not in ["Symbol", "Score"]]
#     display(top_df[cols])

# interact(
#     update_weights,
#     w1=FloatSlider(value=0.25, min=0.0, max=1.0, step=0.05, description="ROE"),
#     w2=FloatSlider(value=0.25, min=0.0, max=1.0, step=0.05, description="RevGrowth"),
#     w3=FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05, description="1/PE"),
#     w4=FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05, description="1/(1+D/E)"),
#     w5=FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05, description="Log(MC)"),
#     w6=FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05, description="GrossMargin"),
#     w7=FloatSlider(value=0.1, min=0.0, max=1.0, step=0.05, description="RD/Rev")
# )


In [35]:


# Run PCA
# Project the scaled feature matrix onto two components; used only as plot coordinates.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Run KMeans
k = 4  # You can tweak this
# KMeans cannot fit more clusters than there are samples, and X may hold fewer
# than k rows once the fundamentals screen has run, so cap the cluster count.
n_clusters = min(k, len(X))
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
# Clusters are fitted on the full scaled feature matrix, not on the 2-D projection.
clusters = kmeans.fit_predict(X)

# Create DataFrame for plotting
pca_df = pd.DataFrame({
    "PC1": X_pca[:, 0],
    "PC2": X_pca[:, 1],
    "Symbol": names,
    "Score": scores,
    "Cluster": clusters
})


In [36]:
# Scatter of the PCA projection, coloured by cluster label
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    data=pca_df,
    x="PC1", y="PC2",
    hue="Cluster",
    palette="Set2",
    s=80,
    edgecolor="black",
    ax=ax
)

# Label the highest-scoring points (at most 30) with their symbol
top_scored = pca_df.sort_values("Score", ascending=False).head(30)
for x_pos, y_pos, label in zip(top_scored["PC1"], top_scored["PC2"], top_scored["Symbol"]):
    ax.text(x_pos, y_pos, label, fontsize=9, weight="bold")

ax.set_title("PCA Clusters of Stocks Based on Financial Features")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
ax.legend(title="Cluster")
ax.grid(True)
fig.tight_layout()
plt.show()


In [37]:
from sklearn.manifold import TSNE

# Reduce dimensionality using t-SNE
# scikit-learn rejects a perplexity that is not strictly smaller than the
# number of samples, and X may hold only a handful of rows once the
# fundamentals screen has run. Cap the requested value of 10 at
# n_samples - 1 (never below 1) so small universes still embed.
perplexity = min(10, max(len(X) - 1, 1))
tsne = TSNE(n_components=2, perplexity=perplexity, learning_rate='auto', init='pca', random_state=42)
X_tsne = tsne.fit_transform(X)

# Prepare DataFrame
# The cluster labels are the existing `clusters` array; t-SNE only supplies
# the 2-D coordinates.
tsne_df = pd.DataFrame({
    "TSNE1": X_tsne[:, 0],
    "TSNE2": X_tsne[:, 1],
    "Symbol": names,
    "Score": scores,
    "Cluster": clusters
})

# Plot t-SNE
plt.figure(figsize=(10, 7))
sns.scatterplot(
    data=tsne_df,
    x="TSNE1", y="TSNE2",
    hue="Cluster",
    palette="Set1",
    s=80,
    edgecolor="black"
)

# Annotate top-scoring stocks (at most 30) with their symbol
for _, row in tsne_df.sort_values("Score", ascending=False).head(30).iterrows():
    plt.text(row["TSNE1"], row["TSNE2"], row["Symbol"], fontsize=9, weight="bold")

plt.title("t-SNE Clustering of Stocks")
plt.xlabel("t-SNE 1")
plt.ylabel("t-SNE 2")
plt.legend(title="Cluster")
plt.grid(True)
plt.tight_layout()
plt.show()



In [38]:
import umap.umap_ as umap  # QuantConnect supports umap in Research environment

# Embed the scaled feature matrix in two dimensions with UMAP
reducer = umap.UMAP(n_neighbors=10, min_dist=0.2, random_state=42)
X_umap = reducer.fit_transform(X)

# Plotting frame: the two UMAP coordinates plus, per row, the symbol, its
# composite score and the existing cluster label.
umap_df = pd.DataFrame(X_umap, columns=["UMAP1", "UMAP2"]).assign(
    Symbol=names,
    Score=scores,
    Cluster=clusters,
)

In [39]:
# Scatter of the UMAP embedding, coloured by cluster label
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    data=umap_df,
    x="UMAP1", y="UMAP2",
    hue="Cluster",
    palette="Set2",
    s=80,
    edgecolor="black",
    ax=ax
)

# Label the highest-scoring points (at most 30) with their symbol
top_scored = umap_df.sort_values("Score", ascending=False).head(30)
for x_pos, y_pos, label in zip(top_scored["UMAP1"], top_scored["UMAP2"], top_scored["Symbol"]):
    ax.text(x_pos, y_pos, label, fontsize=9, weight="bold")

ax.set_title("UMAP Clustering of Stocks")
ax.set_xlabel("UMAP 1")
ax.set_ylabel("UMAP 2")
ax.legend(title="Cluster")
ax.grid(True)
fig.tight_layout()
plt.show()
