diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json index 2a337d7966..8de9478bae 100644 --- a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json @@ -7,6 +7,10 @@ "featureShortDescription": { "03" : "Time series forecasting", "04" : "Question Answering", - "05" : "Sentiment analysis" + "05" : "Sentiment analysis", + "06" : "Text classification", + "07" : "Feature extraction", + "08" : "Text generation", + "12" : "Time series forecasting" } } \ No newline at end of file diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/01 Introduction.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/01 Introduction.html new file mode 100644 index 0000000000..12f5a7c2f5 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/01 Introduction.html @@ -0,0 +1,14 @@ +

This page explains how to use Hugging Face sentiment analysis models in LEAN trading algorithms. These models classify financial text into sentiment categories like positive, negative, and neutral. The following models are available:

+ + + +

All of these models accept text input and return classification labels with confidence scores. You can use them with the Hugging Face transformers library to analyze the sentiment of financial news and social media posts, then use the results to inform trading decisions.

diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/99 Examples.html new file mode 100644 index 0000000000..3333aad922 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/99 Examples.html @@ -0,0 +1,112 @@ +

+ The following examples demonstrate usage of Hugging Face sentiment analysis models. +

+

+ Example 1: News Sentiment Trading +

+

+ The following algorithm selects the most volatile asset at the beginning of each month. + It gets the Tiingo News articles that were released for the asset over the previous 10 days and then feeds them into a sentiment analysis model. + It aggregates the sentiment scores of all the news releases. + If the aggregated sentiment is positive, it enters a long position for the month. + If it's negative, it enters a short position. + You can replace the model name with any of the sentiment analysis models listed on the introduction page. +

+
+
from transformers import pipeline, set_seed
+
+class SentimentAnalysisModelAlgorithm(QCAlgorithm):
+
+    def initialize(self):
+        self.set_start_date(2024, 9, 1)
+        self.set_end_date(2024, 12, 31)
+        self.set_cash(100_000)
+
+        self.universe_settings.resolution = Resolution.DAILY
+        self.universe_settings.schedule.on(self.date_rules.month_start("SPY"))
+        self._universe = self.add_universe(
+            lambda fundamental: [
+                self.history(
+                    [f.symbol for f in sorted(
+                        fundamental, key=lambda f: f.dollar_volume
+                    )[-10:]],
+                    timedelta(365), Resolution.DAILY
+                )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
+            ]
+        )
+
+        set_seed(1, True)
+
+        # Load the sentiment analysis pipeline.
+        # Replace the model name with any supported sentiment model.
+        self._sentiment_pipeline = pipeline(
+            "text-classification",
+            model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
+        )
+
+        self._last_rebalance_time = datetime.min
+        self.set_warm_up(30, Resolution.DAILY)
+
+    def on_warmup_finished(self):
+        self._trade()
+        self.schedule.on(
+            self.date_rules.month_start("SPY", 1),
+            self.time_rules.midnight,
+            self._trade
+        )
+
+    def on_securities_changed(self, changes):
+        for security in changes.removed_securities:
+            self.remove_security(security.dataset_symbol)
+        for security in changes.added_securities:
+            security.dataset_symbol = self.add_data(
+                TiingoNews, security.symbol
+            ).symbol
+
+    def _trade(self):
+        if (self.is_warming_up or
+            self.time - self._last_rebalance_time < timedelta(14)):
+            return
+
+        # Get the target security.
+        security = self.securities[list(self._universe.selected)[0]]
+
+        # Get the latest news articles.
+        articles = self.history[TiingoNews](
+            security.dataset_symbol, 10, Resolution.DAILY
+        )
+        article_text = [
+            article.description for article in articles
+            if article.description
+        ]
+        if not article_text:
+            return
+
+        # Run sentiment analysis on each article.
+        # Truncate long articles to the model's max length.
+        results = self._sentiment_pipeline(
+            article_text, truncation=True, max_length=512
+        )
+
+        # Aggregate sentiment scores.
+        positive_score = 0
+        negative_score = 0
+        for result in results:
+            label = result['label'].lower()
+            score = result['score']
+            if 'pos' in label:
+                positive_score += score
+            elif 'neg' in label:
+                negative_score += score
+
+        self.plot("Sentiment", "Positive", positive_score)
+        self.plot("Sentiment", "Negative", negative_score)
+
+        # Rebalance based on sentiment.
+        weight = 1 if positive_score > negative_score else -0.25
+        self.set_holdings(
+            security.symbol, weight,
+            liquidate_existing_holdings=True
+        )
+        self._last_rebalance_time = self.time
+
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/metadata.json b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/metadata.json new file mode 100644 index 0000000000..58140d6fce --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/metadata.json @@ -0,0 +1,12 @@ +{ + "type": "metadata", + "values": { + "description": "This page explains how to use Hugging Face sentiment analysis models in LEAN trading algorithms.", + "keywords": "sentiment analysis model, text classification, pre-trained AI model, financial sentiment, free AI models", + "og:description": "This page explains how to use Hugging Face sentiment analysis models in LEAN trading algorithms.", + "og:title": "Sentiment Analysis Models - Documentation QuantConnect.com", + "og:type": "website", + "og:site_name": "Sentiment Analysis Models - QuantConnect.com", + "og:image": "https://cdn.quantconnect.com/docs/i/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis.png" + } +} diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/01 Introduction.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/01 Introduction.html new file mode 100644 index 0000000000..40d4912ce0 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/01 Introduction.html @@ -0,0 +1,10 @@ +

This page explains how to use Hugging Face fill-mask models in LEAN trading algorithms. Fill-mask models predict the most likely word to fill a masked position in a sentence. You can use them to extract text embeddings and build feature vectors from financial text. The following models are available:

+ + + +

These models are useful for extracting text embeddings from financial news. You can feed these embeddings into a downstream classifier or use cosine similarity to measure the semantic similarity between documents.

diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html new file mode 100644 index 0000000000..ca79c40812 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html @@ -0,0 +1,129 @@ +

+ The following examples demonstrate usage of Hugging Face fill-mask models for feature extraction. +

+

+ Example 1: Embedding-Based News Similarity +

+

+ The following algorithm selects a volatile asset at the beginning of each month. + It uses a fill-mask model to extract embeddings from Tiingo News articles. + It then compares the average embedding of recent news to a reference "bullish" and "bearish" embedding. + If the recent news is more similar to the bullish reference, it enters a long position. + You can replace the model name with any of the fill-mask models listed on the introduction page. +

+
+
import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModel, set_seed
+
+class FillMaskEmbeddingAlgorithm(QCAlgorithm):
+
+    def initialize(self):
+        self.set_start_date(2024, 9, 1)
+        self.set_end_date(2024, 12, 31)
+        self.set_cash(100_000)
+
+        self.universe_settings.resolution = Resolution.DAILY
+        self.universe_settings.schedule.on(self.date_rules.month_start("SPY"))
+        self._universe = self.add_universe(
+            lambda fundamental: [
+                self.history(
+                    [f.symbol for f in sorted(
+                        fundamental, key=lambda f: f.dollar_volume
+                    )[-10:]],
+                    timedelta(365), Resolution.DAILY
+                )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
+            ]
+        )
+
+        set_seed(1, True)
+
+        # Load the model and tokenizer.
+        # Replace with any fill-mask model (e.g., google-bert/bert-base-uncased).
+        model_name = "distilbert/distilbert-base-uncased"
+        self._tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self._model = AutoModel.from_pretrained(model_name)
+        self._model.eval()
+
+        # Create reference embeddings for bullish/bearish text.
+        self._bullish_embedding = self._get_embedding(
+            "Stock prices surged on strong earnings and revenue growth."
+        )
+        self._bearish_embedding = self._get_embedding(
+            "Stock prices plunged on weak earnings and declining revenue."
+        )
+
+        self._last_rebalance_time = datetime.min
+        self.set_warm_up(30, Resolution.DAILY)
+
+    def on_warmup_finished(self):
+        self._trade()
+        self.schedule.on(
+            self.date_rules.month_start("SPY", 1),
+            self.time_rules.midnight,
+            self._trade
+        )
+
+    def on_securities_changed(self, changes):
+        for security in changes.removed_securities:
+            self.remove_security(security.dataset_symbol)
+        for security in changes.added_securities:
+            security.dataset_symbol = self.add_data(
+                TiingoNews, security.symbol
+            ).symbol
+
+    def _get_embedding(self, text):
+        """Extract the [CLS] token embedding from the model."""
+        inputs = self._tokenizer(
+            text, return_tensors="pt", truncation=True, max_length=512
+        )
+        with torch.no_grad():
+            outputs = self._model(**inputs)
+        # Use the [CLS] token (first token) embedding.
+        return outputs.last_hidden_state[:, 0, :].squeeze().numpy()
+
+    def _cosine_similarity(self, a, b):
+        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+    def _trade(self):
+        if (self.is_warming_up or
+            self.time - self._last_rebalance_time < timedelta(14)):
+            return
+
+        # Get the target security.
+        security = self.securities[list(self._universe.selected)[0]]
+
+        # Get the latest news articles.
+        articles = self.history[TiingoNews](
+            security.dataset_symbol, 10, Resolution.DAILY
+        )
+        article_text = [
+            article.description for article in articles
+            if article.description
+        ]
+        if not article_text:
+            return
+
+        # Get embeddings for each article and average them.
+        embeddings = [self._get_embedding(text) for text in article_text]
+        avg_embedding = np.mean(embeddings, axis=0)
+
+        # Compare to reference embeddings.
+        bullish_sim = self._cosine_similarity(
+            avg_embedding, self._bullish_embedding
+        )
+        bearish_sim = self._cosine_similarity(
+            avg_embedding, self._bearish_embedding
+        )
+
+        self.plot("Similarity", "Bullish", bullish_sim)
+        self.plot("Similarity", "Bearish", bearish_sim)
+
+        # Rebalance based on similarity.
+        weight = 1 if bullish_sim > bearish_sim else -0.25
+        self.set_holdings(
+            security.symbol, weight,
+            liquidate_existing_holdings=True
+        )
+        self._last_rebalance_time = self.time
+
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/metadata.json b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/metadata.json new file mode 100644 index 0000000000..fd584e3045 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/metadata.json @@ -0,0 +1,12 @@ +{ + "type": "metadata", + "values": { + "description": "This page explains how to use Hugging Face fill-mask models in LEAN trading algorithms.", + "keywords": "fill-mask model, feature extraction, pre-trained AI model, embeddings, free AI models", + "og:description": "This page explains how to use Hugging Face fill-mask models in LEAN trading algorithms.", + "og:title": "Fill-Mask Models - Documentation QuantConnect.com", + "og:type": "website", + "og:site_name": "Fill-Mask Models - QuantConnect.com", + "og:image": "https://cdn.quantconnect.com/docs/i/writing-algorithms/machine-learning/hugging-face/popular-models/fill-mask.png" + } +} diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/01 Introduction.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/01 Introduction.html new file mode 100644 index 0000000000..e5da086b71 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/01 Introduction.html @@ -0,0 +1,9 @@ +

This page explains how to use Hugging Face text generation models in LEAN trading algorithms. These models generate text given an input prompt, which you can use for tasks like summarizing financial data or generating structured analysis. The following models are available:

+ + + +

Text generation models can analyze market context and generate structured outputs. You can prompt them to classify market conditions or extract trading signals from financial text. Note that larger models like Gemma-7B and DeepSeek-70B require GPU nodes with sufficient memory.

diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html new file mode 100644 index 0000000000..18a4e5841b --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html @@ -0,0 +1,119 @@ +

+ The following examples demonstrate usage of Hugging Face text generation models. +

+

+ Example 1: GPT-2 Market Condition Classifier +

+

+ The following algorithm uses GPT-2 to classify market conditions based on recent price data. + At the beginning of each month, it calculates trailing returns, volatility, and momentum for the universe of the 5 most liquid assets. + It then prompts GPT-2 to complete a structured market analysis template and parses the generated text to determine position sizing. +

+
+
from transformers import pipeline, set_seed
+
+class GPT2MarketAnalysisAlgorithm(QCAlgorithm):
+
+    def initialize(self):
+        self.set_start_date(2024, 9, 1)
+        self.set_end_date(2024, 12, 31)
+        self.set_cash(100_000)
+
+        self.settings.min_absolute_portfolio_target_percentage = 0
+
+        set_seed(1, True)
+
+        # Load the text generation pipeline with GPT-2.
+        self._generator = pipeline(
+            "text-generation",
+            model="openai-community/gpt2"
+        )
+
+        # Define the universe.
+        spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
+        self.universe_settings.schedule.on(self.date_rules.month_start(spy))
+        self.universe_settings.resolution = Resolution.DAILY
+        self._universe = self.add_universe(
+            self.universe.top(
+                self.get_parameter('universe_size', 5)
+            )
+        )
+
+        self._last_rebalance = datetime.min
+        self.schedule.on(
+            self.date_rules.month_start(spy, 1),
+            self.time_rules.midnight,
+            self._trade
+        )
+        self.set_warm_up(timedelta(31))
+
+    def _trade(self):
+        if self.is_warming_up:
+            return
+        if self.time - self._last_rebalance < timedelta(25):
+            return
+        self._last_rebalance = self.time
+
+        symbols = list(self._universe.selected)
+        if not symbols:
+            return
+
+        # Get trailing 60-day price data.
+        history = self.history(
+            symbols, 60, Resolution.DAILY
+        )['close'].unstack(0)
+
+        scores = {}
+        for symbol in symbols:
+            prices = history[symbol].dropna()
+            if len(prices) < 20:
+                continue
+
+            # Calculate features.
+            returns_20d = (prices.iloc[-1] / prices.iloc[-20] - 1) * 100
+            volatility = prices.pct_change().std() * np.sqrt(252) * 100
+
+            # Create a structured prompt.
+            prompt = (
+                f"Stock analysis: 20-day return {returns_20d:.1f}%, "
+                f"annualized volatility {volatility:.1f}%. "
+                f"Market outlook:"
+            )
+
+            # Generate text.
+            result = self._generator(
+                prompt, max_new_tokens=30, num_return_sequences=1,
+                do_sample=True, temperature=0.7
+            )
+            generated = result[0]['generated_text'].lower()
+
+            # Parse sentiment from generated text.
+            bullish_words = ['bullish', 'growth', 'strong', 'positive', 'upward', 'buy', 'rally']
+            bearish_words = ['bearish', 'decline', 'weak', 'negative', 'downward', 'sell', 'crash']
+
+            bull_count = sum(1 for w in bullish_words if w in generated)
+            bear_count = sum(1 for w in bearish_words if w in generated)
+
+            # Combine model signal with momentum.
+            momentum_signal = 1 if returns_20d > 0 else -1
+            model_signal = bull_count - bear_count
+            scores[symbol] = momentum_signal + model_signal * 0.5
+
+        if not scores:
+            return
+
+        # Normalize scores to portfolio weights.
+        total = sum(abs(v) for v in scores.values())
+        if total == 0:
+            return
+        weights = {s: v / total for s, v in scores.items()}
+
+        # Rebalance.
+        self.set_holdings(
+            [
+                PortfolioTarget(symbol, weight)
+                for symbol, weight in weights.items()
+            ],
+            True
+        )
+
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/metadata.json b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/metadata.json new file mode 100644 index 0000000000..f2a00fc3db --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/metadata.json @@ -0,0 +1,12 @@ +{ + "type": "metadata", + "values": { + "description": "This page explains how to use Hugging Face text generation models in LEAN trading algorithms.", + "keywords": "text generation model, GPT-2, pre-trained AI model, language model, free AI models", + "og:description": "This page explains how to use Hugging Face text generation models in LEAN trading algorithms.", + "og:title": "Text Generation Models - Documentation QuantConnect.com", + "og:type": "website", + "og:site_name": "Text Generation Models - QuantConnect.com", + "og:image": "https://cdn.quantconnect.com/docs/i/writing-algorithms/machine-learning/hugging-face/popular-models/text-generation.png" + } +} diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/01 Introduction.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/01 Introduction.html new file mode 100644 index 0000000000..aff2e2517b --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/01 Introduction.html @@ -0,0 +1,10 @@ +

This page explains how to use Chronos-Bolt in LEAN trading algorithms. The model repository provides the following description:

+ +
+

+ Chronos-Bolt models are a family of lightweight, efficient time series forecasting models. They are a follow-up to the original Chronos models, designed for faster inference and lower computational cost. + Chronos-Bolt uses a T5-based encoder-decoder architecture where the encoder processes the historical context and the decoder directly generates quantile forecasts. + Unlike the original Chronos models, Chronos-Bolt does not use tokenization, resulting in significantly faster inference. + For details, refer to the paper Chronos: Learning the Language of Time Series. +

+
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html new file mode 100644 index 0000000000..f56403d815 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html @@ -0,0 +1,148 @@ +

+ The following examples demonstrate usage of the Chronos-Bolt model. +

+

+ Example 1: Price Prediction +

+

+ The following algorithm selects the most liquid assets at the beginning of each month. + Once a quarter, it gets the trailing year of prices for all the assets in the universe and then forecasts the price paths over the upcoming quarter using Chronos-Bolt. + It then uses the SciPy package to find the weights that maximize the future Sharpe ratio of the portfolio and rebalances the portfolio to those weights. + Chronos-Bolt is a faster variant of Chronos-T5 that directly generates quantile forecasts. +

+
+
import torch
+import numpy as np
+from scipy.optimize import minimize
+from chronos import ChronosBoltPipeline
+from transformers import set_seed
+
+
+class ChronosBoltAlgorithm(QCAlgorithm):
+    """
+    This algorithm demonstrates how to use the Chronos-Bolt time
+    series forecasting model. It forecasts the future equity curves
+    of the 5 most liquid assets, then finds portfolio weights that
+    maximize the future Sharpe ratio. The portfolio is rebalanced
+    every 3 months.
+    """
+
+    def initialize(self):
+        self.set_start_date(2024, 9, 1)
+        self.set_end_date(2024, 12, 31)
+        self.set_cash(100_000)
+
+        self.settings.min_absolute_portfolio_target_percentage = 0
+
+        set_seed(1, True)
+
+        # Load the pre-trained Chronos-Bolt model.
+        self._pipeline = ChronosBoltPipeline.from_pretrained(
+            "autogluon/chronos-bolt-base",
+            device_map="cuda" if torch.cuda.is_available() else "cpu",
+            torch_dtype=torch.bfloat16,
+        )
+
+        # Define the universe.
+        spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
+        self.universe_settings.schedule.on(self.date_rules.month_start(spy))
+        self.universe_settings.resolution = Resolution.DAILY
+        self._universe = self.add_universe(
+            self.universe.top(
+                self.get_parameter('universe_size', 5)
+            )
+        )
+
+        self._lookback_period = timedelta(
+            365 * self.get_parameter('lookback_years', 1)
+        )
+        self._prediction_length = 3 * 21  # Three months of trading days
+
+        # Schedule rebalances.
+        self._last_rebalance = datetime.min
+        self.schedule.on(
+            self.date_rules.month_start(spy, 1),
+            self.time_rules.midnight,
+            self._trade
+        )
+        self.set_warm_up(timedelta(31))
+
+    def _sharpe_ratio(
+            self, weights, returns, risk_free_rate,
+            trading_days_per_year=252):
+        mean_returns = returns.mean() * trading_days_per_year
+        cov_matrix = returns.cov() * trading_days_per_year
+        portfolio_return = np.sum(mean_returns * weights)
+        portfolio_std = np.sqrt(
+            np.dot(weights.T, np.dot(cov_matrix, weights))
+        )
+        sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std
+        return -sharpe_ratio
+
+    def _optimize_portfolio(self, equity_curves):
+        returns = equity_curves.pct_change().dropna()
+        num_assets = returns.shape[1]
+        initial_guess = num_assets * [1. / num_assets]
+        result = minimize(
+            self._sharpe_ratio,
+            initial_guess,
+            args=(
+                returns,
+                self.risk_free_interest_rate_model.get_interest_rate(
+                    self.time
+                )
+            ),
+            method='SLSQP',
+            bounds=tuple((0, 1) for _ in range(num_assets)),
+            constraints=(
+                {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
+            )
+        )
+        return result.x
+
+    def _trade(self):
+        if self.is_warming_up:
+            return
+        if self.time - self._last_rebalance < timedelta(80):
+            return
+        self._last_rebalance = self.time
+
+        symbols = list(self._universe.selected)
+
+        # Get historical equity curves.
+        history = self.history(
+            symbols, self._lookback_period
+        )['close'].unstack(0)
+
+        # Forecast the future equity curves using Chronos-Bolt.
+        # predict() returns (num_series, num_quantiles, prediction_length).
+        all_forecasts = self._pipeline.predict(
+            [
+                torch.tensor(history[symbol].dropna())
+                for symbol in symbols
+            ],
+            self._prediction_length
+        )
+
+        # Take the median forecast for each asset.
+        forecasts_df = pd.DataFrame(
+            {
+                symbol: np.quantile(
+                    all_forecasts[i].numpy(), 0.5, axis=0
+                )
+                for i, symbol in enumerate(symbols)
+            }
+        )
+
+        # Find the weights that maximize the forward Sharpe ratio.
+        optimal_weights = self._optimize_portfolio(forecasts_df)
+
+        # Rebalance the portfolio.
+        self.set_holdings(
+            [
+                PortfolioTarget(symbol, optimal_weights[i])
+                for i, symbol in enumerate(symbols)
+            ],
+            True
+        )
+
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/metadata.json b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/metadata.json new file mode 100644 index 0000000000..26b97e7c3a --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/metadata.json @@ -0,0 +1,12 @@ +{ + "type": "metadata", + "values": { + "description": "This page explains how to use Chronos-Bolt time series forecasting models in LEAN trading algorithms.", + "keywords": "time series forecasting model, Chronos-Bolt, pre-trained AI model, price prediction, free AI models", + "og:description": "This page explains how to use Chronos-Bolt time series forecasting models in LEAN trading algorithms.", + "og:title": "Chronos-Bolt - Documentation QuantConnect.com", + "og:type": "website", + "og:site_name": "Chronos-Bolt - QuantConnect.com", + "og:image": "https://cdn.quantconnect.com/docs/i/writing-algorithms/machine-learning/hugging-face/popular-models/chronos-bolt.png" + } +} diff --git a/Resources/machine-learning/hugging-face-table.html b/Resources/machine-learning/hugging-face-table.html index 857cf7a38f..9d78d7fe25 100644 --- a/Resources/machine-learning/hugging-face-table.html +++ b/Resources/machine-learning/hugging-face-table.html @@ -1,37 +1,124 @@ - + - + - - - - + + + + - - - - + + + + - - - - - - + + + + + + - +
NameCategoryExample
ahmedrachid/FinancialBERT-Sentiment-AnalysisText Classification
ahmedrachid/FinancialBERT-Sentiment-AnalysisText ClassificationExample
amazon/chronos-t5-baseTime Series ForecastingExample
amazon/chronos-t5-largeTime Series ForecastingExample
amazon/chronos-t5-smallTime Series ForecastingExample
amazon/chronos-t5-tinyTime Series ForecastingExample
autogluon/chronos-bolt-baseTime Series Forecasting
autogluon/chronos-bolt-baseTime Series ForecastingExample
autogluon/chronos-t5-baseTime Series ForecastingExample
autogluon/chronos-t5-largeTime Series ForecastingExample
autogluon/chronos-t5-tinyTime Series ForecastingExample
AutonLab/MOMENT-1-largeTime Series Forecasting
AventIQ-AI/sentiment-analysis-for-stock-market-sentimentTime Series Forecasting
bardsai/finance-sentiment-fr-baseText Classification
cardiffnlp/twitter-roberta-base-sentiment-latestText Classification
deepseek-ai/DeepSeek-R1-Distill-Llama-70BText Generation
AventIQ-AI/sentiment-analysis-for-stock-market-sentimentText ClassificationExample
bardsai/finance-sentiment-fr-baseText ClassificationExample
cardiffnlp/twitter-roberta-base-sentiment-latestText ClassificationExample
deepseek-ai/DeepSeek-R1-Distill-Llama-70BText GenerationExample
distilbert/distilbert-base-cased-distilled-squadQuestion AnsweringExample
distilbert/distilbert-base-uncasedFill-Mask
FacebookAI/roberta-baseFill-Mask
google-bert/bert-base-uncasedFill-Mask
google/gemma-7bText Generation
distilbert/distilbert-base-uncasedFill-MaskExample
FacebookAI/roberta-baseFill-MaskExample
google-bert/bert-base-uncasedFill-MaskExample
google/gemma-7bText GenerationExample
ibm-granite/granite-timeseries-ttm-r1Time Series Forecasting
microsoft/deberta-baseFill-Mask
mrm8488/distilroberta-finetuned-financial-news-sentiment-analysisText Classification
nickmuchi/deberta-v3-base-finetuned-finance-text-classificationText Classification
nickmuchi/distilroberta-finetuned-financial-text-classificationText Classification
nickmuchi/sec-bert-finetuned-finance-classificationText Classification
openai-community/gpt2Text Generation
microsoft/deberta-baseFill-MaskExample
mrm8488/distilroberta-finetuned-financial-news-sentiment-analysisText ClassificationExample
nickmuchi/deberta-v3-base-finetuned-finance-text-classificationText ClassificationExample
nickmuchi/distilroberta-finetuned-financial-text-classificationText ClassificationExample
nickmuchi/sec-bert-finetuned-finance-classificationText ClassificationExample
openai-community/gpt2Text GenerationExample
ProsusAI/finbertText ClassificationExample
Salesforce/moirai-1.0-R-baseTime Series Forecasting
Salesforce/moirai-1.0-R-largeTime Series Forecasting
Salesforce/moirai-1.0-R-smallTime Series Forecasting
StephanAkkerman/FinTwitBERT-sentimentText Classification
StephanAkkerman/FinTwitBERT-sentimentText ClassificationExample
yiyanghkust/finbert-toneText ClassificationExample
diff --git a/code-generators/hugging-face-table-generator.py b/code-generators/hugging-face-table-generator.py index f4d2732b79..e6d0fc6f71 100644 --- a/code-generators/hugging-face-table-generator.py +++ b/code-generators/hugging-face-table-generator.py @@ -34,9 +34,28 @@ Repo: yiyanghkust/finbert-tone. Revisions ['4921590d3c0c3832c0efea24c8381ce0bda7844b']''' EXAMPLES = { + 'chronos-bolt' : '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/chronos-bolt', 'chronos-t5' : '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/chronos-t5', 'distilbert-base-cased-distilled-squad' : '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/distilbert', - 'finbert': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/finbert' + 'finbert': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/finbert', + # Sentiment analysis models + 'FinancialBERT-Sentiment-Analysis': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'sentiment-analysis-for-stock': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'finance-sentiment-fr': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'twitter-roberta-base-sentiment': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'distilroberta-finetuned-financial-news': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'deberta-v3-base-finetuned-finance': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'distilroberta-finetuned-financial-text': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 'sec-bert-finetuned': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + 
'FinTwitBERT': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/sentiment-analysis', + # Fill-mask models + 'bert-base-uncased': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/fill-mask', + 'roberta-base': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/fill-mask', + 'deberta-base': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/fill-mask', + # Text generation models + 'gpt2': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/text-generation', + 'gemma-7b': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/text-generation', + 'DeepSeek-R1-Distill': '/docs/v2/writing-algorithms/machine-learning/hugging-face/popular-models/text-generation', } def __to_row(line): diff --git a/examples-check/model_variant_results.json b/examples-check/model_variant_results.json new file mode 100644 index 0000000000..c0486f36a3 --- /dev/null +++ b/examples-check/model_variant_results.json @@ -0,0 +1,66 @@ +[ + [ + "Fill-Mask", + "distilbert/distilbert-base-uncased", + { + "net_profit": "29.430%", + "drawdown": "29.000%", + "sharpe": "1.586" + } + ], + [ + "Fill-Mask", + "FacebookAI/roberta-base", + { + "net_profit": "-9.417%", + "drawdown": "16.700%", + "sharpe": "-1.296" + } + ], + [ + "Fill-Mask", + "google-bert/bert-base-uncased", + { + "net_profit": "-9.417%", + "drawdown": "16.700%", + "sharpe": "-1.296" + } + ], + [ + "Fill-Mask", + "microsoft/deberta-base", + "FAIL: backtest timed out" + ], + [ + "Text Generation", + "openai-community/gpt2", + { + "net_profit": "19.558%", + "drawdown": "9.900%", + "sharpe": "1.952" + } + ], + [ + "Text Generation", + "google/gemma-7b", + "FAIL: backtest timed out" + ], + [ + "Text Generation", + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + { + "net_profit": "N/A", + "drawdown": "N/A", + "sharpe": "N/A" + } + ], + [ + "Chronos-Bolt", + "autogluon/chronos-bolt-base", + { + "net_profit": "7.246%", 
+ "drawdown": "6.100%", + "sharpe": "0.686" + } + ] +] \ No newline at end of file diff --git a/examples-check/run_model_variants.py b/examples-check/run_model_variants.py new file mode 100644 index 0000000000..09da7bd9eb --- /dev/null +++ b/examples-check/run_model_variants.py @@ -0,0 +1,376 @@ +"""Run backtests for all HuggingFace model variants and output a results table. + +Usage: + python examples-check/run_model_variants.py + +Credentials (checked in order): + 1. Environment variables: DOCS_REGRESSION_TEST_USER_ID, DOCS_REGRESSION_TEST_USER_TOKEN + 2. LEAN CLI credentials file: ~/.lean/credentials +""" +import sys +import os +import json +import time +import base64 +import hashlib +from pathlib import Path + +try: + import requests +except ImportError: + sys.exit("Missing dependency: pip install requests") + +try: + from bs4 import BeautifulSoup +except ImportError: + sys.exit("Missing dependency: pip install beautifulsoup4") + + +# --------------------------------------------------------------------------- +# Credentials & API helpers +# --------------------------------------------------------------------------- + +def _load_credentials(): + user_id = os.environ.get("DOCS_REGRESSION_TEST_USER_ID", "") + user_token = os.environ.get("DOCS_REGRESSION_TEST_USER_TOKEN", "") + if user_id and user_token: + return user_id, user_token + lean_creds = Path.home() / ".lean" / "credentials" + if lean_creds.exists(): + try: + creds = json.loads(lean_creds.read_text()) + user_id = str(creds.get("user-id", "")) + user_token = creds.get("api-token", "") + if user_id and user_token: + return user_id, user_token + except (json.JSONDecodeError, KeyError): + pass + return "", "" + + +BASE_API = "https://www.quantconnect.com/api/v2" +USER_ID, USER_TOKEN = _load_credentials() +PYTHON_IMPORTS = "from AlgorithmImports import *\n" +MAX_RETRIES = 5 +POLL_INTERVAL = 10 # seconds between status polls +BACKTEST_TIMEOUT = 1800 # seconds +SLOW_THRESHOLD = 300 # seconds; placeholder 
backtests over this become skip-test +# Set to a list of category names to skip (e.g. already tested ones) +SKIP_CATEGORIES = ["Sentiment Analysis"] +# Reuse an existing project instead of creating new ones (avoids 100/day limit). +# Set to an integer project ID, or None to always create fresh projects. +REUSE_PROJECT_ID = 29125823 + + +def _headers(): + ts = str(int(time.time())) + hashed = hashlib.sha256(f"{USER_TOKEN}:{ts}".encode()).hexdigest() + auth = base64.b64encode(f"{USER_ID}:{hashed}".encode()).decode() + return {"Authorization": f"Basic {auth}", "Timestamp": ts} + + +def api_post(endpoint, payload=None): + return requests.post( + f"{BASE_API}/{endpoint}", + headers=_headers(), + json=payload or {} + ).json() + + +def clean_code(code): + return (code + .replace("&&", "&&") + .replace("<", "<") + .replace(">", ">")) + + +# --------------------------------------------------------------------------- +# HTML helpers +# --------------------------------------------------------------------------- + +def mark_skip_test(file_path): + """Change 'testable' to 'skip-test' on the first matching example div.""" + content = Path(file_path).read_text(encoding="utf-8") + updated = content.replace( + 'class="section-example-container testable"', + 'class="section-example-container skip-test"', + 1 # only first occurrence + ) + if updated != content: + Path(file_path).write_text(updated, encoding="utf-8") + print(f" → Backtest exceeded {SLOW_THRESHOLD}s — marked skip-test: {Path(file_path).name}") + + +# --------------------------------------------------------------------------- +# Extract base code from HTML example file +# --------------------------------------------------------------------------- + +def extract_python_code(file_path): + """Return the first testable Python QCAlgorithm code block in the file.""" + html = Path(file_path).read_text(encoding="utf-8") + soup = BeautifulSoup(html, "html.parser") + for div in soup.find_all("div", 
class_="section-example-container"): + classes = div.get("class", []) + if "skip-test" in classes: + continue + for pre in div.find_all("pre", class_="python"): + code = pre.get_text() + if "(QCAlgorithm)" in code: + return clean_code(code) + raise ValueError(f"No testable Python QCAlgorithm block found in {file_path}") + + +# --------------------------------------------------------------------------- +# QC Cloud: create project, compile, backtest +# --------------------------------------------------------------------------- + +def create_project_and_upload(code, label): + if REUSE_PROJECT_ID: + project_id = REUSE_PROJECT_ID + else: + ts = int(time.time()) + name = f"DocTest/variants_{ts}_{label[:20]}" + resp = api_post("projects/create", {"name": name, "language": "Py"}) + if not (resp.get("success") or resp.get("projects")): + raise RuntimeError(f"project create failed: {resp}") + project_id = resp["projects"][0]["projectId"] + + full_code = PYTHON_IMPORTS + code + for _ in range(MAX_RETRIES): + r = api_post("files/update", { + "projectId": project_id, + "name": "main.py", + "content": full_code + }) + if r.get("success"): + break + time.sleep(3) + else: + raise RuntimeError("file upload failed") + + return project_id + + +def compile_project(project_id): + resp = api_post("compile/create", {"projectId": project_id}) + if not resp.get("success"): + raise RuntimeError(f"compile create failed: {resp}") + compile_id = resp.get("compileId") or resp.get("compile", {}).get("compileId") + if not compile_id: + raise RuntimeError(f"no compileId in response: {resp}") + + deadline = time.time() + 300 + while time.time() < deadline: + time.sleep(POLL_INTERVAL) + r = api_post("compile/read", {"projectId": project_id, "compileId": compile_id}) + state = r.get("state") or r.get("compile", {}).get("state", "") + if state == "BuildSuccess": + return compile_id + if state == "BuildError": + logs = r.get("logs") or r.get("compile", {}).get("logs", []) + raise RuntimeError(f"compile 
error: {logs[-3:] if logs else r}") + raise RuntimeError("compile timed out") + + +def run_backtest(project_id, compile_id, label): + resp = api_post("backtests/create", { + "projectId": project_id, + "compileId": compile_id, + "backtestName": label[:64] + }) + if not resp.get("success"): + raise RuntimeError(f"backtest create failed: {resp}") + bt_id = resp["backtest"]["backtestId"] + + deadline = time.time() + BACKTEST_TIMEOUT + while time.time() < deadline: + time.sleep(POLL_INTERVAL) + r = api_post("backtests/read", {"projectId": project_id, "backtestId": bt_id}) + bt = r.get("backtest", {}) + progress = bt.get("progress", 0) + completed = bt.get("completed", False) + if completed: + return bt + if bt.get("error"): + raise RuntimeError(f"backtest error: {bt['error']}") + print(f" ... {int(progress*100)}% complete") + raise RuntimeError("backtest timed out") + + +def extract_stats(bt): + # 'statistics' has the full set of metrics we need + stats = bt.get("statistics") or {} + + def get(key, fallback="N/A"): + val = stats.get(key, fallback) + return val if val not in ("", None) else fallback + + return { + "net_profit": get("Net Profit"), + "drawdown": get("Drawdown"), + "sharpe": get("Sharpe Ratio"), + } + + +# --------------------------------------------------------------------------- +# Model variants per category +# --------------------------------------------------------------------------- + +BASE = Path(__file__).parent.parent + +CATEGORIES = [ + { + "name": "Sentiment Analysis", + "file": BASE / "03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/99 Examples.html", + "placeholder": "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis", + "models": [ + "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis", + "ahmedrachid/FinancialBERT-Sentiment-Analysis", + "AventIQ-AI/sentiment-analysis-for-stock-market-sentiment", + "bardsai/finance-sentiment-fr-base", + 
"cardiffnlp/twitter-roberta-base-sentiment-latest", + "nickmuchi/deberta-v3-base-finetuned-finance-text-classification", + "nickmuchi/distilroberta-finetuned-financial-text-classification", + "nickmuchi/sec-bert-finetuned-finance-classification", + "StephanAkkerman/FinTwitBERT-sentiment", + ], + }, + { + "name": "Fill-Mask", + "file": BASE / "03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html", + "placeholder": "distilbert/distilbert-base-uncased", + "models": [ + "distilbert/distilbert-base-uncased", + "FacebookAI/roberta-base", + "google-bert/bert-base-uncased", + "microsoft/deberta-base", + ], + }, + { + "name": "Text Generation", + "file": BASE / "03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html", + "placeholder": "openai-community/gpt2", + "models": [ + "openai-community/gpt2", + "google/gemma-7b", + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + ], + }, + { + "name": "Chronos-Bolt", + "file": BASE / "03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html", + "placeholder": "autogluon/chronos-bolt-base", + "models": [ + "autogluon/chronos-bolt-base", + ], + }, +] + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + if not USER_ID or not USER_TOKEN: + sys.exit( + "Error: Set DOCS_REGRESSION_TEST_USER_ID and " + "DOCS_REGRESSION_TEST_USER_TOKEN environment variables." + ) + + resp = api_post("authenticate") + if not resp.get("success"): + sys.exit("API authentication failed. 
Check your credentials.") + print("API authentication successful.\n") + + results = [] # list of (category, model, stats_dict | error_str) + + for cat in CATEGORIES: + if cat["name"] in SKIP_CATEGORIES: + print(f"\nSkipping: {cat['name']}") + continue + print(f"\n{'='*60}") + print(f"Category: {cat['name']}") + print(f"{'='*60}") + + try: + base_code = extract_python_code(str(cat["file"])) + except Exception as e: + print(f" ERROR reading code: {e}") + for model in cat["models"]: + results.append((cat["name"], model, f"code read error: {e}")) + continue + + for model in cat["models"]: + label = f"{cat['name'][:8]}_{model.split('/')[-1][:20]}" + print(f"\n Model: {model}") + try: + code = base_code.replace(cat["placeholder"], model) + + print(f" Creating project...") + project_id = create_project_and_upload(code, label) + + print(f" Compiling (project {project_id})...") + compile_id = compile_project(project_id) + + print(f" Running backtest...") + bt_start = time.time() + bt = run_backtest(project_id, compile_id, label) + bt_elapsed = time.time() - bt_start + + if model == cat["placeholder"] and bt_elapsed > SLOW_THRESHOLD: + mark_skip_test(str(cat["file"])) + + stats = extract_stats(bt) + print(f" PASS — net profit={stats['net_profit']} " + f"drawdown={stats['drawdown']} sharpe={stats['sharpe']} " + f"({int(bt_elapsed)}s)") + results.append((cat["name"], model, stats)) + + except Exception as e: + import traceback + traceback.print_exc() + print(f" FAIL — {e}") + results.append((cat["name"], model, f"FAIL: {e}")) + + # ----------------------------------------------------------------------- + # Print results table + # ----------------------------------------------------------------------- + print("\n\n" + "="*80) + print("RESULTS TABLE (Markdown)") + print("="*80) + print() + + # Group by category + categories_seen = [] + rows_by_cat = {} + for cat_name, model, stats in results: + if cat_name not in rows_by_cat: + rows_by_cat[cat_name] = [] + 
categories_seen.append(cat_name) + rows_by_cat[cat_name].append((model, stats)) + + for cat_name in categories_seen: + rows = rows_by_cat[cat_name] + print(f"### {cat_name}\n") + print("| Model | Net Profit | Drawdown | Sharpe Ratio |") + print("|-------|-----------|----------|--------------|") + for model, stats in rows: + if isinstance(stats, dict): + np_ = stats['net_profit'] + dd = stats['drawdown'] + sr = stats['sharpe'] + print(f"| `{model}` | {np_} | {dd} | {sr} |") + else: + print(f"| `{model}` | {stats} | — | — |") + print() + + # Also dump raw JSON for debugging + output_path = Path(__file__).parent / "model_variant_results.json" + with open(output_path, "w") as f: + json.dump(results, f, indent=2, default=str) + print(f"Raw results saved to {output_path}") + + +if __name__ == "__main__": + main()