diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json index 2a337d7966..8de9478bae 100644 --- a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/00.json @@ -7,6 +7,10 @@ "featureShortDescription": { "03" : "Time series forecasting", "04" : "Question Answering", - "05" : "Sentiment analysis" + "05" : "Sentiment analysis", + "06" : "Text classification", + "07" : "Feature extraction", + "08" : "Text generation", + "12" : "Time series forecasting" } } \ No newline at end of file diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/01 Introduction.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/01 Introduction.html new file mode 100644 index 0000000000..12f5a7c2f5 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/06 Sentiment Analysis/01 Introduction.html @@ -0,0 +1,14 @@ +
This page explains how to use Hugging Face sentiment analysis models in LEAN trading algorithms. These models classify financial text into sentiment categories like positive, negative, and neutral. The following models are available:
+ +All of these models accept text input and return classification labels with confidence scores. You can use them with the Hugging Face transformers library to analyze the sentiment of financial news and social media posts, then use the results to inform trading decisions.
+ The following examples demonstrate usage of Hugging Face sentiment analysis models. +
++ The following algorithm selects the most volatile asset at the beginning of each month. + It gets the Tiingo News articles that were released for the asset over the previous 10 days and then feeds them into a sentiment analysis model. + It aggregates the sentiment scores of all the news releases. + If the aggregated sentiment is positive, it enters a long position for the month. + If it's negative, it enters a short position. + You can replace the model name with any of the sentiment analysis models listed on the introduction page. +
+from transformers import pipeline, set_seed
+
+class SentimentAnalysisModelAlgorithm(QCAlgorithm):
+
+ def initialize(self):
+ self.set_start_date(2024, 9, 1)
+ self.set_end_date(2024, 12, 31)
+ self.set_cash(100_000)
+
+ self.universe_settings.resolution = Resolution.DAILY
+ self.universe_settings.schedule.on(self.date_rules.month_start("SPY"))
+ self._universe = self.add_universe(
+ lambda fundamental: [
+ self.history(
+ [f.symbol for f in sorted(
+ fundamental, key=lambda f: f.dollar_volume
+ )[-10:]],
+ timedelta(365), Resolution.DAILY
+ )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
+ ]
+ )
+
+ set_seed(1, True)
+
+ # Load the sentiment analysis pipeline.
+ # Replace the model name with any supported sentiment model.
+ self._sentiment_pipeline = pipeline(
+ "text-classification",
+ model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
+ )
+
+ self._last_rebalance_time = datetime.min
+ self.set_warm_up(30, Resolution.DAILY)
+
+ def on_warmup_finished(self):
+ self._trade()
+ self.schedule.on(
+ self.date_rules.month_start("SPY", 1),
+ self.time_rules.midnight,
+ self._trade
+ )
+
+ def on_securities_changed(self, changes):
+ for security in changes.removed_securities:
+ self.remove_security(security.dataset_symbol)
+ for security in changes.added_securities:
+ security.dataset_symbol = self.add_data(
+ TiingoNews, security.symbol
+ ).symbol
+
+ def _trade(self):
+ if (self.is_warming_up or
+ self.time - self._last_rebalance_time < timedelta(14)):
+ return
+
+ # Get the target security.
+ security = self.securities[list(self._universe.selected)[0]]
+
+ # Get the latest news articles.
+ articles = self.history[TiingoNews](
+ security.dataset_symbol, 10, Resolution.DAILY
+ )
+ article_text = [
+ article.description for article in articles
+ if article.description
+ ]
+ if not article_text:
+ return
+
+ # Run sentiment analysis on each article.
+ # Truncate long articles to the model's max length.
+ results = self._sentiment_pipeline(
+ article_text, truncation=True, max_length=512
+ )
+
+ # Aggregate sentiment scores.
+ positive_score = 0
+ negative_score = 0
+ for result in results:
+ label = result['label'].lower()
+ score = result['score']
+ if 'pos' in label:
+ positive_score += score
+ elif 'neg' in label:
+ negative_score += score
+
+ self.plot("Sentiment", "Positive", positive_score)
+ self.plot("Sentiment", "Negative", negative_score)
+
+ # Rebalance based on sentiment.
+ weight = 1 if positive_score > negative_score else -0.25
+ self.set_holdings(
+ security.symbol, weight,
+ liquidate_existing_holdings=True
+ )
+ self._last_rebalance_time = self.time
+This page explains how to use Hugging Face fill-mask models in LEAN trading algorithms. Fill-mask models predict the most likely word to fill a masked position in a sentence. You can use them to extract text embeddings and build feature vectors from financial text. The following models are available:
+ +These models are useful for extracting text embeddings from financial news. You can feed these embeddings into a downstream classifier or use cosine similarity to measure the semantic similarity between documents.
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html new file mode 100644 index 0000000000..ca79c40812 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/07 Fill-Mask/99 Examples.html @@ -0,0 +1,129 @@ ++ The following examples demonstrate usage of Hugging Face fill-mask models for feature extraction. +
++ The following algorithm selects a volatile asset at the beginning of each month. + It uses a fill-mask model to extract embeddings from Tiingo News articles. + It then compares the average embedding of recent news to a reference "bullish" and "bearish" embedding. + If the recent news is more similar to the bullish reference, it enters a long position. + You can replace the model name with any of the fill-mask models listed on the introduction page. +
+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModel, set_seed
+
+class FillMaskEmbeddingAlgorithm(QCAlgorithm):
+
+ def initialize(self):
+ self.set_start_date(2024, 9, 1)
+ self.set_end_date(2024, 12, 31)
+ self.set_cash(100_000)
+
+ self.universe_settings.resolution = Resolution.DAILY
+ self.universe_settings.schedule.on(self.date_rules.month_start("SPY"))
+ self._universe = self.add_universe(
+ lambda fundamental: [
+ self.history(
+ [f.symbol for f in sorted(
+ fundamental, key=lambda f: f.dollar_volume
+ )[-10:]],
+ timedelta(365), Resolution.DAILY
+ )['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
+ ]
+ )
+
+ set_seed(1, True)
+
+ # Load the model and tokenizer.
+ # Replace with any fill-mask model (e.g., google-bert/bert-base-uncased).
+ model_name = "distilbert/distilbert-base-uncased"
+ self._tokenizer = AutoTokenizer.from_pretrained(model_name)
+ self._model = AutoModel.from_pretrained(model_name)
+ self._model.eval()
+
+ # Create reference embeddings for bullish/bearish text.
+ self._bullish_embedding = self._get_embedding(
+ "Stock prices surged on strong earnings and revenue growth."
+ )
+ self._bearish_embedding = self._get_embedding(
+ "Stock prices plunged on weak earnings and declining revenue."
+ )
+
+ self._last_rebalance_time = datetime.min
+ self.set_warm_up(30, Resolution.DAILY)
+
+ def on_warmup_finished(self):
+ self._trade()
+ self.schedule.on(
+ self.date_rules.month_start("SPY", 1),
+ self.time_rules.midnight,
+ self._trade
+ )
+
+ def on_securities_changed(self, changes):
+ for security in changes.removed_securities:
+ self.remove_security(security.dataset_symbol)
+ for security in changes.added_securities:
+ security.dataset_symbol = self.add_data(
+ TiingoNews, security.symbol
+ ).symbol
+
+ def _get_embedding(self, text):
+ """Extract the [CLS] token embedding from the model."""
+ inputs = self._tokenizer(
+ text, return_tensors="pt", truncation=True, max_length=512
+ )
+ with torch.no_grad():
+ outputs = self._model(**inputs)
+ # Use the [CLS] token (first token) embedding.
+ return outputs.last_hidden_state[:, 0, :].squeeze().numpy()
+
+ def _cosine_similarity(self, a, b):
+ return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+ def _trade(self):
+ if (self.is_warming_up or
+ self.time - self._last_rebalance_time < timedelta(14)):
+ return
+
+ # Get the target security.
+ security = self.securities[list(self._universe.selected)[0]]
+
+ # Get the latest news articles.
+ articles = self.history[TiingoNews](
+ security.dataset_symbol, 10, Resolution.DAILY
+ )
+ article_text = [
+ article.description for article in articles
+ if article.description
+ ]
+ if not article_text:
+ return
+
+ # Get embeddings for each article and average them.
+ embeddings = [self._get_embedding(text) for text in article_text]
+ avg_embedding = np.mean(embeddings, axis=0)
+
+ # Compare to reference embeddings.
+ bullish_sim = self._cosine_similarity(
+ avg_embedding, self._bullish_embedding
+ )
+ bearish_sim = self._cosine_similarity(
+ avg_embedding, self._bearish_embedding
+ )
+
+ self.plot("Similarity", "Bullish", bullish_sim)
+ self.plot("Similarity", "Bearish", bearish_sim)
+
+ # Rebalance based on similarity.
+ weight = 1 if bullish_sim > bearish_sim else -0.25
+ self.set_holdings(
+ security.symbol, weight,
+ liquidate_existing_holdings=True
+ )
+ self._last_rebalance_time = self.time
+This page explains how to use Hugging Face text generation models in LEAN trading algorithms. These models generate text given an input prompt, which you can use for tasks like summarizing financial data or generating structured analysis. The following models are available:
+ +Text generation models can analyze market context and generate structured outputs. You can prompt them to classify market conditions or extract trading signals from financial text. Note that larger models like Gemma-7B and DeepSeek-70B require GPU nodes with sufficient memory.
diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html new file mode 100644 index 0000000000..18a4e5841b --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/08 Text Generation/99 Examples.html @@ -0,0 +1,119 @@ ++ The following examples demonstrate usage of Hugging Face text generation models. +
++ The following algorithm uses GPT-2 to classify market conditions based on recent price data. + At the beginning of each month, it calculates trailing returns, volatility, and momentum for the universe of the 5 most liquid assets. + It then prompts GPT-2 to complete a structured market analysis template and parses the generated text to determine position sizing. +
+import numpy as np
+from transformers import pipeline, set_seed
+class GPT2MarketAnalysisAlgorithm(QCAlgorithm):
+
+ def initialize(self):
+ self.set_start_date(2024, 9, 1)
+ self.set_end_date(2024, 12, 31)
+ self.set_cash(100_000)
+
+ self.settings.min_absolute_portfolio_target_percentage = 0
+
+ set_seed(1, True)
+
+ # Load the text generation pipeline with GPT-2.
+ self._generator = pipeline(
+ "text-generation",
+ model="openai-community/gpt2"
+ )
+
+ # Define the universe.
+ spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
+ self.universe_settings.schedule.on(self.date_rules.month_start(spy))
+ self.universe_settings.resolution = Resolution.DAILY
+ self._universe = self.add_universe(
+ self.universe.top(
+ self.get_parameter('universe_size', 5)
+ )
+ )
+
+ self._last_rebalance = datetime.min
+ self.schedule.on(
+ self.date_rules.month_start(spy, 1),
+ self.time_rules.midnight,
+ self._trade
+ )
+ self.set_warm_up(timedelta(31))
+
+ def _trade(self):
+ if self.is_warming_up:
+ return
+ if self.time - self._last_rebalance < timedelta(25):
+ return
+ self._last_rebalance = self.time
+
+ symbols = list(self._universe.selected)
+ if not symbols:
+ return
+
+ # Get trailing 60-day price data.
+ history = self.history(
+ symbols, 60, Resolution.DAILY
+ )['close'].unstack(0)
+
+ scores = {}
+ for symbol in symbols:
+ prices = history[symbol].dropna()
+ if len(prices) < 20:
+ continue
+
+ # Calculate features.
+ returns_20d = (prices.iloc[-1] / prices.iloc[-20] - 1) * 100
+ volatility = prices.pct_change().std() * np.sqrt(252) * 100
+
+ # Create a structured prompt.
+ prompt = (
+ f"Stock analysis: 20-day return {returns_20d:.1f}%, "
+ f"annualized volatility {volatility:.1f}%. "
+ f"Market outlook:"
+ )
+
+ # Generate text.
+ result = self._generator(
+ prompt, max_new_tokens=30, num_return_sequences=1,
+ do_sample=True, temperature=0.7
+ )
+ generated = result[0]['generated_text'].lower()
+
+ # Parse sentiment from generated text.
+ bullish_words = ['bullish', 'growth', 'strong', 'positive', 'upward', 'buy', 'rally']
+ bearish_words = ['bearish', 'decline', 'weak', 'negative', 'downward', 'sell', 'crash']
+
+ bull_count = sum(1 for w in bullish_words if w in generated)
+ bear_count = sum(1 for w in bearish_words if w in generated)
+
+ # Combine model signal with momentum.
+ momentum_signal = 1 if returns_20d > 0 else -1
+ model_signal = bull_count - bear_count
+ scores[symbol] = momentum_signal + model_signal * 0.5
+
+ if not scores:
+ return
+
+ # Normalize scores to portfolio weights.
+ total = sum(abs(v) for v in scores.values())
+ if total == 0:
+ return
+ weights = {s: v / total for s, v in scores.items()}
+
+ # Rebalance.
+ self.set_holdings(
+ [
+ PortfolioTarget(symbol, weight)
+ for symbol, weight in weights.items()
+ ],
+ True
+ )
+This page explains how to use Chronos-Bolt in LEAN trading algorithms. The model repository provides the following description:
+ ++diff --git a/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html new file mode 100644 index 0000000000..f56403d815 --- /dev/null +++ b/03 Writing Algorithms/31 Machine Learning/04 Hugging Face/02 Popular Models/12 Chronos-Bolt/99 Examples.html @@ -0,0 +1,148 @@ ++ Chronos-Bolt models are a family of lightweight, efficient time series forecasting models. They are a follow-up to the original Chronos models, designed for faster inference and lower computational cost. + Chronos-Bolt uses a T5-based encoder-decoder architecture where the encoder processes the historical context and the decoder directly generates quantile forecasts. + Unlike the original Chronos models, Chronos-Bolt does not use tokenization, resulting in significantly faster inference. + For details, refer to the paper Chronos: Learning the Language of Time Series. +
+
+ The following examples demonstrate usage of the Chronos-Bolt model. +
++ The following algorithm selects the most liquid assets at the beginning of each month. + Once a quarter, it gets the trailing year of prices for all the assets in the universe and then forecasts the price paths over the upcoming quarter using Chronos-Bolt. + It then uses the SciPy package to find the weights that maximize the future Sharpe ratio of the portfolio and rebalances the portfolio to those weights. + Chronos-Bolt is a faster variant of Chronos-T5 that directly generates quantile forecasts. +
+import torch
+import numpy as np
+import pandas as pd
+from scipy.optimize import minimize
+from chronos import ChronosBoltPipeline
+from transformers import set_seed
+
+class ChronosBoltAlgorithm(QCAlgorithm):
+ """
+ This algorithm demonstrates how to use the Chronos-Bolt time
+ series forecasting model. It forecasts the future equity curves
+ of the 5 most liquid assets, then finds portfolio weights that
+ maximize the future Sharpe ratio. The portfolio is rebalanced
+ every 3 months.
+ """
+
+ def initialize(self):
+ self.set_start_date(2024, 9, 1)
+ self.set_end_date(2024, 12, 31)
+ self.set_cash(100_000)
+
+ self.settings.min_absolute_portfolio_target_percentage = 0
+
+ set_seed(1, True)
+
+ # Load the pre-trained Chronos-Bolt model.
+ self._pipeline = ChronosBoltPipeline.from_pretrained(
+ "autogluon/chronos-bolt-base",
+ device_map="cuda" if torch.cuda.is_available() else "cpu",
+ torch_dtype=torch.bfloat16,
+ )
+
+ # Define the universe.
+ spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
+ self.universe_settings.schedule.on(self.date_rules.month_start(spy))
+ self.universe_settings.resolution = Resolution.DAILY
+ self._universe = self.add_universe(
+ self.universe.top(
+ self.get_parameter('universe_size', 5)
+ )
+ )
+
+ self._lookback_period = timedelta(
+ 365 * self.get_parameter('lookback_years', 1)
+ )
+ self._prediction_length = 3 * 21 # Three months of trading days
+
+ # Schedule rebalances.
+ self._last_rebalance = datetime.min
+ self.schedule.on(
+ self.date_rules.month_start(spy, 1),
+ self.time_rules.midnight,
+ self._trade
+ )
+        self.set_warm_up(timedelta(31))
+
+ def _sharpe_ratio(
+ self, weights, returns, risk_free_rate,
+ trading_days_per_year=252):
+ mean_returns = returns.mean() * trading_days_per_year
+ cov_matrix = returns.cov() * trading_days_per_year
+ portfolio_return = np.sum(mean_returns * weights)
+ portfolio_std = np.sqrt(
+ np.dot(weights.T, np.dot(cov_matrix, weights))
+ )
+ sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std
+ return -sharpe_ratio
+
+ def _optimize_portfolio(self, equity_curves):
+ returns = equity_curves.pct_change().dropna()
+ num_assets = returns.shape[1]
+ initial_guess = num_assets * [1. / num_assets]
+ result = minimize(
+ self._sharpe_ratio,
+ initial_guess,
+ args=(
+ returns,
+ self.risk_free_interest_rate_model.get_interest_rate(
+ self.time
+ )
+ ),
+ method='SLSQP',
+ bounds=tuple((0, 1) for _ in range(num_assets)),
+ constraints=(
+ {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
+ )
+ )
+ return result.x
+
+ def _trade(self):
+ if self.is_warming_up:
+ return
+ if self.time - self._last_rebalance < timedelta(80):
+ return
+ self._last_rebalance = self.time
+
+ symbols = list(self._universe.selected)
+
+ # Get historical equity curves.
+ history = self.history(
+ symbols, self._lookback_period
+ )['close'].unstack(0)
+
+ # Forecast the future equity curves using Chronos-Bolt.
+        # predict() returns (num_series, num_quantiles, prediction_length).
+ all_forecasts = self._pipeline.predict(
+ [
+ torch.tensor(history[symbol].dropna())
+ for symbol in symbols
+ ],
+ self._prediction_length
+ )
+
+ # Take the median forecast for each asset.
+ forecasts_df = pd.DataFrame(
+ {
+ symbol: np.quantile(
+ all_forecasts[i].numpy(), 0.5, axis=0
+ )
+ for i, symbol in enumerate(symbols)
+ }
+ )
+
+ # Find the weights that maximize the forward Sharpe ratio.
+ optimal_weights = self._optimize_portfolio(forecasts_df)
+
+ # Rebalance the portfolio.
+ self.set_holdings(
+ [
+ PortfolioTarget(symbol, optimal_weights[i])
+ for i, symbol in enumerate(symbols)
+ ],
+ True
+ )
+