# Install and import required libraries

In [None]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.2.1-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.8/255.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.2.1


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import Dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Initialize sentence transformer model for embedding creation

In [None]:
# Module-level sentence-embedding model; used further down to embed the
# knowledge base. FinancialChatbot loads its own instance of the same model.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Knowledge Base
# Flat list of one-sentence facts, grouped by topic through the section
# comments below. The whole list is passed to the embedding model and entries
# are retrieved individually by index, so each string should be a
# self-contained statement.
knowledge_base = [
    # Cryptocurrency and Basic Market Concepts
    "Bitcoin is a decentralized digital cryptocurrency.",
    "ROE (Return on Equity) measures a company's profitability relative to shareholders' equity.",
    "P/E ratio (Price to Earnings) helps evaluate if a stock is overvalued or undervalued.",
    "Market capitalization is the total value of a company's shares.",
    "A bull market refers to a market that is rising.",
    "A bear market refers to a market that is declining.",

    # NLP Trading
    "NLP (Natural Language Processing) trading uses text analysis of news, social media, and financial reports to make trading decisions.",
    "Sentiment analysis in trading measures market sentiment through text analysis of news articles and social media posts.",
    "NLP trading algorithms can process earnings call transcripts to predict stock price movements.",
    "Text mining in trading involves extracting valuable trading signals from unstructured text data.",
    "NLP trading systems often use word embeddings to convert financial text into numerical features.",
    "Topic modeling helps identify key themes in financial news that might impact market movements.",
    "Named Entity Recognition (NER) in NLP trading identifies companies, people, and events in financial texts.",

    # Hierarchical Trading
    "Hierarchical trading organizes trading strategies in a tree-like structure with different levels of decision-making.",
    "Top-level hierarchical trading decisions often involve asset allocation across different markets.",
    "Mid-level hierarchical trading focuses on sector or industry-specific strategies.",
    "Bottom-level hierarchical trading deals with individual security selection.",
    "Hierarchical risk parity is a portfolio optimization technique that considers the hierarchical structure of assets.",
    "Hierarchical clustering in trading helps identify groups of similar financial instruments.",
    "Hierarchical momentum strategies combine momentum signals at different time scales.",

    # Clustering in Trading
    "Cluster analysis in trading groups similar financial instruments based on their characteristics.",
    "K-means clustering is commonly used to identify groups of stocks with similar price movements.",
    "DBSCAN clustering can identify unusual market behavior patterns.",
    "Clustering helps in portfolio diversification by identifying truly different assets.",
    "Time series clustering groups assets with similar historical price patterns.",
    "Cluster labels can be used as features in trading algorithms.",

    # Pair Trading
    "Pair trading involves simultaneously buying one security and selling another related security.",
    "Statistical arbitrage in pair trading profits from price divergences between related securities.",
    "Cointegration is a key statistical concept used to identify suitable pairs for trading.",
    "Mean reversion is the fundamental principle behind pair trading strategies.",
    "Pairs can be selected based on fundamental similarities or statistical relationships.",
    "The spread between pairs is monitored for trading signals.",
    "Risk management in pair trading involves setting stop-loss levels for the spread.",

    # Advanced Trading Concepts
    "Kalman filters can be used to dynamically adjust pair trading ratios.",
    "Machine learning models can identify optimal entry and exit points in pair trades.",
    "Cross-sectional momentum strategies rank assets based on their relative performance.",
    "Dynamic time warping helps compare price series with different lengths or speeds.",
    "Regime detection algorithms identify different market states for strategy adjustment.",

    # Risk Management
    "Position sizing in hierarchical strategies depends on the level in the hierarchy.",
    "Correlation analysis helps measure the relationship between paired assets.",
    "Portfolio heat maps visualize hierarchical relationships between assets.",
    "Stop-loss orders should consider the volatility of the trading pair.",
    "Risk metrics should be calculated at each level of the trading hierarchy.",

    # Technical Implementation
    "Python libraries like scikit-learn provide clustering algorithms for trading.",
    "Natural Language Toolkit (NLTK) is commonly used for text processing in NLP trading.",
    "Time series databases are optimal for storing high-frequency trading data.",
    "Neural networks can learn complex patterns in hierarchical trading systems.",
    "MongoDB is often used to store unstructured financial text data.",

    # Market Microstructure
    "Tick data analysis reveals fine-grained price formation patterns.",
    "Order book dynamics influence short-term price movements.",
    "Market impact should be considered when executing pair trades.",
    "High-frequency pair trading requires sophisticated execution systems.",
    "Latency considerations are crucial in statistical arbitrage strategies.",

    # Performance Measurement
    "Sharpe ratio measures risk-adjusted returns of trading strategies.",
    "Maximum drawdown indicates the largest peak-to-trough decline.",
    "Information ratio shows excess returns relative to a benchmark.",
    "Strategy capacity indicates the maximum capital that can be deployed.",
    "Transaction costs significantly impact pair trading profitability."
]

# Create embeddings for the knowledge base

In [None]:
# Embed every knowledge-base sentence with the module-level embedding model.
# NOTE(review): nothing later in this notebook reads this module-level
# variable (FinancialChatbot computes its own embeddings), so this cell looks
# redundant -- confirm before removing.
knowledge_embeddings = embedding_model.encode(knowledge_base)

In [None]:
class FinancialChatbot:
    """Retrieval-augmented Q&A bot: semantic search over a list of facts + GPT-2.

    For every question the most similar knowledge-base sentences are retrieved
    with sentence embeddings and cosine similarity, placed into a prompt, and
    GPT-2 is asked to continue that prompt. If the generated answer is too
    short, the retrieved sentences themselves are returned instead.
    """

    def __init__(self, documents=None, max_new_tokens=100):
        """Load the generation and embedding models and embed the documents.

        Args:
            documents: Optional iterable of strings to search. Defaults to the
                module-level ``knowledge_base`` list.
            max_new_tokens: Number of tokens GPT-2 may generate *in addition
                to* the prompt for each answer.
        """
        # Initialize the model and tokenizer for text generation.
        # No `max_length` here: that limit counts the prompt tokens too, so a
        # long retrieved context could leave no room for the answer. The
        # budget is passed per call as `max_new_tokens` instead.
        self.generator = pipeline('text-generation',
                                  model='gpt2',
                                  pad_token_id=50256)  # reuse GPT-2's end-of-text id as padding
        self.max_new_tokens = max_new_tokens

        # Initialize sentence transformer for semantic search
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

        # Embed the documents once up front; they do not change between
        # queries, so re-encoding them on every question is wasted work.
        self.documents = list(knowledge_base if documents is None else documents)
        self.knowledge_embeddings = (
            self.embedding_model.encode(self.documents) if self.documents else None
        )

    @staticmethod
    def _rank_top_k(similarities, top_k):
        """Return the indices of the ``top_k`` highest scores, best match first.

        A non-positive ``top_k`` or an empty score vector yields ``[]``; a
        ``top_k`` larger than the number of scores yields every index.
        """
        scores = np.asarray(similarities)
        if top_k <= 0 or scores.size == 0:
            return []
        # argsort is ascending, so reverse before slicing. Slicing the
        # ascending order with [-top_k:] would return *everything* for
        # top_k == 0 and would list the best match last.
        return np.argsort(scores)[::-1][:top_k].tolist()

    def get_relevant_context(self, query, top_k=3):
        """
        Retrieve relevant information from knowledge base using semantic search

        Args:
            query: The user's question.
            top_k: Maximum number of sentences to retrieve.

        Returns:
            The retrieved sentences joined by single spaces, most relevant
            first; an empty string when there is nothing to retrieve.
        """
        if not self.documents:
            return ""

        query_embedding = self.embedding_model.encode([query])
        similarities = cosine_similarity(query_embedding, self.knowledge_embeddings)[0]

        best_indices = self._rank_top_k(similarities, top_k)
        return " ".join(self.documents[i] for i in best_indices)

    def generate_response(self, user_input):
        """
        Generate chatbot response using the model and relevant context

        Args:
            user_input: The user's question.

        Returns:
            The generated answer, or a fallback sentence built from the
            retrieved context when the generated answer is shorter than 20
            characters.
        """
        # Get relevant context
        context = self.get_relevant_context(user_input)

        # Construct prompt
        prompt = f"Based on this context: {context}\n\nQuestion: {user_input}\n\nDetailed Answer:"

        # Generate response. `return_full_text=False` asks the pipeline for the
        # continuation only, so the answer never has to be recovered by
        # splitting on a marker string that the user's question could contain.
        outputs = self.generator(prompt,
                                 max_new_tokens=self.max_new_tokens,
                                 num_return_sequences=1,
                                 temperature=0.7,
                                 do_sample=True,
                                 return_full_text=False)
        generated = outputs[0]['generated_text']

        # Defensive: if the generator echoed the prompt anyway, drop it.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        answer = generated.strip()

        # If answer is too short or empty, provide a fallback response
        if len(answer) < 20:
            return "Let me explain based on the available information: " + context

        return answer

In [None]:
# Initialize chatbot
# Constructing the bot builds the GPT-2 text-generation pipeline and a
# sentence-embedding model (see FinancialChatbot.__init__).
chatbot = FinancialChatbot()

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def chat(bot=None):
    """Run an interactive question/answer loop on the console.

    Reads lines with ``input()`` until the user types ``quit`` (case and
    surrounding whitespace are ignored) or the input stream ends / the kernel
    is interrupted. Blank lines are skipped instead of being sent to the model.

    Args:
        bot: Object exposing ``generate_response(str) -> str``. Defaults to
            the module-level ``chatbot`` instance.
    """
    # Resolve the default lazily so the global is only needed when no bot is given.
    if bot is None:
        bot = chatbot

    print("Financial Chatbot: Hi! I'm here to help you with financial information. Type 'quit' to exit.")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the chat cleanly
            # instead of surfacing a traceback.
            print("\nFinancial Chatbot: Goodbye!")
            break

        if user_input.lower() == 'quit':
            print("Financial Chatbot: Goodbye!")
            break

        if not user_input:
            # Nothing to answer; prompt again.
            continue

        try:
            response = bot.generate_response(user_input)
        except Exception as e:
            # Keep the loop alive, but show what went wrong rather than
            # hiding the failure completely.
            print("Financial Chatbot: I apologize, but I encountered an error "
                  f"({type(e).__name__}: {e}). Could you rephrase your question?")
        else:
            print(f"Financial Chatbot: {response}")

In [None]:
# Run the chatbot
# The guard starts the loop only when this code runs as the main program, not
# when it is imported. chat() blocks on input() until the user types 'quit'.
if __name__ == "__main__":
    chat()

Financial Chatbot: Hi! I'm here to help you with financial information. Type 'quit' to exit.
You: what is pair trading


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Financial Chatbot: Pair trading is a trade of pairs of two identical securities. This trading involves the acquisition of both a security and a seller's share in the security, as well as a seller's share in the security's price.

The value of the security is determined by the sale price of the security. The seller's share in the security is determined by the value of the security. The value of the security is determined by the exchange rate of the security's price.

The value of a security depends
You: quit
Financial Chatbot: Goodbye!
