# **1. Import the necessary libraries**

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# **2. Financial Strategy Generation Using GPT2 model**

In [2]:
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Function to generate investment strategy
def generate_investment_strategy(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=200,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    # return tokenizer.decode(outputs[0], skip_special_tokens=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Prompts
prompts = [
    "An investor is interested in diversifying their portfolio with a focus on high-growth technology stocks. Provide a strategy that includes recommendations for risk management and portfolio diversification.",
    "An investor is looking to create a balanced portfolio with a mix of stocks, bonds, and commodities.Provide a strategy that includes asset allocation suggestions and risk management techniques.",
    "A conservative investor wants to focus on low-risk, income-generating assets like dividend stocks and bonds. Provide a strategy with recommendations for building a stable income-focused portfolio."
]

# Generate strategies
for prompt in prompts:
    print("Prompt:", prompt)
    print("Generated Strategy:\n", generate_investment_strategy(prompt))
    print("=" * 80)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Prompt: An investor is interested in diversifying their portfolio with a focus on high-growth technology stocks. Provide a strategy that includes recommendations for risk management and portfolio diversification.
Generated Strategy:
 An investor is interested in diversifying their portfolio with a focus on high-growth technology stocks. Provide a strategy that includes recommendations for risk management and portfolio diversification.

In addition, it is important to understand the risk profile of high-growth technology stocks. For investors who want to reduce risks, the risk profile of high-growth technology stocks can be summarized as follows:

1. High-growth technology stocks are generally high in total assets, with no significant correlation to growth, but they are also much higher in total assets than high-growth stocks.

2. High-growth stocks are generally very volatile, with low returns and limited growth.

3. High-growth stocks are generally very volatile, with high returns and

**`Tried to use FinGPT which is specific to financial data but getting some error`**

In [1]:
# from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizerFast
# from peft import PeftModel  # 0.5.0
# import torch

# # Load Models
# base_model = "meta-llama/Meta-Llama-3-8B"
# peft_model = "FinGPT/fingpt-mt_llama3-8b_lora"
# tokenizer = LlamaTokenizerFast.from_pretrained(base_model, trust_remote_code=True)
# tokenizer.pad_token = tokenizer.eos_token
# model = LlamaForCausalLM.from_pretrained(base_model, trust_remote_code=True, device_map = "cuda:0")
# model = PeftModel.from_pretrained(model, peft_model)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

# # Prompts
# prompt = [
#     "An investor is interested in diversifying their portfolio with a focus on high-growth technology stocks. Provide a strategy that includes recommendations for risk management and portfolio diversification."
#     ]

# tokens = tokenizer(prompt, return_tensors='pt', padding=True, max_length=512).to(device)
# res = model.generate(**tokens, max_length=512)
# res_sentences = [tokenizer.decode(i) for i in res]
# out_text = [o.split("Answer: ")[1] for o in res_sentences]




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


OSError: Can't load tokenizer for 'meta-llama/Meta-Llama-3-8B'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'meta-llama/Meta-Llama-3-8B' is the correct path to a directory containing all relevant files for a LlamaTokenizerFast tokenizer.

# **3. Sentiment analysis of Financial Tweets Using distilbert model.**

In [10]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load a sentiment analysis model
sentiment_model_name = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)

# Function for sentiment analysis
def analyze_sentiment(text):
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = sentiment_model(**inputs)
    logits = outputs.logits
    sentiment = torch.argmax(logits, dim=1).item()

    sentiments = ["Negative", "Neutral", "Positive"]
    confidence = torch.nn.functional.softmax(logits, dim=1).max().item()
    return sentiments[sentiment], confidence

# Example Tweets
tweets = [
    "Canadian dollar slumps to near 5-year low as traders bet on big Bank of Canada cut",
    "Stocks are surging today, with tech companies leading the way! Bullish sentiment driving the market!",
    "The market is in turmoil due to unexpected inflation data. Investors are panicking.",
    "U.S. stocks have never been so overhyped by investors, relative to the rest of the world"
]

# Analyze sentiment for each tweet
for tweet in tweets:
    sentiment, confidence = analyze_sentiment(tweet)
    print(f"Tweet: {tweet}")
    print(f"Sentiment: {sentiment}, Confidence: {confidence:.2f}")
    print("=" * 80)

Tweet: Canadian dollar slumps to near 5-year low as traders bet on big Bank of Canada cut
Sentiment: Negative, Confidence: 1.00
Tweet: Stocks are surging today, with tech companies leading the way! Bullish sentiment driving the market!
Sentiment: Neutral, Confidence: 0.98
Tweet: The market is in turmoil due to unexpected inflation data. Investors are panicking.
Sentiment: Negative, Confidence: 0.99
Tweet: U.S. stocks have never been so overhyped by investors, relative to the rest of the world
Sentiment: Neutral, Confidence: 0.99


# **4. Sentiment analysis Using FinBert model which is specific to finance data**

In [5]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline


In [6]:
finbert= BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
tokenizer=BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')

config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [9]:
nlp=pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)

tweets = [
    "Canadian dollar slumps to near 5-year low as traders bet on big Bank of Canada cut",
    "Stocks are surging today, with tech companies leading the way! Bullish sentiment driving the market!",
    "The market is in turmoil due to unexpected inflation data. Investors are panicking.",
    "U.S. stocks have never been so overhyped by investors, relative to the rest of the world"
]
results=nlp(tweets)
print(results)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'Negative', 'score': 0.9999197721481323}, {'label': 'Positive', 'score': 0.9999998807907104}, {'label': 'Negative', 'score': 0.9999901056289673}, {'label': 'Neutral', 'score': 0.9997472167015076}]
