In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import pandas as pd
import json
from datetime import datetime
from pandas.tseries.offsets import BDay

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 1. Load the tokenizer and the fine-tuned OPT sequence classifier from the
#    same local checkpoint directory.
# NOTE(review): the output recorded for this cell warns that 'score.weight'
# was newly initialized rather than loaded -- confirm that the classification
# head was actually saved in ./opt-finetuned before trusting predictions.
model_path = "./opt-finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-2.7b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# 2. Example headline that the cells below classify
headline = (
    "Google, Meta face penalties for anti-competitive behaviour "
    "towards South African news media."
)

In [8]:
# 3. Encode the headline as PyTorch tensors, padded/truncated to 128 tokens
tokenizer_options = {
    "padding": "max_length",
    "truncation": True,
    "max_length": 128,
    "return_tensors": "pt",
}
inputs = tokenizer(headline, **tokenizer_options)

In [9]:
# 4. Put the model in eval mode, then run one gradient-free forward pass
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    # Normalise the logits along the class axis into probabilities.
    probabilities = logits.softmax(dim=1)
    # Index of the most probable class (the batch holds a single headline).
    predicted_class = probabilities.argmax().item()

In [10]:
# 5. Report the prediction in human-readable form
class_labels = {0: "We Down", 1: "We Up"}
predicted_label = class_labels[predicted_class]
class_probabilities = probabilities[0].tolist()  # first (only) row of the batch
print(f"Input: {headline}")
print(f"Predicted class: {predicted_class} ({predicted_label})")
print(f"Class probabilities: {class_probabilities}")

Input: Google, Meta face penalties for anti-competitive behaviour towards South African news media.
Predicted class: 1 (We Up)
Class probabilities: [0.09504533559083939, 0.904954731464386]


In [None]:
def predict_movement(headline):
    """Classify a single headline with the notebook-level model and tokenizer.

    Args:
        headline: Text of one news headline.

    Returns:
        A ``(predicted_class, probabilities)`` tuple: the integer index of the
        most probable class and the softmax probabilities tensor produced from
        the model's logits.
    """
    inputs = tokenizer(headline, return_tensors="pt",
                       padding=True, truncation=True)
    # Inference only: disable autograd so no graph is built and the returned
    # probabilities do not require grad (matches the earlier inference cell).
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # .item() requires a single prediction, i.e. one headline per call.
    predicted_class = torch.argmax(probabilities, dim=-1).item()
    return predicted_class, probabilities

In [None]:
def long_short_strategy(headline):
    """Print the trade direction suggested by the headline's predicted class.

    Class 1 maps to a long recommendation and any other class to a short one.
    Nothing is returned.

    NOTE: a later cell defines another ``long_short_strategy`` with a different
    signature; once that cell runs, this one-argument version is shadowed.
    """
    direction, _probs = predict_movement(headline)
    # Placeholder: a real long/short implementation would hang off this choice.
    message = (
        "Strategy: Go Long on the stock"
        if direction == 1
        else "Strategy: Go Short on the stock"
    )
    print(message)

In [None]:
# Function to implement the long-short strategy
def long_short_strategy(headline, current_price, portfolio, position, shares):
    """Rebalance a single-stock position based on the headline's predicted class.

    A prediction of class 1 means "go long"; any other class means "go short".
    An opposite position is closed first, then a new position worth at most
    20% of the available cash is opened. If the position already matches the
    prediction, nothing changes.

    Args:
        headline: News headline passed to ``predict_movement``.
        current_price: Price at which all trades in this call are executed.
        portfolio: Cash balance before the trade.
        position: One of ``"none"``, ``"long"`` or ``"short"``.
        shares: Number of shares currently held long or sold short.

    Returns:
        The updated ``(portfolio, position, shares)`` tuple.
    """
    predicted_class, _ = predict_movement(headline)

    if predicted_class == 1:
        print("Strategy: Go Long on the stock")
        if position == "short":
            # Cover the short: buying the borrowed shares back COSTS cash
            # (the sale proceeds were already credited when the short opened).
            portfolio -= shares * current_price
            shares = 0
            position = "none"
        if position == "none":
            # Open long position with 20% of cash; never size a trade from a
            # negative balance.
            amount_to_invest = max(0.0, 0.2 * portfolio)
            shares = amount_to_invest // current_price
            portfolio -= shares * current_price
            position = "long"
    else:
        print("Strategy: Go Short on the stock")
        if position == "long":
            # Close long position: selling the shares returns cash.
            portfolio += shares * current_price
            shares = 0
            position = "none"
        if position == "none":
            # Open short position sized at 20% of cash; the sale proceeds are
            # credited now and paid back when the short is covered.
            amount_to_invest = max(0.0, 0.2 * portfolio)
            shares = amount_to_invest // current_price
            portfolio += shares * current_price
            position = "short"

    return portfolio, position, shares


# Load the stock price data.
# Assumes a 'Date' column of 'YYYY-MM-DD' strings and an 'Open' column --
# TODO confirm against stock_returns.csv.
goog = pd.read_csv("stock_returns.csv")

# Load the news data
with open("google_news.json", "r") as json_file:
    google_news = json.load(json_file)

# Date -> opening price lookup, built once instead of scanning the frame for
# every headline. The first row wins if a date appears more than once.
open_by_date = goog.drop_duplicates(subset="Date").set_index("Date")["Open"]
# Latest date covered by the price data; bounds the trading-day search below.
last_trading_day = pd.to_datetime(goog["Date"], errors="coerce").max()

# Initialize portfolio
portfolio = 10000
position = "none"
shares = 0
last_price = None  # most recent execution price, used to value an open position

# Iterate through the news data
# NOTE(review): positions are updated in file order -- presumably the news
# items are sorted chronologically; verify.
for news_item in google_news:
    headline = news_item['headline']
    news_date = news_item['datetime']

    # Convert news_date to datetime object
    news_date_dt = datetime.strptime(news_date, '%Y-%m-%d')

    # Find the first trading day on or after the news date. The search stops
    # once it passes the end of the price data, so a headline dated after the
    # last available quote is skipped instead of looping forever.
    # NOTE(review): when the news date is itself a trading day, that day's
    # open is used, which may precede the headline's publication -- confirm.
    next_business_day = pd.Timestamp(news_date_dt)
    next_business_day_str = next_business_day.strftime('%Y-%m-%d')
    while (next_business_day_str not in open_by_date.index
           and next_business_day <= last_trading_day):
        next_business_day += BDay(1)
        next_business_day_str = next_business_day.strftime('%Y-%m-%d')

    # Get the opening price on that trading day
    if next_business_day_str not in open_by_date.index:
        # If the date is not found in the stock data, skip this news item
        print(
            f"Skipping news item on {news_date} - opening price not found for {next_business_day_str}")
        continue
    current_price = open_by_date[next_business_day_str]
    last_price = current_price

    # Apply the long-short strategy
    portfolio, position, shares = long_short_strategy(
        headline, current_price, portfolio, position, shares)

# `portfolio` only tracks cash. Value any position that is still open at the
# last execution price so the reported figure reflects total equity.
if position == "long" and last_price is not None:
    final_value = portfolio + shares * last_price
elif position == "short" and last_price is not None:
    final_value = portfolio - shares * last_price
else:
    final_value = portfolio

print(f"Final portfolio value: {final_value}")