In [18]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import pandas as pd
import json
from datetime import datetime
from pandas.tseries.offsets import BDay
from collections import defaultdict
import os

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# 1. loading fine tuned OPT and tokenizer
model_path = r"c:\Users\sanya\sentimentally_trading\opt-finetuned"

model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

c:\Users\sanya\sentimentally_trading


OSError: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory ./opt-finetuned.

In [None]:
# 2. the headline
headline = "Google, Meta face penalties for anti-competitive behaviour towards South African news media."

In [None]:
# 3. tokenize the headline
inputs = tokenizer(
    headline,
    padding='max_length',
    truncation=True,
    max_length=128,
    return_tensors="pt"
)

In [None]:
# 4. eval_mode and then inference
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1)
    predicted_class = torch.argmax(probabilities).item()

NameError: name 'model' is not defined

In [None]:
# 5. print results
class_labels = {0: "We Down", 1: "We Up"}
print(f"Input: {headline}")
print(f"Predicted class: {predicted_class} ({class_labels[predicted_class]})")
print(f"Class probabilities: {probabilities.tolist()[0]}")
print(predicted_class)

Input: Google, Meta face penalties for anti-competitive behaviour towards South African news media.
Predicted class: 1 (We Up)
Class probabilities: [0.09504533559083939, 0.904954731464386]


In [None]:
def predict_movement(headline):
    inputs = tokenizer(headline, return_tensors="pt",
                       padding=True, truncation=True)
    outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(probabilities, dim=-1).item()
    return predicted_class, probabilities

In [None]:
def long_short_strategy(predicted_class, current_price, portfolio, position, shares):
    if predicted_class == 1:
        #print("Long the stock")
        if position == "short":
            # Close short position
            portfolio += shares * current_price
            shares = 0
            position = "none"
        if position == "none":
            # Open long position
            amount_to_invest = min(0.2 * portfolio, portfolio)
            shares = amount_to_invest // current_price
            portfolio -= shares * current_price
            position = "long"
    else:
        #print("Short the stock")
        if position == "long":
            # Close long position
            portfolio += shares * current_price
            shares = 0
            position = "none"
        if position == "none":
            # Open short position
            amount_to_invest = min(0.2 * portfolio, portfolio)
            shares = amount_to_invest // current_price
            portfolio += shares * current_price
            position = "short"

    return portfolio, position, shares

In [None]:
# testing !

# get stock returns data
goog = pd.read_csv("stock_returns.csv")

# get the news data
with open("test_news.json", "r") as json_file:
    google_news = json.load(json_file)

# init portfolio with 10k
portfolio = 10000
position = "none"
shares = 0

news_by_date = defaultdict(list)
for news_item in google_news:
    news_date = news_item['datetime']
    news_by_date[news_date].append(news_item['headline'])

KeyError: 'Date'

In [None]:
portfolio_data = defaultdict()

In [None]:

# go through the news data
for news_date, headlines in news_by_date.items():
    # Convert news_date to datetime object
    news_date_dt = datetime.strptime(news_date, '%Y-%m-%d')

    # Find the next business day for the opening price
    next_business_day = news_date_dt
    while next_business_day.strftime('%Y-%m-%d') not in goog['Date'].values:
        next_business_day += BDay(1)
    next_business_day_str = next_business_day.strftime('%Y-%m-%d')

    # Get the opening price on the next business day
    try:
        current_price = goog.loc[goog['Date'] ==
                                 next_business_day_str, 'Open'].values[0]
        print(f"Opening price on {next_business_day_str}: {current_price}")
    except IndexError:
        # If the date is not found in the stock data, skip this news item
        print(
            f"Skipping news item on {news_date} - opening price not found for {next_business_day_str}")
        continue

    # get predictions for each headline
    predictions = [predict_movement(headline)[0] for headline in headlines]
    # find the majority prediction
    majority_prediction = max(set(predictions), key=predictions.count)
    print(
        f"Majority prediction for date {news_date_dt} is: {majority_prediction}")

    # Apply the long-short strategy based on the majority prediction
    portfolio, position, shares = long_short_strategy(
        majority_prediction, current_price, portfolio, position, shares)
    #print(f"Current portfolio price: {portfolio}")
    portfolio_data[]
    

print(f"Final portfolio value: {portfolio}")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-31 00:00:00 is: 0
Current portfolio price: 11954.985719296024
Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-30 00:00:00 is: 1
Current portfolio price: 11140.408336256014
Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-29 00:00:00 is: 0
Current portfolio price: 16679.534540928085
Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-28 00:00:00 is: 0
Current portfolio price: 16679.534540928085
Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-27 00:00:00 is: 0
Current portfolio price: 16679.534540928085
Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-26 00:00:00 is: 0
Current portfolio price: 16679.534540928085
Opening price on 2024-09-03: 162.91547660800205
Majority prediction for date 2024-03-25 00:00:00 is: 0
Current portfol

In [11]:
import statsmodels.api as sm
from statsmodels.stats.cluster import clogit
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

stock_data_train = pd.read_csv('stock_returns_training.csv')
stock_data_train = stock_data_train.dropna()

stock_data_test = pd.read_csv('stock_returns.csv')
stock_data_test = stock_data_test.dropna()

#sentiments
merged = pd.merge(stock_data_test, predicted_class, on= 'Date', how='inner')

#Create a column for the subsequent day's return (assuming stock_data has 'Return' for daily returns)
merged['Next Day Return'] = merged.groupby('Stock_ID')['Close'].shift(-1)  # 'Stock_ID' is assumed to be the identifier

# Feature matrix X and target variable y
# Assuming 'Sentiment Label' and 'Probability' columns represent the language model scores (from your models)
X = merged[['Sentiment Label', 'Probability']]  # You can include more language model features if needed
y = merged['Next Day Return']  # The subsequent day's return

# Adding fixed effects columns for 'Firm' and 'Date'
merged['Firm'] = merged['Stock_ID']
merged['Date'] = pd.to_datetime(merged['Date'])

# Add constant term for intercept
X = sm.add_constant(X)

# Double clustering by firm and date
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': [merged['Firm'], merged['Date']]})

# Print the regression results
print(model.summary())

# Predict using the regression model
y_pred = model.predict(X)

# Evaluate the model (Mean Squared Error and R-squared)
mse = mean_squared_error(y, y_pred)
print(f'Mean Squared Error: {mse}')

ModuleNotFoundError: No module named 'statsmodels.stats.cluster'