In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [3]:
df = pd.read_csv("data.tsv", sep='\t')
df

Unnamed: 0,title,score,link,summary,published,tickers
0,Электромобильный стартап Arrival экс-главы Yot...,-0.583333,https://www.rbc.ru/technology_and_media/12/05/...,"Британский электромобильный стартап Arrival, к...","Thu, 12 May 2022 05:10:01 +0300",['ARVL']
1,Экс-глава НМТП рассказал о «напряженных отноше...,-0.314286,https://www.rbc.ru/society/16/11/2020/5fb2709d...,Экс-председатель совета директоров Новороссийс...,"Fri, 20 May 2022 19:13:18 +0300",['NMTP']
2,Шрёдер отклонил предложение войти в совет дире...,-0.333333,https://www.rbc.ru/business/20/05/2022/628772b...,Его кандидатуру выдвинули в начале февраля. Ка...,"Tue, 24 May 2022 22:12:05 +0300",['GAZP']
3,Шельф берут в разработку // Генподрядчиком «Га...,0.700000,https://www.kommersant.ru/doc/5482398,"Как стало известно “Ъ”, «Аврора» Андрея Патруш...","Fri, 29 Jul 2022 00:28:00 +0300",['GAZP']
4,"Чистый убыток ""Юнипро"" в 1 полугодии 2022 года...",-0.611111,https://www.finam.ru/analysis/newsitem/chistyi...,"Чистый убыток ""Юнипро"" в 1 полугодии 2022 года...","Thu, 28 Jul 2022 12:43:00 +0300",['UPRO']
...,...,...,...,...,...,...
527,"""КуйбышевАзот"" зафиксировал рост выработки осн...",0.782609,https://www.finam.ru/analysis/newsitem/kuiybys...,"""КуйбышевАзот"" зафиксировал рост выработки осн...","Wed, 27 Jul 2022 10:15:14 +0300",['KAZT']
528,"""Газпром"" прекратил поставки газа в Латвию",-0.333333,https://www.finam.ru/analysis/newsitem/gazprom...,"С сегодняшнего дня ""Газпром"" прекратил поставк...","Sat, 30 Jul 2022 10:19:14 +0300",['GAZP']
529,"""Газпром"" опубликовал часть документов по КС ""...",-0.214286,https://www.finam.ru/analysis/newsitem/gazprom...,"""Газпром"" опубликовал часть документов по КС ""...","Fri, 29 Jul 2022 22:54:18 +0300",['GAZP']
530,"""Газпром нефть"" испытала российские поверхност...",0.574468,https://www.finam.ru/analysis/newsitem/gazprom...,"""Газпром нефть"" испытала на Холмогорском место...","Tue, 26 Jul 2022 10:24:43 +0300","['GAZP', 'SIBN']"


In [4]:

# Assuming you have a labeled dataset in a DataFrame df with columns 'Headline' and 'Sentiment'
class FinancialDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=512):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data['title'].iloc[idx]
        label = self.data['score'].iloc[idx]

        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label)
        }


In [5]:
# Assuming you have a labeled dataset df with columns 'Headline' and 'SentimentScore' for regression
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

# Modify the model architecture for regression
finbert_model.classifier = torch.nn.Linear(finbert_model.config.hidden_size, 1)

# Prepare your dataset
financial_dataset = FinancialDataset(df, tokenizer)

# Fine-tune the model for regression
optimizer = torch.optim.AdamW(
    finbert_model.parameters(),
    lr=1e-5
)
loss_fn = torch.nn.MSELoss()


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define a simple RNN model
class StockPricePredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(StockPricePredictor, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])  # Take the output from the last time step
        return out

# Generate some random data for demonstration purposes
# In a real-world scenario, you would replace this with your actual stock price data
np.random.seed(0)
closing_prices = np.random.randn(100, 5)  # 100 samples, 5 days

# Convert the data to PyTorch tensors
inputs = torch.tensor(closing_prices[:, :-1], dtype=torch.float32).unsqueeze(0)  # Input: closing prices for the previous 5 days
targets = torch.tensor(closing_prices[:, -1], dtype=torch.float32).unsqueeze(0)  # Target: closing price for the next day

# Model hyperparameters
input_size = 5  # Number of features (closing prices for the previous 5 days)
hidden_size = 10  # Number of hidden units
output_size = 1  # Number of output units

# Instantiate the model
model = StockPricePredictor(input_size, hidden_size, output_size)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1000

for epoch in range(num_epochs):
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# After training, you can use the model to make predictions
model.eval()
with torch.no_grad():
    test_input = torch.tensor(closing_prices[-1, 1:], dtype=torch.float32).unsqueeze(0)  # Use the last 5 days as input for prediction
    predicted_price = model(test_input)
    print(f'Predicted Stock Price: {predicted_price.item()}')


RuntimeError: input.size(-1) must be equal to input_size. Expected 5, got 4