In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Lazily build the full path of every file found beneath the input directory,
# then print them one per line in the order os.walk visits them.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/jpx-tokyo-stock-exchange-prediction/stock_list.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/sample_submission.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/options.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/financials.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/secondary_stock_prices.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/trades.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/example_test_files/stock_prices.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/jpx_tokyo_market_prediction/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/jpx-tokyo-stock-exchange-prediction/jpx_tokyo_market_prediction/__init__.py
/kaggle/input/jpx-tokyo-stock-exchange-prediction/data_specifications/stock_fin_spec.csv
/kaggle/input/jpx-tokyo-stock-exchange-prediction/data_specifications/trades_spec.csv
/kaggle/input/jpx-tokyo-stock-

In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset

# Define a custom dataset for handling stock data
class StockDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    Args:
        features: Indexable, sized collection of per-sample inputs.
        targets: Indexable, sized collection of per-sample labels. Must have
            the same length as ``features``.

    Raises:
        ValueError: If ``features`` and ``targets`` differ in length.
    """

    def __init__(self, features, targets):
        # __len__ is driven by targets, so a length mismatch would otherwise
        # either raise IndexError mid-epoch or silently ignore extra features.
        if len(features) != len(targets):
            raise ValueError(
                f'features and targets must have the same length, '
                f'got {len(features)} and {len(targets)}'
            )
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.targets)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.features[idx], self.targets[idx]

# Define the transformer model
class StockTransformer(nn.Module):
    """Transformer-based regressor producing one scalar per input position.

    Input features are linearly projected to ``d_model``, passed through an
    ``nn.Transformer`` (the projected input is used as both source and target),
    and mapped to a single output value by a final linear layer.

    Args:
        input_feature_size: Number of features per input row.
        d_model: Width of the transformer; must be divisible by ``heads``.
        num_layers: Number of encoder layers and of decoder layers.
        heads: Number of attention heads.
        dropout_rate: Dropout probability used inside the transformer.
    """

    def __init__(self, input_feature_size, d_model, num_layers, heads, dropout_rate=0.1):
        super().__init__()
        self.d_model = d_model
        self.transformer = nn.Transformer(d_model=self.d_model, nhead=heads,
                                          num_encoder_layers=num_layers,
                                          num_decoder_layers=num_layers,
                                          dropout=dropout_rate)
        self.fc_out = nn.Linear(self.d_model, 1)
        # Project input features to d_model size
        self.input_projection = nn.Linear(input_feature_size, self.d_model)

    def forward(self, src):
        """Run the model.

        Args:
            src: Either a 2-D tensor ``(batch, input_feature_size)`` of
                independent rows, or a 3-D sequence-first tensor
                ``(seq_len, batch, input_feature_size)``.

        Returns:
            Tensor shaped like ``src`` with the last dimension replaced by 1.
        """
        # Ensure src is projected to d_model size
        src = self.input_projection(src)

        # nn.Transformer (batch_first=False) interprets a 2-D tensor as ONE
        # unbatched sequence, which would make the rows of a minibatch attend
        # to each other. Insert a length-1 sequence axis so that every row is
        # its own sequence and predictions do not depend on batch composition.
        # NOTE(review): with a length-1 sequence, attention has a single key to
        # attend to; feed (seq_len, batch, features) windows to use attention
        # across time steps.
        rows_are_independent = src.dim() == 2
        if rows_are_independent:
            src = src.unsqueeze(0)

        # The projected input doubles as the decoder target (no masks applied).
        out = self.transformer(src, src)

        if rows_are_independent:
            out = out.squeeze(0)
        return self.fc_out(out)

# Handling data more robustly
def process_data(data):
    """Impute missing values and convert every column to float.

    Missing values are forward-filled, then backward-filled, and any value
    still missing (e.g. a column that is entirely empty) is set to 0.
    Object, string and categorical columns are label-encoded on their string
    representation; all columns are finally cast to float.

    Args:
        data: Input DataFrame. It is not modified.

    Returns:
        A new DataFrame with the same columns, all of float dtype.
    """
    # .ffill()/.bfill() replace fillna(method=...), deprecated since pandas 2.1.
    data = data.ffill().bfill().fillna(0)
    for col in data.columns:
        dtype = data[col].dtype
        # Also catch pandas' dedicated string dtype, which is not np.object_
        # and would otherwise fail in the float cast below.
        needs_encoding = (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        )
        if needs_encoding:
            # A fresh encoder per column: fit_transform refits anyway, and this
            # avoids carrying one column's classes over on a shared instance.
            data[col] = LabelEncoder().fit_transform(data[col].astype(str))
        data[col] = data[col].astype(float)
    return data

# Load the raw training prices; the raw and processed frames keep separate names.
raw_prices = pd.read_csv('/kaggle/input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv')
# Drop unlabeled rows *before* imputation: process_data fills gaps in every
# column from neighbouring rows, which would otherwise fabricate Target values.
raw_prices = raw_prices.dropna(subset=['Target'])
data = process_data(raw_prices)

# Standardise each feature column (zero mean, unit variance) in float64 before
# converting to float32, so that columns with very different magnitudes enter
# the network on a comparable scale. Constant columns (std == 0) are divided
# by 1 instead, which leaves them at 0 rather than producing NaN.
feature_frame = data.drop('Target', axis=1)
feature_scale = feature_frame.std(ddof=0).replace(0, 1.0)
feature_frame = (feature_frame - feature_frame.mean()) / feature_scale
features = torch.tensor(feature_frame.values, dtype=torch.float32)
targets = torch.tensor(data['Target'].values, dtype=torch.float32).unsqueeze(1)

# Creating datasets
dataset = StockDataset(features, targets)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Model hyper-parameters. d_model must be divisible by the number of attention
# heads (16 / 8 = 2 dimensions per head here).
input_feature_size = features.shape[1]
d_model = 16
heads = 8

# Seed torch's global RNG so weight initialisation and DataLoader shuffling are
# reproducible under Restart & Run All.
torch.manual_seed(42)

# Model training
model = StockTransformer(input_feature_size=input_feature_size, d_model=d_model, num_layers=6, heads=heads, dropout_rate=0.1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10  # Specify the number of epochs

model.train()  # make sure dropout is active while fitting
for epoch in range(num_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    # Unpack each batch directly; naming the loop variable `data` would
    # overwrite the module-level DataFrame of the same name.
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the loss
        running_loss += loss.item()

    # Compute the average loss for the epoch (mean of the per-batch losses)
    avg_loss = running_loss / len(dataloader)
    print(f'Epoch {epoch + 1}, Average Loss: {avg_loss:.4f}')

print('Finished Training')

Epoch 1, Average Loss: 0.0006
Epoch 2, Average Loss: 0.0005
Epoch 3, Average Loss: 0.0005
Epoch 4, Average Loss: 0.0005
Epoch 5, Average Loss: 0.0005
Epoch 6, Average Loss: 0.0005
Epoch 7, Average Loss: 0.0005
Epoch 8, Average Loss: 0.0005
Epoch 9, Average Loss: 0.0005
Epoch 10, Average Loss: 0.0005
Finished Training
