## Fix Random Seed

In [5]:
import numpy as np
import torch
import random
import pandas as pd
from tqdm.auto import tqdm
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader, random_split
import math
import csv

def set_seed(seed):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(87)

def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set'''
    valid_set_size = int(valid_ratio * len(data_set)) 
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)

## Dataloader
- Split dataset into training dataset(90%) and validation dataset(10%).
- Create dataloader to iterate the data.

In [6]:
class MyDataset():
    def __init__(self,dataset , label):
      self.dataset = torch.from_numpy( dataset.to_numpy() )
      self.label = torch.from_numpy( label.to_numpy() )

    def __getitem__(self,index):
      return self.dataset[index],self.label[index]

    def __len__(self):
      return len(self.dataset)

# Model
- TransformerEncoderLayer:
  - Base transformer encoder layer in [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
  - Parameters:
    - d_model: the number of expected features of the input (required).

    - nhead: the number of heads of the multiheadattention models (required).

    - dim_feedforward: the dimension of the feedforward network model (default=2048).

    - dropout: the dropout value (default=0.1).

    - activation: the activation function of intermediate layer, relu or gelu (default=relu).

- TransformerEncoder:
  - TransformerEncoder is a stack of N transformer encoder layers
  - Parameters:
    - encoder_layer: an instance of the TransformerEncoderLayer() class (required).

    - num_layers: the number of sub-encoder-layers in the encoder (required).

    - norm: the layer normalization component (optional).

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Reggsor(nn.Module):
	def __init__(self, d_model=32, n_spks=1, dropout=0.1):
		super().__init__()
		# Project the dimension of features from that of input into d_model.
		self.prenet = nn.Linear(16, d_model)
		# TODO:
		#   Change Transformer to Conformer.
		#   https://arxiv.org/abs/2005.08100

		self.encoder_layer = nn.TransformerEncoderLayer(
			d_model=d_model, nhead=1 , batch_first = True
		)
		# self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=2)

		# Project the the dimension of features from d_model into speaker nums.
		self.pred_layer = nn.Sequential(
			nn.Linear(d_model, d_model),
			nn.ReLU(),
			nn.Linear(d_model, n_spks),
		)
	
		self.double()

	def forward(self, mels):
		"""
		args:
			mels: (batch size, 16)
		return:
			out: (batch size, value)
		"""

		#print("!" , mels.shape)

		#
		# out: batch , d_model
		#mels = mels.unsqueeze(2)
		#print(mels.shape[0])
		mels = mels.repeat(1,1,1).reshape(mels.shape[0],1,16)
		#print(mels.shape)
		out = self.prenet(mels)
		
		#print(out.shape)
		# out: (length, batch size, d_model)
		#out = out.permute(1, 0)
		# The encoder layer expect features in the shape of (length, batch size, d_model).
		out = self.encoder_layer(out)
		# out: (batch size, length, d_model)
		#print(out.shape)
		#out = out.transpose(0, 1)
		# mean pooling
		stats = out.mean(dim=1)

		# out: (batch, n_spks)
		out = self.pred_layer(stats)
		#print(out)
		return out

In [17]:
config = {
    'seed': 5201314,      # Your seed number, you can pick your lucky number. :)
    'select_all': True,   # Whether to use all features.
    'valid_ratio': 0.2,   # validation_size = train_size * valid_ratio
    'n_epochs': 500,     # Number of epochs.            
    'batch_size': 64, 
    'learning_rate': 1e-3,              
    'early_stop': 300,    # If model has not improved for this many consecutive epochs, stop training.     
    'save_path': '/kaggle/working/model.ckpt'  # Your model will be saved here.
}

In [9]:
stocks_data = pd.read_csv('/kaggle/input/top200-trainingcsv/encoded-top200_training.csv')
stocks_data = stocks_data.drop(columns=['證券代碼'])
data_years = stocks_data['年月'].unique()

train_years = 7
train_data = stocks_data[stocks_data['年月'].isin(data_years[:train_years])]
train_data_label = train_data['Return']
train_data = train_data.drop(columns=['簡稱', '年月', 'Return', 'ReturnMean_year_Label'])

valid_years = 2
valid_data = stocks_data[stocks_data['年月'].isin(data_years[valid_years:])]
valid_data_label = valid_data['Return']
valid_data = valid_data.drop(columns=['簡稱', '年月', 'Return', 'ReturnMean_year_Label'])

test_years = 1
test_data = stocks_data[stocks_data['年月'].isin(data_years[test_years:])]
test_data_label = test_data['Return']
test_data = test_data.drop(columns=['簡稱', '年月', 'Return', 'ReturnMean_year_Label'])

# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
batch_size = 64
train_set = MyDataset(train_data , train_data_label)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = MyDataset(valid_data , valid_data_label)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
test_set = MyDataset(test_data , test_data_label)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

In [10]:
_exp_name = "sample"

In [11]:
def trainer(config):
    # "cuda" only when GPUs are available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    writer = SummaryWriter()

    # The number of training epochs and patience.
    # Initialize trackers, these are not parameters and should not be changed
    #stale = 0
    n_epochs = config['n_epochs']
    patience = 300 # If no improvement in 'patience' epochs, early stop
    best_loss = math.inf
    step = 0
    early_stop_count = 0


    # Initialize a model, and put it on the device specified.
    model = Reggsor().to(device)

    # For the classification task, we use cross-entropy as the measurement of performance.
    criterion = nn.MSELoss()

    # Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=0) 

    
    for epoch in range(n_epochs):

        # ---------- Training ----------
        # Make sure the model is in train mode before training.
        model.train()

        # These are used to record information in training.
        train_loss = []
        train_accs = []
        loss_record = []

        train_pbar = tqdm(train_loader, position=0, leave=True)
        for batch in train_pbar:

            # A batch consists of image data and corresponding labels.
            data , labels = batch
            # Forward the data. (Make sure data and model are on the same device.)
            
            logits = model(data.to(device))
            
            # Calculate the cross-entropy loss.
            # We don't need to apply softmax before computing cross-entropy as it is done automatically.
            loss = criterion(logits, labels.to(device))

            # Gradients stored in the parameters in the previous step should be cleared out first.
            optimizer.zero_grad()

            # Compute the gradients for parameters.
            loss.backward()

            # Clip the gradient norms for stable training.
            grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

            # Update the parameters with computed gradients.
            optimizer.step() 
            step += 1
            loss_record.append(loss.detach().item())
            # Compute the accuracy for current batch.
            #acc = (logits.argmax(dim=1) == labels.to(device)).float().mean()
            train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})
            #preds = logits.argmax(1)
            #acc = torch.mean((preds == labels).float())

            # Record the loss and accuracy.
            #train_loss.append(loss.item())
            #train_accs.append(acc)
        mean_train_loss = sum(loss_record)/len(loss_record)
        writer.add_scalar('Loss/train', mean_train_loss, step)
            
        #train_loss = sum(train_loss) / len(train_loss)
        #train_acc = sum(train_accs) / len(train_accs)

        # Print the information.
        #print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

        # ---------- Validation ----------
        # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
        model.eval()

        # These are used to record information in validation.
        valid_loss = []
        valid_accs = []
        loss_record = []
        # Iterate the validation set by batches.
        for batch in tqdm(valid_loader):

            # A batch consists of image data and corresponding labels.
            data , labels = batch
            #imgs = imgs.half()

            # We don't need gradient in validation.
            # Using torch.no_grad() accelerates the forward process.
            with torch.no_grad():
                logits = model(data.to(device))

            # We can still compute the loss (but not the gradient).
            loss = criterion(logits, labels.to(device))
            loss_record.append(loss.item())
            # Compute the accuracy for current batch.
            #acc = (logits.argmax(dim=1) == labels.to(device)).float().mean()
            #acc = (logits.argmax(dim=1) == labels.to(device)).float().mean()
            #preds = logits.argmax(1)
            #acc = torch.mean((preds == labels).float())

            # Record the loss and accuracy.
        mean_valid_loss = sum(loss_record)/len(loss_record)
        print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
        writer.add_scalar('Loss/valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path']) # Save your best model
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else: 
            early_stop_count += 1

        if early_stop_count >= config['early_stop']:
            print('\nModel is not improving, so we halt the training session.')
            return 

In [18]:
trainer(config)

  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

Epoch [1/4]: Train loss: 3266.1307, Valid loss: 4932.2593
Saving model with loss 4932.259...


  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

Epoch [2/4]: Train loss: 3256.7116, Valid loss: 4998.6807


  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

Epoch [3/4]: Train loss: 3307.6954, Valid loss: 4924.0001
Saving model with loss 4924.000...


  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

Epoch [4/4]: Train loss: 3270.1952, Valid loss: 4906.8990
Saving model with loss 4906.899...


In [19]:
def predict(test_loader, model, device):
    model.eval() # Set your model to evaluation mode.
    preds = []
    for batch in tqdm(test_loader):
        data , labels = batch
        data = data.to(device)                     
        with torch.no_grad():                   
            pred = model(data)                     
            preds.append(pred.detach().cpu())   
    preds = torch.cat(preds, dim=0).numpy()  
    return preds

In [20]:
def save_pred(preds, file):
    ''' Save predictions to specified file '''
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Reggsor().to(device)
#model = My_Model(input_dim=x_train.shape[1]).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device) 
save_pred(preds, 'pred.csv')

  0%|          | 0/38 [00:00<?, ?it/s]