In [1]:
use_gpu = True
use_ramdon_split = False
use_dataparallel = True

In [2]:
import os
import sys

In [3]:
print("Current Python path:", sys.path)

utils_dir = r'C:\Users\zhaox\Documents\Python\Stock_CNN-main\utils'

if utils_dir not in sys.path:
    sys.path.append(utils_dir)

try:
    import gpu_tools
    print("Successfully imported gpu_tools")
except Exception as e:
    print("Error importing gpu_tools:", e)

    print("\nContents of the utils directory:")
    for item in os.listdir(utils_dir):
        print(item)

Current Python path: ['C:\\Users\\zhaox\\Documents\\Python\\Stock_CNN-main\\notebooks', 'C:\\Users\\zhaox\\.conda\\envs\\top\\python39.zip', 'C:\\Users\\zhaox\\.conda\\envs\\top\\DLLs', 'C:\\Users\\zhaox\\.conda\\envs\\top\\lib', 'C:\\Users\\zhaox\\.conda\\envs\\top', '', 'C:\\Users\\zhaox\\AppData\\Roaming\\Python\\Python39\\site-packages', 'C:\\Users\\zhaox\\.conda\\envs\\top\\lib\\site-packages', 'C:\\Users\\zhaox\\.conda\\envs\\top\\lib\\site-packages\\win32', 'C:\\Users\\zhaox\\.conda\\envs\\top\\lib\\site-packages\\win32\\lib', 'C:\\Users\\zhaox\\.conda\\envs\\top\\lib\\site-packages\\Pythonwin', 'C:\\Users\\zhaox\\.conda\\envs\\top\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\zhaox\\.ipython']
Successfully imported gpu_tools


In [4]:
import os
import sys
sys.path.insert(0, '..')


import os
import re 

def query_gpu(qargs=[]):
    qargs =['index','gpu_name', 'memory.free']+ qargs
    cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(','.join(qargs))
    results = os.popen(cmd).readlines()
    return results

def select_gpu(results, thres=4096):
    avali = []
    try:
        for i, line in enumerate(results):
            if int(re.findall('(.*), (.*?) MiB', line)[0][-1]) > thres:
                avali.append(i)
        return avali
    except:
        return ''
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([ str(obj) for obj in select_gpu(query_gpu())])

import time
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split



torch.manual_seed(42)

IMAGE_WIDTH = {5: 15, 20: 60, 60: 180}
IMAGE_HEIGHT = {5: 32, 20: 64, 60: 96}  

## load data

here we choose 1993-2001 data as our training(include validation) data, the remaining will be used in testing.

In [5]:
# Define the number of segments (you can adjust this based on how many segments you have)
num_segments = 137  # Adjust this based on the number of segments you have

# Define the target 2D shape (e.g., 48 x 48)
desired_height = 48
desired_width = 48
desired_size = desired_height * desired_width  # 2304 elements in total

# Create empty lists to store the images and labels
images = []
label_df = []

# Iterate over each segment (assuming files are named like AAPL_segment_1.dat, AAPL_segment_2.dat, etc.)
for segment in range(1, num_segments + 1):
    # Load image data from binary .dat file
    image_path = f"AAPL_segment_{segment}.dat"
    
    # Read the image data as 1D array
    image_data = np.memmap(image_path, dtype=np.uint8, mode='r')
    
    # Trim or pad the data as needed to fit the desired shape
    if image_data.size > desired_size:
        # Trim the data if it's larger than the desired size
        final_image_data = image_data[:desired_size]
    elif image_data.size < desired_size:
        # Pad the data with zeros if it's smaller than the desired size
        padding_needed = desired_size - image_data.size
        final_image_data = np.pad(image_data, (0, padding_needed), mode='constant')
    else:
        final_image_data = image_data  # No padding or trimming needed

    # Reshape the final data into a 48x48 2D array
    reshaped_image = final_image_data.reshape((desired_height, desired_width))
    
    # Append each segment's 2D image data to the list
    images.append(reshaped_image)
    
    # Load label data from feather file
    label_path = f"AAPL_segment_{segment}.feather"
    label_data = pd.read_feather(label_path)
    label_df.append(label_data)  # Append each segment's label data


    
images = np.concatenate(images)
label_df = pd.concat(label_df)

print(images.shape)
print(label_df.shape)
print(label_df)

(6576, 48)
(2740, 7)
          Date       Open       High        Low      Close     Volume  \
0   2009-02-02   2.687241   2.774705   2.681209   2.759926  558247200   
1   2009-02-03   2.772292   2.816325   2.722830   2.804261  599309200   
2   2009-02-04   2.811500   2.902884   2.807880   2.821452  808421600   
3   2009-02-05   2.797927   2.933043   2.793404   2.909217  749246400   
4   2009-02-06   2.926107   3.015984   2.925504   3.007539  687209600   
..         ...        ...        ...        ...        ...        ...   
15  2019-12-12  64.973414  66.133220  64.861803  65.866318  137310400   
16  2019-12-13  65.866303  66.798029  65.737705  66.761635  133587600   
17  2019-12-16  67.210548  68.130146  67.205698  67.904488  128186000   
18  2019-12-17  67.834111  68.367909  67.647276  68.037926  114158400   
19  2019-12-18  67.889916  68.399456  67.724925  67.875359  116028400   

    20 Day MA  
0    2.678585  
1    2.676172  
2    2.676971  
3    2.685190  
4    2.695776  
..    

## build dataset

In [6]:
class MyDataset(Dataset):
    
    def __init__(self, img, label):
        self.img = torch.Tensor(img.copy())
        self.label = torch.Tensor(label)
        self.len = len(img)
  
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return self.img[idx], self.label[idx]

Split method (not random split is recommended)

In [7]:
if not use_ramdon_split:
    train_val_ratio = 0.7
    split_idx = int(images.shape[0] * 0.7)
    train_dataset = MyDataset(images[:split_idx], (label_df.Ret_20d > 0).values[:split_idx])
    val_dataset = MyDataset(images[split_idx:], (label_df.Ret_20d > 0).values[split_idx:])
else:
    dataset = MyDataset(images, (label_df.Ret_20d > 0).values)
    train_val_ratio = 0.7
    train_dataset, val_dataset = random_split(dataset, \
        [int(dataset.len*train_val_ratio), dataset.len-int(dataset.len*train_val_ratio)], \
        generator=torch.Generator().manual_seed(42))
    del dataset

train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=256, shuffle=False, pin_memory=True)

AttributeError: 'DataFrame' object has no attribute 'Ret_20d'

## models

In [6]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.)
    elif isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)

In [7]:
from models import baseline

device = 'cuda' if use_gpu else 'cpu'
export_onnx = True
net = baseline.Net().to(device)
net.apply(init_weights)

if export_onnx:
    import torch.onnx
    x = torch.randn([1,1,64,60]).to(device)
    torch.onnx.export(net,               # model being run
                      x,                         # model input (or a tuple for multiple inputs)
                      "../cnn_baseline.onnx",   # where to save the model (can be a file or file-like object)
                      export_params=False,        # store the trained parameter weights inside the model file
                      opset_version=10,          # the ONNX version to export the model to
                      do_constant_folding=False,  # whether to execute constant folding for optimization
                      input_names = ['input_images'],   # the model's input names
                      output_names = ['output_prob'], # the model's output names
                      dynamic_axes={'input_images' : {0 : 'batch_size'},    # variable length axes
                                     'output_prob' : {0 : 'batch_size'}})


### Profiling

In [8]:
count = 0
for name, parameters in net.named_parameters():
    print(name, ':', parameters.size())
    count += parameters.numel()
print('total_parameters : {}'.format(count))

layer1.0.weight : torch.Size([64, 1, 5, 3])
layer1.0.bias : torch.Size([64])
layer1.1.weight : torch.Size([64])
layer1.1.bias : torch.Size([64])
layer2.0.weight : torch.Size([128, 64, 5, 3])
layer2.0.bias : torch.Size([128])
layer2.1.weight : torch.Size([128])
layer2.1.bias : torch.Size([128])
layer3.0.weight : torch.Size([256, 128, 5, 3])
layer3.0.bias : torch.Size([256])
layer3.1.weight : torch.Size([256])
layer3.1.bias : torch.Size([256])
fc1.1.weight : torch.Size([2, 46080])
fc1.1.bias : torch.Size([2])
total_parameters : 708866


In [9]:
from thop import profile as thop_profile

flops, params = thop_profile(net, inputs=(next(iter(train_dataloader))[0].to(device),))
print('FLOPs = ' + str(flops/1000**3) + 'G')
print('Params = ' + str(params/1000**2) + 'M')

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_relu() for <class 'torch.nn.modules.activation.LeakyReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.activation.Softmax'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'models.baseline.Net'>. Treat it as zero Macs and zero Params.[00m
FLOPs = 36.21961728G
Params = 0.708866M


In [10]:
from torch.profiler import profile, record_function, ProfilerActivity

inputs = next(iter(train_dataloader))[0].to(device)

with profile(activities=[
        ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
    with record_function("model_inference"):
        net(inputs)

prof.export_chrome_trace("../trace.json")
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference        12.02%       1.426ms        66.87%       7.931ms       7.931ms       0.000us         0.00%       9.862ms       9.862ms             1  
                                           aten::conv2d         0.26%      31.000us        24.60%       2.917ms     972.333us       0.000us         0.00%       7.306ms       2.435ms             3  
         

## train

In [11]:
def train_loop(dataloader, net, loss_fn, optimizer):
    
    running_loss = 0.0
    current = 0
    net.train()
    
    with tqdm(dataloader) as t:
        for batch, (X, y) in enumerate(t):
            X = X.to(device)
            y = y.to(device)
            y_pred = net(X)
            loss = loss_fn(y_pred, y.long())
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss = (len(X) * loss.item() + running_loss * current) / (len(X) + current)
            current += len(X)
            t.set_postfix({'running_loss':running_loss})
    
    return running_loss

In [12]:
def val_loop(dataloader, net, loss_fn):

    running_loss = 0.0
    current = 0
    net.eval()
    
    with torch.no_grad():
        with tqdm(dataloader) as t:
            for batch, (X, y) in enumerate(t):
                X = X.to(device)
                y = y.to(device)
                y_pred = net(X)
                loss = loss_fn(y_pred, y.long())

                running_loss += loss.item()
                running_loss = (len(X) * running_loss + loss.item() * current) / (len(X) + current)
                current += len(X)
            
    return running_loss

In [13]:
# net = torch.load('/home/clidg/proj_2/pt/baseline_epoch_10_train_0.6865865240322523_eval_0.686580_.pt')

In [17]:
if use_gpu and use_dataparallel and 'DataParallel' not in str(type(net)):
    net = net.to(device)
    net = nn.DataParallel(net)

In [18]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)

start_epoch = 0
min_val_loss = 1e9
last_min_ind = -1
early_stopping_epoch = 5

from torch.utils.tensorboard import SummaryWriter
tb = SummaryWriter()

In [19]:
start_time = datetime.datetime.now().strftime('%Y%m%d_%H:%M:%S')
os.mkdir('../pt'+os.sep+start_time)
epochs = 100
for t in range(start_epoch, epochs):
    print(f"Epoch {t}\n-------------------------------")
    time.sleep(0.2)
    train_loss = train_loop(train_dataloader, net, loss_fn, optimizer)
    val_loss = val_loop(val_dataloader, net, loss_fn)
    tb.add_histogram("train_loss", train_loss, t)
    torch.save(net, '../pt'+os.sep+start_time+os.sep+'baseline_epoch_{}_train_{:5f}_val_{:5f}.pt'.format(t, train_loss, val_loss)) 
    if val_loss < min_val_loss:
        last_min_ind = t
        min_val_loss = val_loss
    elif t - last_min_ind >= early_stopping_epoch:
        break

print('Done!')
print('Best epoch: {}, val_loss: {}'.format(last_min_ind, min_val_loss))

Epoch 0
-------------------------------


 13%|█▎        | 551/4337 [00:14<02:33, 24.60it/s, running_loss=0.901]