In [1]:
import datetime
import warnings
import math

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from denoising.autoencoder import *

from utils.metrics import get_metrics
from data_prep.yahoo_fin_api import load_df


In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Tag embedded in the checkpoint file names (presumably a ddmmyyyy-HHMMSS
# timestamp of the training run -- confirm against the training notebook).
DATE_STR = '28022023-151344'

# Kernel sizes handed to the encoder/decoder constructors.
CONV1_KERNEL = 51
CONV2_KERNEL = 51
CONV3_KERNEL = 51

cnn_encoder = CnnEncoder(CONV1_KERNEL, CONV2_KERNEL, CONV3_KERNEL).to(device)
cnn_decoder = CnnDecoder(CONV1_KERNEL, CONV2_KERNEL, CONV3_KERNEL).to(device)

# map_location=device remaps the stored tensors onto the device selected for
# this session, so a checkpoint written on a GPU machine still loads when the
# notebook is running on a CPU-only host.
cnn_encoder.load_state_dict(
    torch.load(f'./model-dir/{DATE_STR}-cnn-encoder.pt', map_location=device)
)
cnn_decoder.load_state_dict(
    torch.load(f'./model-dir/{DATE_STR}-cnn-decoder.pt', map_location=device)
)

<All keys matched successfully>

In [4]:
# Put both networks into evaluation mode. The tuple is the cell's final
# expression, so the module summaries are still echoed below the cell.
tuple(network.eval() for network in (cnn_encoder, cnn_decoder))

(CnnEncoder(
   (encoder_cnn): Sequential(
     (0): Conv1d(1, 8, kernel_size=(51,), stride=(1,), padding=(51,))
     (1): ReLU(inplace=True)
     (2): Conv1d(8, 16, kernel_size=(51,), stride=(1,), padding=(51,))
     (3): ReLU(inplace=True)
     (4): Conv1d(16, 32, kernel_size=(51,), stride=(1,), padding=(51,))
     (5): ReLU(inplace=True)
     (6): Conv1d(32, 64, kernel_size=(51,), stride=(1,), padding=(51,))
     (7): ReLU(inplace=True)
   )
 ),
 CnnDecoder(
   (decoder_cnn): Sequential(
     (0): ConvTranspose1d(64, 32, kernel_size=(51,), stride=(1,), padding=(51,))
     (1): ReLU(inplace=True)
     (2): ConvTranspose1d(32, 16, kernel_size=(51,), stride=(1,), padding=(51,))
     (3): ReLU(inplace=True)
     (4): ConvTranspose1d(16, 8, kernel_size=(51,), stride=(1,), padding=(51,))
     (5): ReLU(inplace=True)
     (6): ConvTranspose1d(8, 1, kernel_size=(51,), stride=(1,), padding=(51,))
   )
 ))

In [5]:
def get_data(data_name, minmax_scaling=True):
    """Load ``data/<data_name>.csv`` and extract its 'Mean' column as a tensor.

    Args:
        data_name: File stem of the CSV located under ``data/``.
        minmax_scaling: When true, every column of the loaded frame is
            rescaled with a freshly fitted ``MinMaxScaler`` before the
            'Mean' series is extracted.

    Returns:
        A ``(df, data)`` pair. ``df`` is the loaded (optionally scaled) frame
        with the 'Mean' column renamed to 'True'. ``data`` is a float32
        tensor holding the 'Mean' values with one leading axis of size 1
        added by ``unsqueeze(0)``.

    Raises:
        KeyError: If the loaded frame has no 'Mean' column.
    """
    df = load_df(f'data/{data_name}.csv')
    if minmax_scaling:
        df[df.columns] = MinMaxScaler().fit_transform(df.values)

    # Convert through NumPy instead of handing the Series to torch directly:
    # the result then does not depend on how the Series answers integer
    # indexing (which varies with its index type), and the copy is vectorised.
    mean_values = df['Mean'].to_numpy(dtype='float32')
    df = df.rename(columns={'Mean': 'True'})
    data = torch.tensor(mean_values, dtype=torch.float32).unsqueeze(0)

    return df, data

def predict(df, data, encoder=None, decoder=None):
    """Run the autoencoder over ``data`` and store the result in ``df['CNNPred']``.

    Args:
        df: DataFrame that receives the prediction column. It is modified
            in place and also returned.
        data: Input tensor passed to the encoder; it must already live on the
            same device as the models.
        encoder: Optional encoder module. Defaults to the notebook-level
            ``cnn_encoder``.
        decoder: Optional decoder module. Defaults to the notebook-level
            ``cnn_decoder``.

    Returns:
        The same ``df`` object, now carrying a 'CNNPred' column.

    Raises:
        ValueError: Raised by pandas if the squeezed model output does not
            have one value per row of ``df``.
    """
    if encoder is None:
        encoder = cnn_encoder
    if decoder is None:
        decoder = cnn_decoder

    # Pure inference: skip autograd bookkeeping so no graph is retained for
    # the forward pass.
    with torch.no_grad():
        reconstruction = decoder(encoder(data))

    # detach() is kept so the NumPy conversion also works when the caller's
    # input tensor itself requires grad.
    df['CNNPred'] = reconstruction.detach().cpu().numpy().squeeze()

    return df

In [6]:
# Load the 'snp' dataset without min-max scaling, move the series tensor to
# the compute device, then attach the autoencoder output to the frame.
snp_df, snp_data = get_data(data_name='snp', minmax_scaling=False)
snp_data = snp_data.to(device)
snp_df = predict(df=snp_df, data=snp_data)

In [7]:
# Noise is the residual left after subtracting the CNN output from the
# original series.
snp_df['Noise'] = snp_df['True'].sub(snp_df['CNNPred'])

In [8]:
# Persist the full frame, including the 'CNNPred' and 'Noise' columns added above.
snp_df.to_excel(excel_writer='data/snp-noise.xlsx')

In [9]:
# Keep only the residual column as a one-column frame and export it separately.
noise_df = snp_df['Noise'].to_frame()
noise_df.to_excel(excel_writer='data/snp-noise-data.xlsx')

In [10]:
# Read the exported noise column back from disk, taking the first column of
# the sheet as the index, and echo that index to inspect the result.
noise_df = pd.read_excel(io='data/snp-noise-data.xlsx', index_col=0)
noise_df.index

DatetimeIndex(['1990-02-26', '1990-02-27', '1990-02-28', '1990-03-01',
               '1990-03-02', '1990-03-05', '1990-03-06', '1990-03-07',
               '1990-03-08', '1990-03-09',
               ...
               '2023-02-09', '2023-02-10', '2023-02-13', '2023-02-14',
               '2023-02-15', '2023-02-16', '2023-02-17', '2023-02-21',
               '2023-02-22', '2023-02-23'],
              dtype='datetime64[ns]', name='Date', length=8313, freq=None)