In [1]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import Resize, Normalize, Compose, ToTensor
from torchvision.models import mobilenet_v3_small
import torchvision
import torch
from datasets import ForexDataWithWindow
import pandas as pd
import numpy as np
import models.time2vec as time2vec
import mplfinance as mpf
from scipy import stats
import yfinance as yf
from scipy.spatial import distance
from tqdm import tqdm

In [4]:
# Load the USDJPY H1 CSV through the project's windowed dataset wrapper.
# input_duration=31 presumably yields 31-row intervals (the later cells slice
# 30 input rows plus the row that follows them) — TODO confirm.
dataset = ForexDataWithWindow(
    "./data/USDJPY_H1.csv",
    header=0,
    normalize=False,
    data_order="tohlc",
    input_duration=31,
    time_index=True,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data["Time"] = pd.to_datetime(self.data["Time"])


In [2]:
# Restore the trained Time2Vec model from "audjpy_model.ckpt", mapping its weights onto the CPU.
model = time2vec.Time2Vec.load_from_checkpoint("audjpy_model.ckpt", map_location="cpu")
# eval() is the last expression of the cell, so its return value (the module repr) is displayed.
model.eval()

Time2Vec(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          

In [None]:
def get_prob_dist(dataset, model=None, step=1000, window=30, threshold=0.03,
                  checkpoint_path="epoch=93-step=7896.ckpt"):
    """Yield ``(slope, flag)`` for every ``step``-th interval of ``dataset``.

    For each sampled interval the first ``window`` rows are fed to the model
    and row ``window`` is the bar whose direction is being predicted.

    Parameters
    ----------
    dataset
        Object supporting ``len()`` and ``getInterval(i)``. Each interval must
        be a DataFrame with a "Close" column and at least ``window + 1`` rows.
    model
        Model forwarded to ``time2vec.predict``. When ``None`` the checkpoint
        at ``checkpoint_path`` is used instead.
    step
        Distance between sampled interval indices.
    window
        Number of leading rows used as model input.
    threshold
        Absolute close-to-close change above which the move counts as
        "up" / "down"; changes within ``[-threshold, threshold]`` count as
        "stationary".
    checkpoint_path
        Checkpoint passed to ``time2vec.predict`` when ``model`` is ``None``.

    Yields
    ------
    tuple
        ``(slope, flag)`` where ``slope`` is the least-squares slope of the
        input closes against their bar position and ``flag`` is ``True`` when
        the predicted label matches the realised move.
    """
    bar_positions = np.arange(window)
    for i in range(0, len(dataset), step):
        data = dataset.getInterval(i)
        x = data.iloc[:window]
        y = data.iloc[window]
        if model is None:
            prob = time2vec.predict(x, checkpoint_path=checkpoint_path)
        else:
            prob = time2vec.predict(x, model)
        prediction = time2vec.most_probable(prob)

        # Positional access to the last input close: a bare ``[-1]`` would be
        # treated as a label lookup on a non-integer (e.g. time) index.
        d = y["Close"] - x["Close"].iloc[-1]

        # The stationary band needs BOTH bounds; the previous ``or`` made the
        # condition always true, so every "stationary" prediction was a hit.
        flag = bool(
            (d > threshold and prediction == "up")
            or (d < -threshold and prediction == "down")
            or (-threshold <= d <= threshold and prediction == "stationary")
        )

        res = stats.linregress(bar_positions, x["Close"].to_numpy())
        yield res.slope, flag

In [None]:
# Drain the generator so the (slope, flag) pairs are held in a list.
prob_dist = list(get_prob_dist(dataset, model))

In [None]:
# One row per sampled interval: regression slope of the closes and the prediction-hit flag.
prob_dist_df = pd.DataFrame(prob_dist, columns=["slope", "flag"])

In [None]:
# Summary statistics of the sampled results.
prob_dist_df.describe()

In [6]:
def get_vectors(dataset, model=None):
    """Generate ``(start_label, vector)`` pairs for every 1000th interval.

    The first 30 rows of each sampled interval are embedded with
    ``time2vec.time2vec``. When ``model`` is ``None`` the embedding is produced
    from the "epoch=93-step=7896.ckpt" checkpoint; otherwise ``model`` is
    passed through. The yielded label is the index label of the window's
    first row.
    """
    sample_starts = range(0, len(dataset), 1000)
    for start in tqdm(sample_starts):
        window = dataset.getInterval(start).iloc[:30]
        embedding = (
            time2vec.time2vec(window, model)
            if model is not None
            else time2vec.time2vec(window, checkpoint_path="epoch=93-step=7896.ckpt")
        )
        first_row = window.iloc[0]
        yield first_row.name, embedding

In [7]:
# Embed every sampled window; each entry is a (first-row timestamp, vector) pair.
vecs = list(get_vectors(dataset, model))

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:19<00:00,  5.11it/s]


<Figure size 640x480 with 0 Axes>

In [8]:
# Show only the first few (timestamp, vector) pairs; echoing the whole list
# floods the notebook with full array dumps.
vecs[:3]

[(Timestamp('2007-03-13 16:00:00'),
  array([[ 0.05260894, -1.2463449 ,  0.30584288, -0.5571336 ,  0.35399675,
          -0.02799349,  0.26113686, -1.1365316 ,  0.39695212, -0.5434206 ,
           0.7225686 , -0.7442149 ,  1.027528  ,  0.26761958,  0.08083867,
           0.2996803 ,  1.5268518 ,  0.41260016,  1.8304898 ,  0.3905418 ,
           0.69631416,  0.35878897, -2.0017812 ,  0.53768665, -0.03441096,
          -0.676297  ,  0.10267323, -0.7217689 , -0.18688536,  0.3838374 ,
          -0.89964265,  0.31259108, -0.37552035,  1.6786467 ,  0.25696653,
          -0.06845704,  0.6339795 , -0.5685474 , -0.7214829 , -0.49785146,
           0.41937292, -0.13789769, -0.8924697 , -1.0020002 ,  0.22760095,
           1.9528098 ,  0.3396097 , -0.10228204,  1.0702454 , -0.9938021 ,
           0.13229993, -0.00538329,  0.7784317 ,  1.2967161 ,  0.76722205,
          -0.601084  ,  0.70429295, -2.3548422 , -1.2649046 ,  0.92287076,
           0.44921714, -0.0569427 , -1.647896  , -0.13462469, -0

In [9]:
# Tabulate the (timestamp, vector) pairs; every "vec" cell holds one NumPy array.
vecs_df = pd.DataFrame(vecs, columns=["Time", "vec"])

In [10]:
# Preview the tabulated vectors.
vecs_df

Unnamed: 0,Time,vec
0,2007-03-13 16:00:00,"[[0.052608944, -1.2463449, 0.30584288, -0.5571..."
1,2007-05-10 08:00:00,"[[-0.58210295, -0.26954478, -0.3167297, -1.002..."
2,2007-07-09 00:00:00,"[[0.50214994, -1.1640081, -0.5192096, -0.98524..."
3,2007-09-04 18:00:00,"[[-0.09912628, -0.25360155, 0.05359376, 0.3183..."
4,2007-11-01 12:00:00,"[[0.11769459, -1.5891379, -0.58202505, -0.3357..."
...,...,...
95,2022-06-21 02:00:00,"[[-0.5497308, -2.4591537, -0.7143351, -0.31895..."
96,2022-08-17 18:00:00,"[[0.5484353, -0.2696594, -0.24784824, -0.94252..."
97,2022-10-14 10:00:00,"[[-0.4502816, -0.6110053, -0.71096534, 0.01463..."
98,2022-12-13 05:00:00,"[[1.8483318, -1.1538614, 0.0366548, -0.8938278..."


In [13]:
# Persist the vectors to "usdjpy_vecs.csv".
# NOTE(review): the "vec" cells are NumPy arrays, so they are presumably written
# as their text representation — confirm that whoever reads this file can parse
# it back, or store the arrays in a binary format instead.
vecs_df.to_csv("usdjpy_vecs.csv")

In [6]:
# Fetch the latest one-day period of AUDJPY candles from Yahoo Finance at 15-minute resolution.
# NOTE(review): the stored vectors come from the hourly USDJPY file — confirm that
# comparing them against 15-minute AUDJPY windows is intended.
ticker = yf.Ticker("AUDJPY=X")
data_now = ticker.history(interval="15m", period="1d")

In [None]:
# Timestamp of the first fetched candle, rendered as "MM/DD/YYYY, HH:MM:SS".
data_now.index[0].strftime("%m/%d/%Y, %H:%M:%S")

In [None]:
# Embed the MOST RECENT 30 candles. The plotting and prediction cells both use
# data_now[-30:], so the "now" vector must describe that same window rather
# than the oldest 30 candles of the fetched period.
vec_now = time2vec.time2vec(data_now[-30:], model)

In [None]:
# Peek at one stored vector: the first [0] picks the first row's array,
# the second [0] takes that array's first row.
vecs_df["vec"].to_numpy()[0][0]

In [None]:
# Cosine distance from the live vector to every stored vector, computed with a
# single cdist call. The previous np.vectorize wrapper called cdist once per row
# and relied on each (1, 1) result being implicitly squeezed to a scalar.
# Each stored "vec" is a 2-D single-row array, so stacking them gives one row
# per stored window; [0] selects the distances for vec_now's single row
# (assumes vec_now is also a single-row 2-D array, like the stored vectors).
stored_vectors = np.vstack(vecs_df["vec"].to_list())
dists = distance.cdist(vec_now, stored_vectors, "cosine")[0]

In [None]:
# Cosine DISTANCE is smallest for the most similar vector, so the closest
# historical window is the argmin (argmax would pick the least similar one).
np.argmin(dists)

In [None]:
# Start timestamp of the stored window at row 47.
# NOTE(review): 47 is hard-coded — presumably copied from an earlier run of the
# index-selection cell; confirm, and derive it from `dists` so a re-run stays valid.
vecs_df.iloc[47]["Time"]

In [None]:
# The 30 hourly timestamps beginning at the row-47 window's start.
# NOTE(review): row 47 is hard-coded here as well — confirm it matches the selected index.
pd.date_range(vecs_df.iloc[47]["Time"], periods=30, freq="H")

In [None]:
# Look up 50 consecutive hourly labels in the dataset's frame, starting at the row-47 timestamp.
# NOTE(review): assumes dataset.data is indexed by time and contains every one of
# these hourly labels; .loc with a list of labels raises KeyError when any is
# missing (e.g. gaps in the series) — confirm. Row 47 is hard-coded, and 50
# periods differs from the 30 used in the cells around it — confirm both.
similar_df = dataset.data.loc[pd.date_range(vecs_df.iloc[47]["Time"], periods=50, freq="H")]

In [None]:
# Candlestick chart of the latest 30 fetched candles.
mpf.plot(data_now[-30:], type='candle', returnfig=True, scale_padding=0, style='charles')
# Candlestick chart of the historical window selected above, for visual comparison.
# With returnfig=True each call returns its figure objects; only the last call's
# return value is echoed by the cell.
mpf.plot(similar_df, type='candle', returnfig=True, scale_padding=0, style='charles')

In [3]:
# Load the Time2Vec model from "audjpy_model.ckpt" onto the CPU before exporting it.
# NOTE(review): this repeats the identical checkpoint load done near the top of
# the notebook — confirm whether the second load is still needed.
model = time2vec.Time2Vec.load_from_checkpoint("audjpy_model.ckpt", map_location="cpu")
# eval() is the last expression of the cell, so its return value (the module repr) is displayed.
model.eval()

Time2Vec(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          

In [4]:
# Export the model to TorchScript by tracing it with a random 1x3x224x224 example input.
traced_model = model.to_torchscript(method="trace", example_inputs=torch.randn(1,3,224,224))

In [5]:
# Serialize the traced module to "time2vec_audjpy.pt".
torch.jit.save(traced_model, "time2vec_audjpy.pt")

In [13]:
# Run the traced model on the latest 30 fetched candles; the cell echoes the
# returned array (three float32 values in the recorded output).
time2vec.predict(data_now[-30:], traced_model)

array([0.0240137 , 0.16673672, 0.8092495 ], dtype=float32)

<Figure size 640x480 with 0 Axes>