In [1]:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModel, AutoTokenizer

from features import NewsFeature
from data_tools.containers import StocksData

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
stocks_data = StocksData("./data/market_data/AAPL.csv")

In [3]:
news_feature = NewsFeature("./data/news_data/politics_embeddings.csv")

In [6]:
ticker_data = stocks_data.get_data()
news_embeddings = news_feature.news_embeddings

In [7]:
ticker_data = ticker_data.sort_index()
news_ewm = news_embeddings.ewm(alpha=0.9).mean()

embeddings = {}
for idx, ticker_row in tqdm(ticker_data.iterrows()):
    if idx > 5:
        break
    news_embedding = news_ewm[news_ewm.index <= ticker_row.name].iloc[-1:]
    embeddings[ticker_row.name] = news_embedding

3374it [01:51, 30.13it/s]  

KeyboardInterrupt



In [20]:
threshold = stocks_data.iloc[-10].name

foo = stocks_embeddings[stocks_embeddings.index < threshold].iloc[:10]

In [22]:
foo.index

DatetimeIndex(['2022-02-14 11:21:00', '2022-02-14 22:51:00',
               '2022-02-15 00:11:00', '2022-02-15 00:16:00',
               '2022-02-15 11:27:00', '2022-02-15 13:01:00',
               '2022-02-15 15:26:00', '2022-02-15 15:51:00',
               '2022-02-15 20:47:00', '2022-02-15 22:20:00'],
              dtype='datetime64[ns]', name='date', freq=None)

In [25]:
foo.reset_index().ewm(alpha=0.9).mean()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,758,759,760,761,762,763,764,765,766,767
0,-0.052625,-0.056637,0.27578,-0.061722,0.498975,-0.385128,0.204143,0.01047,-0.201437,0.021336,...,-0.045116,-0.26343,0.112653,0.243946,0.108075,0.558196,0.105344,-0.061896,0.735771,-0.22338
1,-0.128359,-0.049053,0.197311,-0.222831,0.285092,-0.359337,0.100161,0.067087,-0.148541,-0.246603,...,-0.149059,-0.274455,0.111367,0.146011,0.104675,0.448664,0.122419,0.071826,0.802571,-0.16907
2,0.00669,0.069596,0.168903,-0.228434,0.253686,-0.228285,0.202662,0.067899,-0.080557,-0.402648,...,-0.050494,-0.1584,0.083408,0.157422,0.13438,0.374639,0.467917,-0.126152,0.693169,-0.292607
3,-0.219919,0.02433,0.116901,-0.28739,0.244858,-0.435104,0.112736,0.106872,-0.10256,-0.308729,...,-0.123579,-0.278686,0.034947,0.087639,0.082654,0.229179,0.238416,-0.021327,0.900522,-0.187859
4,-0.201045,0.071557,0.03454,-0.252656,0.100022,-0.299021,-0.028575,0.201811,0.01877,-0.356333,...,0.0332,-0.158462,0.079375,-0.070098,0.032155,0.198703,0.255428,-0.166163,0.745571,-0.253491
5,-0.179018,0.213791,0.00149,-0.375957,-0.067378,-0.293454,-0.083253,0.08386,-0.101363,-0.487327,...,0.046695,-0.10592,0.083447,-0.06442,-0.061741,0.213542,0.344195,-0.255588,0.911199,-0.238787
6,-0.223032,0.278976,0.076172,-0.273024,0.137922,-0.397014,-0.050238,0.170505,-0.109229,-0.314697,...,0.137417,-0.103513,0.147885,0.03939,-0.017857,0.381245,0.324674,-0.203005,0.895548,-0.140957
7,-0.233719,0.33331,0.114915,-0.29028,0.210378,-0.324248,0.053708,0.077146,0.00784,-0.404672,...,0.03079,-0.266012,-0.010802,-0.08736,0.085665,0.380636,0.44799,-0.196426,0.84181,-0.204024
8,-0.16392,0.268242,0.038688,-0.286963,0.171976,-0.335502,0.019587,0.006956,-0.169195,-0.642133,...,-0.034361,-0.271349,0.007027,0.085365,-0.035724,0.226567,0.44906,-0.27235,0.971888,-0.071143
9,-0.23006,0.252281,0.138815,-0.362859,0.207857,-0.330688,0.1332,0.062609,-0.201861,-0.227111,...,0.082448,-0.150449,0.096064,0.22014,-0.233823,0.36234,0.401944,-0.131861,1.046841,-0.171803


In [31]:
stocks_embeddings

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,758,759,760,761,762,763,764,765,766,767
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-14 11:21:00,-0.052625,-0.056637,0.275780,-0.061722,0.498975,-0.385128,0.204143,0.010470,-0.201437,0.021336,...,-0.045116,-0.263430,0.112653,0.243946,0.108075,0.558196,0.105344,-0.061896,0.735771,-0.223380
2022-02-14 22:51:00,-0.135933,-0.048294,0.189464,-0.238942,0.263704,-0.356758,0.089762,0.072749,-0.143251,-0.273397,...,-0.159454,-0.275557,0.111238,0.136217,0.104335,0.437711,0.124126,0.085198,0.809251,-0.163639
2022-02-15 00:11:00,0.021546,0.082647,0.165778,-0.229050,0.250231,-0.213869,0.213937,0.067989,-0.073079,-0.419813,...,-0.039652,-0.145634,0.080332,0.158677,0.137647,0.366496,0.505922,-0.147929,0.681135,-0.306196
2022-02-15 00:16:00,-0.245073,0.019305,0.111129,-0.293934,0.243879,-0.458061,0.102754,0.111198,-0.105002,-0.298304,...,-0.131691,-0.292038,0.029568,0.079893,0.076912,0.213032,0.212941,-0.009692,0.923538,-0.176232
2022-02-15 11:27:00,-0.198948,0.076804,0.025389,-0.248797,0.083931,-0.283902,-0.044274,0.212359,0.032249,-0.361621,...,0.050618,-0.145106,0.084311,-0.087623,0.026545,0.195317,0.257318,-0.182255,0.728356,-0.260783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23 23:46:00,0.045867,0.054387,0.017006,-0.306115,0.286809,-0.276083,-0.073570,0.076227,0.229950,-0.202089,...,-0.362903,-0.208436,0.091245,0.071224,0.117848,0.360291,0.039122,0.134395,0.542291,-0.152307
2022-12-24 14:52:00,-0.066175,0.173763,0.216142,-0.318642,0.250214,-0.252574,0.125638,0.143627,-0.147775,-0.193318,...,-0.030811,-0.153339,0.201843,0.200524,-0.012524,0.496237,0.276510,-0.054524,0.762946,-0.060602
2022-12-24 14:53:00,-0.235255,0.215270,0.246363,-0.233437,0.245628,-0.397608,0.233111,0.075187,-0.164326,-0.205617,...,-0.118355,-0.012383,0.155608,0.273035,0.074439,0.424546,0.356739,-0.034546,0.824623,-0.208901
2022-12-24 19:30:00,-0.158475,0.290374,0.018521,-0.214107,0.207087,-0.358771,0.009826,0.172023,-0.258067,-0.303361,...,0.118903,-0.158540,0.045618,0.101948,0.106072,0.179137,0.299408,-0.056195,1.023767,-0.243014


In [30]:
stocks_embeddings.ewm(alpha=0.9).mean()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,758,759,760,761,762,763,764,765,766,767
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-14 11:21:00,-0.052625,-0.056637,0.275780,-0.061722,0.498975,-0.385128,0.204143,0.010470,-0.201437,0.021336,...,-0.045116,-0.263430,0.112653,0.243946,0.108075,0.558196,0.105344,-0.061896,0.735771,-0.223380
2022-02-14 22:51:00,-0.128359,-0.049053,0.197311,-0.222831,0.285092,-0.359337,0.100161,0.067087,-0.148541,-0.246603,...,-0.149059,-0.274455,0.111367,0.146011,0.104675,0.448664,0.122419,0.071826,0.802571,-0.169070
2022-02-15 00:11:00,0.006690,0.069596,0.168903,-0.228434,0.253686,-0.228285,0.202662,0.067899,-0.080557,-0.402648,...,-0.050494,-0.158400,0.083408,0.157422,0.134380,0.374639,0.467917,-0.126152,0.693169,-0.292607
2022-02-15 00:16:00,-0.219919,0.024330,0.116901,-0.287390,0.244858,-0.435104,0.112736,0.106872,-0.102560,-0.308729,...,-0.123579,-0.278686,0.034947,0.087639,0.082654,0.229179,0.238416,-0.021327,0.900522,-0.187859
2022-02-15 11:27:00,-0.201045,0.071557,0.034540,-0.252656,0.100022,-0.299021,-0.028575,0.201811,0.018770,-0.356333,...,0.033200,-0.158462,0.079375,-0.070098,0.032155,0.198703,0.255428,-0.166163,0.745571,-0.253491
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23 23:46:00,0.032601,0.078433,0.032492,-0.318373,0.276017,-0.263393,-0.054956,0.087959,0.189880,-0.206820,...,-0.326314,-0.215463,0.098930,0.082369,0.107313,0.376260,0.043778,0.130093,0.583377,-0.143427
2022-12-24 14:52:00,-0.056297,0.164230,0.197777,-0.318615,0.252794,-0.253656,0.107579,0.138060,-0.114010,-0.194669,...,-0.060361,-0.159551,0.191552,0.188708,-0.000540,0.484239,0.253237,-0.036062,0.744989,-0.068885
2022-12-24 14:53:00,-0.217360,0.210166,0.241505,-0.241955,0.246345,-0.383213,0.220558,0.081474,-0.159294,-0.204522,...,-0.112555,-0.027100,0.159203,0.264603,0.066941,0.430515,0.346389,-0.034697,0.816659,-0.194899
2022-12-24 19:30:00,-0.164364,0.282353,0.040819,-0.216892,0.211012,-0.361216,0.030899,0.162968,-0.248190,-0.293477,...,0.095757,-0.145396,0.056976,0.118214,0.102159,0.204275,0.304106,-0.054045,1.003057,-0.238203
