In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Make the project folder the working directory; later cells open files by bare file name.
%cd /content/drive/MyDrive/Final_Block/

Mounted at /content/drive
/content/drive/MyDrive/Final_Block


In [None]:
# List the files in the current working directory.
!ls

fused_embeddings_hierarchical.npy  test_btc_ohlcv.csv
fused_embeddings_simple.npy	   test_embed_ohlcv.csv
Fusion.ipynb			   test_news_embeddings.pkl
news_btc_embeddings.pkl		   timesnet_backbone_embeddings.npy
summ_news.csv			   timesnet_labels.npy


In [None]:
import numpy as np
import pandas as pd
import pickle
import torch
import torch.nn as nn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the array stored in 'timesnet_backbone_embeddings.npy'.
ohlcv_embed = np.load('timesnet_backbone_embeddings.npy')

# Load the pickled news-embedding container. The context manager closes the
# file handle as soon as the object has been read.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('test_news_embeddings.pkl', 'rb') as news_file:
    news_embed = pickle.load(news_file)

In [None]:
# Keep only the entry stored under the 'embeddings' key of the loaded object.
# NOTE(review): this rebinds `news_embed`, so running the cell a second time
# indexes the extracted value with a string key again — re-run the load cell first.
news_embed = news_embed['embeddings']

In [None]:
# Show both embedding shapes, one per line (OHLCV first, then news).
print(ohlcv_embed.shape, news_embed.shape, sep='\n')

(3195, 49, 768)
(3195, 768)


In [None]:
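# Disabled: keep only the last time step of each sample, i.e. (N, T, D) -> (N, 1, D).
# NOTE(review): the saved output of the next cell shows (3195, 1, 768), so this slice
# was active when the outputs were captured; with it commented out the shape stays (3195, 49, 768).
# ohlcv_embed = ohlcv_embed[:, -1:, :]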

In [None]:
# Display the current shape of the OHLCV embedding array.
ohlcv_embed.shape

(3195, 1, 768)

In [None]:
# Read the OHLCV table from CSV into a DataFrame.
df = pd.read_csv('test_embed_ohlcv.csv')
# Print column names, non-null counts and dtypes as a quick schema check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15978 entries, 0 to 15977
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        15978 non-null  object 
 1   open        15978 non-null  float64
 2   high        15978 non-null  float64
 3   low         15978 non-null  float64
 4   close       15978 non-null  float64
 5   volume      15978 non-null  float64
 6   pre_return  15978 non-null  float64
 7   return      15978 non-null  float64
dtypes: float64(7), object(1)
memory usage: 998.8+ KB


In [None]:
# Keep the first 3195 rows (the sample count printed for both embedding arrays).
# Take an explicit copy so that the column assignments in later cells write to an
# independent frame rather than to a slice of the full CSV, which would trigger
# pandas' SettingWithCopyWarning.
df = df.iloc[:3195].copy()

In [None]:
# Look-ahead window length, in rows.
horizon = 15

# Rolling extreme over `horizon` rows, shifted back by `horizon`, so row i holds
# the max high / min low of rows i+1 .. i+horizon.
df['future_high'] = df['high'].rolling(window=horizon).max().shift(-horizon)
df['future_low'] = df['low'].rolling(window=horizon).min().shift(-horizon)

# Future high-low spread as a percentage of the current close.
# NOTE(review): rows without a complete look-ahead window are NaN and are turned
# into 0 here, so they are later treated as ordinary low-range rows — confirm intended.
spread = df['future_high'] - df['future_low']
df['range'] = (spread / df['close'] * 100).fillna(0)

In [None]:
# Average of the percentage range column (includes the rows that were filled with 0).
df['range'].mean()

np.float64(1.4479785423321856)

In [None]:
def categorize_volatility(range_pct, threshold=1):
    """Map a percentage price range to a volatility label.

    Args:
        range_pct: Price range expressed in percent; may be NaN.
        threshold: Inclusive lower bound (in percent) for the
            'high_volatility' label. Defaults to 1, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        ``np.nan`` when ``range_pct`` is missing, 'high_volatility' when
        ``range_pct >= threshold``, otherwise 'normal_volatility'.
    """
    # A missing input propagates as a missing label.
    if pd.isna(range_pct):
        return np.nan

    return 'high_volatility' if range_pct >= threshold else 'normal_volatility'


# Label every row from its percentage range; the function is passed directly,
# no wrapper lambda is needed.
df['label'] = df['range'].apply(categorize_volatility)

In [None]:
from sklearn import preprocessing
import numpy as np

# LabelEncoder maps each distinct label to an integer, numbered in sorted order of the labels.
le = preprocessing.LabelEncoder()

# Encode straight from the DataFrame column so this cell does not rely on a
# `labels` variable already existing in the kernel.
labels = le.fit_transform(df['label'])
print(f"Encoded data: {labels}")

# classes_[k] is the original label that was mapped to integer k.
classes = le.classes_
print(f"Classes: {classes}")

Encoded data: [0 0 1 ... 1 1 1]
Classes: ['high_volatility' 'normal_volatility']


In [None]:
# Raw string labels from the DataFrame as a plain Python list.
# NOTE(review): this rebinds `labels`; if the name already holds the integer-encoded
# array, running this cell replaces it with strings — check the cell execution order.
labels=df['label'].tolist()

In [None]:
# Count how many entries of `labels` equal 0.
# NOTE(review): presumably `labels` is the integer-encoded array here, in which case
# 0 corresponds to the first entry of the encoder's classes — confirm.
cnt = sum(1 for value in labels if value == 0)
print(cnt)

1880


In [None]:
def temporal_pool_concat(ohlcv, news, pool_method='mean'):
    """Pool ``ohlcv`` over axis 1 and concatenate the result with ``news``.

    Args:
        ohlcv: 3-D array indexed as (sample, step, feature); axis 1 is reduced.
        news: 2-D array with one row per sample.
        pool_method: How axis 1 is reduced — 'mean' (average), 'max'
            (element-wise maximum) or 'last' (final step only).

    Returns:
        2-D array whose rows are the pooled ``ohlcv`` features followed by the
        ``news`` features of the same sample.

    Raises:
        ValueError: If ``pool_method`` is not one of the supported names.
    """
    if pool_method == 'mean':
        ohlcv_pooled = ohlcv.mean(axis=1)
    elif pool_method == 'max':
        ohlcv_pooled = ohlcv.max(axis=1)
    elif pool_method == 'last':
        ohlcv_pooled = ohlcv[:, -1, :]
    else:
        # Without this branch an unknown name fell through and failed later
        # with an UnboundLocalError on `ohlcv_pooled`.
        raise ValueError(
            f"Unknown pool_method {pool_method!r}; expected 'mean', 'max' or 'last'"
        )

    return np.concatenate([ohlcv_pooled, news], axis=1)

# Mean-pool the OHLCV embeddings over axis 1 and append the news embeddings.
fused_simple = temporal_pool_concat(ohlcv_embed, news_embed, pool_method='mean')
print(f"Simple fusion shape: {fused_simple.shape}")

# Persist the fused matrix next to the other artefacts.
np.save('fused_embeddings_simple.npy', fused_simple)

# The classifier cells read `fused_features`, which no visible cell defines.
# NOTE(review): binding it to the simple fusion is an assumption based on the
# matching (3195, 1536) shape in the saved outputs — confirm this is the intended input.
fused_features = fused_simple


=== Fusion Results ===
Simple fusion shape: (3195, 1536)
Smart fusion shape: (3195, 512)
Hierarchical fusion shape: (3195, 3072)
Attention fusion shape: torch.Size([3195, 768])


In [None]:
# Load the array stored in 'timesnet_labels.npy'.
returns = np.load('timesnet_labels.npy')

In [None]:
# Inspect a single entry (index 3190) of the loaded array.
returns[3190]

array([[ 6.353914  ,  6.3462152 ,  6.363626  ,  6.3554354 , -0.5174512 ,
         0.03308937, -0.18157104]], dtype=float32)

In [None]:
# Remove the length-1 axis at position 1 so each sample is a flat vector.
label_emb_flat = returns.squeeze(1)

In [None]:
# Same entry as inspected before, now without the singleton axis.
label_emb_flat[3190]

array([ 6.353914  ,  6.3462152 ,  6.363626  ,  6.3554354 , -0.5174512 ,
        0.03308937, -0.18157104], dtype=float32)

In [None]:
# Binary label per sample: 1 when the mean over axis 1 is positive, else 0.
# NOTE(review): the sample row displayed earlier has four values near 6.35, so its mean
# is positive whatever the last three values are — confirm that averaging every column
# is intended. Also note the training cell uses `labels`, not `class_labels`.
class_labels = (label_emb_flat.mean(axis=1) > 0).astype(int)

In [None]:
# Expect one binary label per sample.
class_labels.shape

(3195,)

In [None]:
# Shape of the feature matrix that the train/validation split consumes.
fused_features.shape

(3195, 1536)

In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Sanity check: `labels` must be an array at this point (a plain list has no `.shape`).
labels.shape

(3195,)

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the split repeatable.
# NOTE(review): this is a shuffled split — if rows are time-ordered, consider a
# chronological split to avoid look-ahead leakage; confirm.
X_train, X_val, y_train, y_val = train_test_split(
    fused_features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Wrap each split as (float32 features, int64 targets) tensors.
train_dataset, val_dataset = (
    TensorDataset(torch.FloatTensor(features), torch.LongTensor(targets))
    for features, targets in ((X_train, y_train), (X_val, y_val))
)

# Only the training batches are shuffled; validation keeps its order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

class MLP(nn.Module):
    """Feed-forward classifier: input_dim -> 128 -> 64 -> num_classes.

    Each of the two hidden linear layers is followed by ReLU and Dropout(p=0.3).
    The last linear layer returns raw logits; no softmax is applied.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        first_hidden = [nn.Linear(input_dim, 128), nn.ReLU(), nn.Dropout(0.3)]
        second_hidden = [nn.Linear(128, 64), nn.ReLU(), nn.Dropout(0.3)]
        head = nn.Linear(64, num_classes)
        # Flattened into a single Sequential so sub-module indices (0..6) stay unchanged.
        self.layers = nn.Sequential(*first_hidden, *second_hidden, head)

    def forward(self, x):
        """Return the logits for a batch ``x`` with ``input_dim`` features per row."""
        logits = self.layers(x)
        return logits

# Seed torch's global RNG before the model is built so that weight initialisation,
# dropout masks and the default DataLoader shuffling are repeatable across runs.
torch.manual_seed(42)

# One output logit per distinct label value.
model = MLP(input_dim=fused_features.shape[1], num_classes=len(np.unique(labels)))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training budget and early-stopping bookkeeping.
num_epochs = 100
best_val_acc = 0
patience = 10
patience_counter = 0

for epoch in range(num_epochs):
    # ---- training pass: one optimisation step per mini-batch ----
    model.train()
    train_loss = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # ---- validation pass: dropout disabled, no gradients tracked ----
    model.eval()
    val_loss = 0
    val_preds = []
    val_true = []

    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item()

            # Predicted class = index of the largest logit in each row.
            predicted = outputs.argmax(dim=1)
            val_preds.extend(predicted.cpu().numpy())
            val_true.extend(batch_y.cpu().numpy())

    val_acc = accuracy_score(val_true, val_preds)

    # Losses are reported as the mean over batches.
    print(f'Epoch {epoch+1}/{num_epochs}:')
    print(f'Train Loss: {train_loss/len(train_loader):.4f}')
    print(f'Val Loss: {val_loss/len(val_loader):.4f}')
    print(f'Val Accuracy: {val_acc:.4f}')

    # Checkpointing + early stopping.
    # 'best_model.pth' is loaded after the loop, so it has to be written during
    # this run; otherwise the load fails on a fresh environment or silently picks
    # up a stale file from an earlier run. The first epoch always writes a
    # checkpoint so the file exists even if accuracy never exceeds the initial best.
    if epoch == 0 or val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        patience_counter = 0
    else:
        patience_counter += 1
        # Stop once validation accuracy has not improved for `patience` epochs in a row.
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

# Restore the weights stored in 'best_model.pth' (the file must already exist on disk)
# and switch to inference mode so dropout is inactive.
best_state = torch.load('best_model.pth')
model.load_state_dict(best_state)
model.eval()

# Score the whole validation split in a single forward pass.
# The variables are named "test", but the data is the validation set.
with torch.no_grad():
    test_outputs = model(torch.FloatTensor(X_val))
    test_preds = test_outputs.argmax(dim=1)

print("\nFinal Results:")
print(classification_report(y_val, test_preds.numpy()))

Epoch 1/100:
Train Loss: 0.6917
Val Loss: 0.6777
Val Accuracy: 0.6119
Epoch 2/100:
Train Loss: 0.5427
Val Loss: 0.8137
Val Accuracy: 0.5571
Epoch 3/100:
Train Loss: 0.3389
Val Loss: 0.9974
Val Accuracy: 0.5571
Epoch 4/100:
Train Loss: 0.1713
Val Loss: 1.5235
Val Accuracy: 0.5430
Epoch 5/100:
Train Loss: 0.1010
Val Loss: 1.8875
Val Accuracy: 0.5415
Epoch 6/100:
Train Loss: 0.0761
Val Loss: 2.1043
Val Accuracy: 0.5430
Epoch 7/100:
Train Loss: 0.0598
Val Loss: 2.2729
Val Accuracy: 0.5587
Epoch 8/100:
Train Loss: 0.0530
Val Loss: 2.3347
Val Accuracy: 0.5587
Epoch 9/100:
Train Loss: 0.0349
Val Loss: 2.5647
Val Accuracy: 0.5571
Epoch 10/100:
Train Loss: 0.0351
Val Loss: 2.7546
Val Accuracy: 0.5524
Epoch 11/100:
Train Loss: 0.0442
Val Loss: 2.8071
Val Accuracy: 0.5493
Epoch 12/100:
Train Loss: 0.0341
Val Loss: 2.9368
Val Accuracy: 0.5243
Epoch 13/100:
Train Loss: 0.0384
Val Loss: 2.7866
Val Accuracy: 0.5603
Epoch 14/100:
Train Loss: 0.0421
Val Loss: 2.7099
Val Accuracy: 0.5368
Epoch 15/100:
T