In [None]:
import random
import numpy as np
import torch

def set_seed(seed):
    """
    Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including the CUDA generators when CUDA is available), then puts cuDNN
    into deterministic mode with benchmarking turned off.
    """
    # Same seed, same order: random -> numpy -> torch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


set_seed(42)

In [None]:
import matplotlib.pyplot as plt
from kl.homoscedastic_transformer import HomoscedasticTransformer as HomoTrans
from kl.utils import load_fx


# Data: the same pair and window are loaded twice; the second call differs
# only in data_end (6000 instead of 5000).
window_size = 10
pair = 'EURUSD'
X, y, returns = load_fx(
    data_start=0,
    data_end=5000,
    window_size=window_size,
    shift=1,
    pair=pair,
)
X_new, y_new, returns_new = load_fx(
    data_start=0,
    data_end=6000,
    window_size=window_size,
    shift=1,
    pair=pair,
)

# Model sizes: the input width follows the number of columns in X.
input_size = X.shape[1]
hidden_size = 500
latent_size = 10  # Latent space dimension

ht = HomoTrans(
    input_size=input_size,
    hidden_size=hidden_size,
    latent_size=latent_size,
    verbose=True,
)

# Tensors: float32 features and int64 labels for both samples.
X_Tensor, X_Tensor_new = (
    torch.tensor(features, dtype=torch.float32) for features in (X, X_new)
)
y_tensor, y_tensor_new = (
    torch.tensor(labels, dtype=torch.int64) for labels in (y, y_new)
)

# Fit on the shorter sample only.
# Author's note: the loss levels off around 1.0 by about 3000 epochs.
ht.fit(X_Tensor, epochs=3000)

In [None]:
# Transform the fitted sample twice (X_Homo1 repeats the first call) and then
# the extended sample; the call order is deliberately left untouched.
X_Homo = ht.transform(X_Tensor)
X_Homo1 = ht.transform(X_Tensor)
X_Homo_new = ht.transform(X_Tensor_new)

# Overlay the first 50 rows of column 0 from both transformed samples.
for transformed in (X_Homo, X_Homo_new):
    plt.plot(transformed[:50, 0])
plt.title('Homoscedastic transformation')
plt.show()

In [None]:
# Standard deviation over all elements of each transformed sample.
std_fit, std_new = np.std(X_Homo), np.std(X_Homo_new)
print(f'Std : {std_fit}, Std new : {std_new}')

In [None]:
# Every column of both transformed samples on one set of axes.
for transformed in (X_Homo, X_Homo_new):
    plt.plot(transformed)
plt.title('Homoscedastic transformation')
plt.show()

In [None]:
# Persist both transformed samples as comma-separated text.
for csv_name, matrix in (('X_Homo.csv', X_Homo), ('X_Homo_new.csv', X_Homo_new)):
    np.savetxt(csv_name, matrix, delimiter=',')
# The raw inputs can be exported the same way if needed:
# np.savetxt('X.csv', X, delimiter=',')
# np.savetxt('X_new.csv', X_new, delimiter=',')


In [None]:
# Writes X_Homo again, this time under the file name reconstructed_data_homo.csv.
np.savetxt(fname="reconstructed_data_homo.csv", X=X_Homo, delimiter=",")

In [None]:
from statsmodels.stats.diagnostic import het_arch
from termcolor import colored
# Engle's ARCH test for heteroscedasticity on column 0 of the transformed
# extended sample. (Pass X[:, 0] instead to test the untransformed inputs.)
arch_test_resid = het_arch(X_Homo_new[:, 0])

# The first two entries of the result are the test statistic and its p-value.
arch_stat_resid, arch_p_value_resid = arch_test_resid[:2]

print(f'ARCH Test Statistic: {arch_stat_resid}')
print(f'p-value: {arch_p_value_resid:.4f}')

p_above_threshold = arch_p_value_resid > 0.05
if not p_above_threshold:
    print("VAE fail to reduce heteroscedasticity")
else:
    print(colored("A p-value > 0.05 means that heteroscedasticity is no longer present in the residuals, indicating that VAE has successfully reduced it.",'red'))

In [None]:
from arch import arch_model

# Rescale the series by a constant factor before fitting (multiply by 10).
garch_scale = 10
scaled_data = X_Homo_new[:, 0] * garch_scale

# Fit a GARCH(1, 1) model to the rescaled series.
garch_model = arch_model(scaled_data, vol='GARCH', p=1, q=1)
garch_rescaled_fitted = garch_model.fit(disp="off")

# Summarize the model
print(garch_rescaled_fitted.summary())


# Forecast the variance 5 steps ahead from the last observation.
forecast = garch_rescaled_fitted.forecast(horizon=5)
# Var(c * x) = c**2 * Var(x): a series multiplied by `garch_scale` has its
# variance multiplied by garch_scale**2, so undo the scaling with the square.
forecast_variance_rescaled = forecast.variance[-1:] / garch_scale ** 2
print(forecast_variance_rescaled)

In [None]:

# Conditional volatility series of the fitted GARCH model.
conditional_volatility = garch_rescaled_fitted.conditional_volatility

# Draw it against the observation index.
plt.figure(figsize=(10, 6))
plt.plot(conditional_volatility, label='Conditional Volatility')
plt.title('Conditional Volatility Over Time (GARCH Model)')
plt.xlabel('Time')
plt.ylabel('Conditional Volatility')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
import json
def read_config(config_path):
    """Parse the JSON file at ``config_path`` and return the decoded object."""
    with open(config_path, mode='r') as config_file:
        return json.load(config_file)
# Latent dimensionality comes from the JSON config; 2 is used when the
# "latent_dim" key is absent.
config = read_config(config_path="dichotomy_config.json")
latent_dim = config.get("latent_dim", 2)

In [None]:
from kl.dichotomy_vae import DichotomyVAE, LossType
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from datetime import datetime

# Model: input and output widths both follow the number of columns in X.
input_dim = X.shape[1]
vae_model = DichotomyVAE(
    input_dim=input_dim,
    output_dim=input_dim,
    latent_dim=latent_dim,
    num_classes=2,
    loss_type=LossType.Classifier,
    verbose=True,
)

# Adam with a polynomial learning-rate schedule spanning 5000 scheduler steps.
optimizer = optim.Adam(vae_model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.PolynomialLR(optimizer, total_iters=5000)

# NOTE(review): despite the "Homo" in their names, these tensors are built from
# the raw X / X_new, not from X_Homo / X_Homo_new - confirm this is intended.
X_Homo_Tensor, X_Homo_Tensor_new = (
    torch.tensor(features, dtype=torch.float32) for features in (X, X_new)
)

# Shuffled mini-batches of (features, labels) for training.
batch_size = 64
dataset = TensorDataset(X_Homo_Tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Wall-clock timer: covers the training call and the three transform() calls below.
start_time = datetime.now()
# Train the VAE on the batched training data.
vae_model.fit(dataloader, optimizer, scheduler, num_epochs=5000, beta=1, lambda_class=1)

# NOTE(review): the original author flagged transform()'s return order as
# strange; it is unpacked here as (reconstruction, latent representation,
# predicted class).
x_reconstructed, latent_representation, predicted_class = vae_model.transform(X_Homo_Tensor) 
# Second pass over the same training tensor, stored under the "...1" names.
x_reconstructed1, latent_representation1, predicted_class1 = vae_model.transform(X_Homo_Tensor) 
# Pass over the extended sample.
x_reconstructed_new, latent_representation_new, predicted_class_new = vae_model.transform(X_Homo_Tensor_new) 

end_time = datetime.now()
# Elapsed time as a timedelta.
execution_time = end_time - start_time
print(f"Execution time: {execution_time}")

In [None]:
# Detach the model outputs from the graph and move them to NumPy on the CPU.
x_reconstructed_np = x_reconstructed.detach().cpu().numpy()
x_reconstructed_np_new = x_reconstructed_new.detach().cpu().numpy()
latent_representation_np = latent_representation.detach().cpu().numpy()
latent_representation_np_new = latent_representation_new.detach().cpu().numpy()
predicted_class_np = predicted_class.detach().cpu().numpy()

# Dump the outputs and the true labels as comma-separated text, one file each.
for csv_name, values in (
    ("x_reconstructed_np.csv", x_reconstructed_np),
    ("x_reconstructed_np_new.csv", x_reconstructed_np_new),
    ("latent_representation_np.csv", latent_representation_np),
    ("latent_representation_np_new.csv", latent_representation_np_new),
    ("predicted_class_np.csv", predicted_class_np),
    ("real_class_np.csv", y),
    ("real_class_np_new.csv", y_new),
):
    np.savetxt(csv_name, values, delimiter=",")

In [None]:
# Predicted classes for the extended sample as a CPU NumPy array.
predicted_class_np_new = predicted_class_new.detach().cpu().numpy()

In [None]:
# Mean of the predicted classes on the training sample.
class_mean = np.mean(predicted_class_np)
print(f'Class Mean: {class_mean}')

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the VAE's own class predictions on the training sample.
acc_trn = accuracy_score(y, predicted_class_np)
print(f"Train Accuracy: {acc_trn}")

# Accuracy on rows 5000-5049 of the extended sample. This is the held-out
# score, so it is labelled "Test" (it was previously printed as "Train").
acc_tst = accuracy_score(y_new[5000:5050], predicted_class_np_new[5000:5050])
print(f"Test Accuracy: {acc_tst}")

In [None]:
# visualize the embedding
from sklearn. preprocessing import StandardScaler
# Standardise each latent coordinate. NOTE: this overwrites
# latent_representation_np, so every later cell sees the standardised values.
scaler = StandardScaler()
latent_representation_np = scaler.fit_transform(latent_representation_np)

n_latent_dims = latent_representation_np.shape[1]
if n_latent_dims == 3:
    # Create the 3-D axes directly on the figure. Building a 2-D subplot first
    # and then calling plt.axes(projection="3d") leaves an extra 2-D frame
    # drawn behind the 3-D scatter.
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(projection="3d")
    ax.scatter(
        latent_representation_np[:, 0],
        latent_representation_np[:, 1],
        latent_representation_np[:, 2],
        cmap="autumn",
        c=y,
        s=20,
    )
elif n_latent_dims == 2:
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.scatter(latent_representation_np[:, 0], latent_representation_np[:, 1], cmap="autumn", c=y, s=20)
else:
    # Only 2-D and 3-D latent spaces can be drawn as a scatter plot.
    print("Can Not Plot Latent Representation. Dimensions Mismatch")

In [None]:
# One histogram per latent coordinate, all drawn on the same axes.
_, ax = plt.subplots(1, 1, figsize=(6, 6))
color_list = ['r', 'g', 'b', 'c', 'y', 'k', 'm', 'w']
for i in range(np.shape(latent_representation)[1]):
    # Cycle through the palette so more than len(color_list) columns do not
    # raise IndexError.
    ax.hist(latent_representation_np[:, i], bins=50, color=color_list[i % len(color_list)])

In [None]:
# One histogram per reconstructed feature, all drawn on the same axes.
_, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.set_title('Reconstructed')
for i in range(np.shape(x_reconstructed_np)[1]):
    # The reconstruction has as many columns as the model input, which can
    # exceed the palette length; cycle the colours instead of raising IndexError.
    ax.hist(x_reconstructed_np[:, i], bins=50, color=color_list[i % len(color_list)])

In [None]:
import pandas as pd
import seaborn as sns
# Pairwise correlation between the reconstructed features, shown as an
# annotated heatmap. (Swap in latent_representation_np to inspect the latents.)
df = pd.DataFrame(data=x_reconstructed_np)
corr = df.corr()
sns.heatmap(data=corr, annot=True)
plt.show()

In [None]:
from sklearn.feature_selection import mutual_info_classif
# Estimated mutual information between each latent coordinate and the label.
# (Pass x_reconstructed_np instead to score the reconstructed features.)
mutual_info_classif(X=latent_representation_np, y=y)


In [None]:
from sklearn.decomposition import PCA
# Full-rank PCA of the latent representation: one component per latent column.
# (The same analysis can be run on x_reconstructed_np.)
n_latent_columns = latent_representation_np.shape[1]
pca = PCA(n_components=n_latent_columns)
pca.fit(latent_representation_np)

# Share of the total variance carried by each component.
print(pca.explained_variance_ratio_)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Split by position: the first 4500 rows train the model, the rest test it.
x_train, x_test = latent_representation_np[:4500, :], latent_representation_np[4500:, :]
y_train, y_test = y[:4500], y[4500:]

# Random forest on the latent features.
rf = RandomForestClassifier(n_estimators=100, random_state=0)
y_pred = rf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy Random Forest: {acc}")

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost (SAMME) on the current train/test split.
clf = AdaBoostClassifier(n_estimators=100, algorithm="SAMME", random_state=0)
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy AdaBoost: {acc}")

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 100 default SVCs on the current train/test split.
clf = BaggingClassifier(estimator=SVC(), n_estimators=100, random_state=0)
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy BaggingClassifier: {acc}")

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# Hard-voting ensemble of logistic regression, random forest and Gaussian NB.
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(n_estimators=100, random_state=1)
clf3 = GaussianNB()
voters = [('lr', clf1), ('rf', clf2), ('gnb', clf3)]
eclf1 = VotingClassifier(estimators=voters, voting='hard')

y_pred = eclf1.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy Voting: {acc}")

In [None]:
from sklearn.ensemble import HistGradientBoostingClassifier

# Histogram-based gradient boosting with default settings.
clf = HistGradientBoostingClassifier()
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy HistGradientBoostingClassifier: {acc}")

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with depth-1 trees (stumps) and a learning rate of 1.0.
clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
)
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy GradientBoostingClassifier: {acc}")

In [None]:
# Switch the feature matrices to the reconstructed inputs (same 4500-row
# split); y_train / y_test are reused from the latent-feature split.
x_train, x_test = x_reconstructed_np[:4500, :], x_reconstructed_np[4500:, :]

# Refit the existing random forest on the reconstructed features.
y_pred = rf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Reconstructed Test Accuracy Random Forest: {acc}")

Random Forest test accuracy:

- Latent 4 - 72%
- Latent 3 - 86%
- Latent 2 - 92%
- Reconstructed 2 - 92%

In [None]:
# Histogram-based gradient boosting on the current x_train / x_test.
clf = HistGradientBoostingClassifier()
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Reconstructed Test Accuracy HistGradientBoostingClassifier: {acc}")

In [None]:
# Gradient boosting with depth-1 trees on the current x_train / x_test.
clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
)
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Reconstructed Test Accuracy GradientBoostingClassifier: {acc}")

In [None]:
# Refit the existing voting ensemble on the current x_train / x_test.
y_pred = eclf1.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Reconstructed Test Accuracy Voting: {acc}")

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Extremely randomised trees on the current x_train / x_test.
clf = ExtraTreesClassifier(n_estimators=100, random_state=0)
y_pred = clf.fit(x_train, y_train).predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Reconstructed Test Accuracy ExtraTreesClassifier: {acc}")

In [None]:
# Train on the latents of the fitted sample and score on rows 5000-5049 of the
# extended sample's latents (latent_representation_np_new).
#
# latent_representation_np was overwritten with StandardScaler output in the
# embedding-visualisation cell, while latent_representation_np_new was never
# scaled. Fitting on one and predicting on the other mixes two feature scales,
# so the training latents are re-derived here from the raw tensor.
latent_train_raw = latent_representation.detach().cpu().numpy()

clf = ExtraTreesClassifier(n_estimators=100, random_state=0)
clf.fit(latent_train_raw, y)
y_pred = clf.predict(latent_representation_np_new[5000:5050, :])
acc = accuracy_score(y_new[5000:5050], y_pred)
# These are latent features, not reconstructions, so the label says "Latent".
print(f"Latent Test Accuracy ExtraTreesClassifier: {acc}")

In [None]:
x_reconstructed_np1 = x_reconstructed1.cpu().detach().numpy()

# Train on the leading rows of the extended reconstruction that have a label in
# y. The row count is taken from y rather than hard-coded (it was 4990), so the
# cell keeps working when window_size or data_end change.
n_train_rows = len(y)
clf = ExtraTreesClassifier(n_estimators=100, random_state=0)
clf.fit(x_reconstructed_np_new[:n_train_rows, :], y)

# Predict every row, then score only rows from index 5000 onward.
y_pred = clf.predict(x_reconstructed_np_new)
acc = accuracy_score(y_new[5000:], y_pred[5000:])
print(f"Reconstructed Test Accuracy ExtraTreesClassifier: {acc}")

In [None]:
# Predicted vs. true labels over rows 4990-5019 of the extended sample.
inspect_rows = slice(4990, 5020)
for series in (y_pred, y_new):
    plt.plot(series[inspect_rows])