# Data Generation (auxiliary)

In [1]:
import numpy as np
import pandas as pd
import torch

In [2]:
def sig(x):
    return 1 / (1 + np.exp(-x))

In [3]:
N = 1000
d = 10

mu = np.random.randn(1, d)
sigma = sig(np.random.randn(1, d))
X = np.random.randn(N, d) * sigma + mu

In [4]:
beta = np.random.randn(d, 1)
beta0 = np.random.randn(1)
Y = sig(X @ beta) + beta0

# Your Data

In [5]:
import pandas as pd
import numpy as np

In [None]:
data = pd.read_csv('./data/SOL/solana.csv')
data.head()

In [None]:
for col in data.columns:
  if data[col].isna().all():
    print(col)
    data = data.drop(col)

In [None]:
for col in data.columns:
  if col == 'Date':
    continue
  mean = np.mean(data[col][~data[col].isna()])
  data[col][data[col].isna()] = mean

data.head()

# Data Processing

In [None]:
index = np.random.permutation(N)
index_train = index[:int(0.8 * N)]
index_test = index[int(0.8 * N):]

X_train = X[index_train]
X_test = X[index_test]
Y_train = Y[index_train]
Y_test = Y[index_test]

In [None]:
x_mean = X_train.mean(axis=0)
x_std = X_train.std(axis=0)
X_train = (X_train - x_mean) / x_std
X_test = (X_test - x_mean) / x_std

y_mean = Y_train.mean()
y_std = Y_train.std()
Y_train = (Y_train - y_mean) / y_std
Y_test = (Y_test - y_mean) / y_std

# Models

**Neural Network**

In [None]:
import torch.nn as nn
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class Net(nn.Module):

  def __init__(self, input_size, hidden_sizes):

    super().__init__()

    self.layers = nn.Sequential()

    dims = (input_size,) + hidden_sizes
    for i in range(len(dims) - 1):
      self.layers.append(nn.Linear(dims[i], dims[i+1]))
      self.layers.append(nn.ReLU())

    self.layers.append(nn.Linear(dims[-1], 1))

  def forward(self, x):
    return self.layers(x)

In [None]:
N_epochs = 10
batch_size = 20
learning_rate = 0.01

model = Net(d, (20,)).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for e in range(N_epochs):
  p = np.random.permutation(len(X_train))
  X_train = X_train[p]
  Y_train = Y_train[p]

  for i in range(0, len(X_train), batch_size):
    X_batch = X_train[i:i+batch_size]
    Y_batch = Y_train[i:i+batch_size]

    x = torch.tensor(X_batch, dtype=torch.float32).to(device)
    y = torch.tensor(Y_batch, dtype=torch.float32).to(device)

    optimizer.zero_grad()
    y_pred = model(x)
    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()

  p_test = np.random.permutation(len(X_test))[:batch_size]
  X_test = X_test[p_test]
  Y_test = Y_test[p_test]

  x_test = torch.tensor(X_test, dtype=torch.float32).to(device)
  y_test = torch.tensor(Y_test, dtype=torch.float32).to(device)

  with torch.no_grad():
    y_pred = model(x_test)
    mse = criterion(y_pred, y_test)
    print(f"Epoch {e+1}/{N_epochs}, MSE: {mse.item():.4f}")

**Random Forest**

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
model = RandomForestRegressor(
    n_estimators=200,     # number of trees
    max_depth=10,       # or set an integer value
    random_state=42
)
model.fit(X_train, Y_train)

In [None]:
Y_pred = model.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
print(f"Test MSE: {mse:.4f}")

Test MSE: 0.1897
