In [7]:
!pip install catboost xgboost
!pip install pyqlib[all]

import qlib
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord
from qlib.data import D  # Qlib Data Module
from qlib.contrib.model.gbdt import LGBModel
  # Using LightGBM as an additional model
from qlib.contrib.data.handler import Alpha158  # Feature Engineering
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

# Initialize Qlib
# Point Qlib at the local data directory given by provider_uri.
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")

# Step 1: Load and Process Stock Data from CSV
# The CSV path is relative, so it is resolved against the current working
# directory; the first column becomes the index and is parsed as dates.
df_combined = pd.read_csv("all_stocks_historical_data.csv", index_col=0, parse_dates=True)
# Drop every row that contains a missing value (mutates df_combined in place).
df_combined.dropna(inplace=True)
# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/test split performed further down, so scaling statistics include the
# rows that later end up in the test set — confirm whether that is intended.
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df_combined)
# Scaled values as a float tensor; assumes every remaining column is numeric —
# TODO confirm against the CSV schema.
data_tensor = torch.FloatTensor(df_scaled)

# Function to create sequences
def create_sequences(data, seq_length):
    """Build sliding windows over ``data`` together with their next-step targets.

    For each start offset ``i`` in ``range(len(data) - seq_length)`` the window
    is ``data[i:i + seq_length]`` and its target is ``data[i + seq_length]``.

    Args:
        data: Tensor indexed along its first dimension.
        seq_length: Number of consecutive rows in each window.

    Returns:
        A ``(windows, targets)`` tuple, each produced by ``torch.stack``.
    """
    window_count = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(window_count)]
    targets = [data[start + seq_length] for start in range(window_count)]
    return torch.stack(windows), torch.stack(targets)

# Each sample is a window of seq_length consecutive rows; the target is the
# row that immediately follows the window.
seq_length = 30
X, y = create_sequences(data_tensor, seq_length)

# Train-Test Split
# Positional split: the first 80% of the windows train the model and the
# remainder is held out (assumes the CSV rows are in chronological order —
# TODO confirm).
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
# Training batches are reshuffled; test batches keep their original order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Step 2: Define and Train the Model
class StockPredictorSAMBA(nn.Module):
    """Sequence-to-one predictor: bidirectional LSTM, projection, feed-forward head.

    Despite the attribute name ``bi_mamba``, the recurrent layer is a standard
    bidirectional ``nn.LSTM``. Attribute names are kept as-is so that existing
    ``state_dict`` keys remain valid.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of the LSTM (per direction) and of the head.
        output_dim: Number of values predicted per sequence.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        # The constructor must be spelled ``__init__`` (double underscores);
        # with ``_init_`` Python never calls it and no layers are registered.
        super().__init__()
        self.bi_mamba = nn.LSTM(input_dim, hidden_dim, batch_first=True, bidirectional=True)
        # The bidirectional LSTM emits 2 * hidden_dim features per step;
        # project them back down to hidden_dim.
        self.agc = nn.Linear(2 * hidden_dim, hidden_dim)
        self.ffn = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_dim)`` (the LSTM is
                created with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        x, _ = self.bi_mamba(x)
        x = torch.relu(self.agc(x))
        # Only the representation of the last time step feeds the head.
        x = self.ffn(x[:, -1, :])
        return x

# Model Setup
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Input and output widths both equal the number of columns in df_combined, so
# the network predicts a full row of (scaled) features for the next step.
model = StockPredictorSAMBA(input_dim=df_combined.shape[1], hidden_dim=64, output_dim=df_combined.shape[1]).to(device)
# Mean squared error between predicted and actual rows; Adam with lr=0.001.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, epochs=50):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Uses the module-level ``device``, ``optimizer`` and ``criterion``. After
    each epoch the mean per-batch loss is printed.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        epochs: Number of full passes over ``train_loader``.
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for features, targets in train_loader:
            features = features.to(device)
            targets = targets.to(device)

            # Standard step: clear gradients, forward, backward, update.
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()

            total_loss += batch_loss.item()
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.6f}")

# Fit the model on the training batches using train_model's default epoch count.
train_model(model, train_loader)

def evaluate_model(model, test_loader):
    """Print the mean per-batch loss of ``model`` over ``test_loader``.

    Uses the module-level ``device`` and ``criterion``. The model is put in
    evaluation mode and gradients are disabled for the whole pass.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
    """
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for features, targets in test_loader:
            features = features.to(device)
            targets = targets.to(device)
            batch_loss = criterion(model(features), targets)
            total_loss += batch_loss.item()
    print(f"Test Loss: {total_loss / len(test_loader):.6f}")

# Report the mean loss on the held-out test batches.
evaluate_model(model, test_loader)

# Step 3: Prediction and Qlib Experiment Logging
def predict_future(model, last_data, days=30):
    """Forecast ``days`` future steps autoregressively, in original units.

    Each prediction is appended to the input window (dropping the oldest
    step) and fed back into the model to produce the next prediction. Uses the
    module-level ``device`` and the fitted module-level ``scaler``.

    Args:
        model: Trained network mapping ``(1, seq_len, n_features)`` to
            ``(1, n_features)``.
        last_data: The most recent scaled window, a tensor or array-like of
            shape ``(seq_len, n_features)``.
        days: Number of future steps to predict; must be at least 1.

    Returns:
        Array of shape ``(days, n_features)`` with the scaling inverted.

    Raises:
        ValueError: If ``days`` is smaller than 1.
    """
    if days < 1:
        raise ValueError("days must be at least 1")

    model.eval()
    predictions = []
    # as_tensor accepts both tensors and array-likes; unsqueeze adds the
    # batch dimension the model expects.
    input_seq = torch.as_tensor(last_data, dtype=torch.float32).to(device).unsqueeze(0)
    with torch.no_grad():
        for _ in range(days):
            pred = model(input_seq)
            predictions.append(pred.cpu())
            # Slide the window: drop the oldest step, append the new prediction.
            input_seq = torch.cat((input_seq[:, 1:, :], pred.unsqueeze(1)), dim=1)
    # Concatenate along the batch axis so the result is always 2-D, even for
    # days == 1 or a single feature (a bare squeeze() would collapse it to 1-D,
    # which inverse_transform rejects).
    stacked = torch.cat(predictions, dim=0).numpy()
    return scaler.inverse_transform(stacked)

# Record Experiment in Qlib
# NOTE(review): SignalRecord is handed a torch nn.Module, a pandas DataFrame
# and a list of column names. Qlib's SignalRecord presumably expects a Qlib
# model, a Qlib dataset and a recorder in those positions — confirm against
# the Qlib documentation, as generate() may fail with these arguments.
with R.start(exp_name="Samba_Qlib_Experiment"):
    sr = SignalRecord(model, df_combined, df_combined.columns.tolist())
    sr.generate()

# Seed the forecast with the last seq_length scaled rows and roll forward 30 steps.
future_predictions = predict_future(model, data_tensor[-seq_length:], days=30)
print(future_predictions)

ERROR: Exception:

Collecting catboost







Traceback (most recent call last):

  Using cached catboost-1.2.7-cp312-cp312-win_amd64.whl.metadata (1.2 kB)





Collecting xgboost
  Using cached xgboost-3.0.0-py3-none-win_amd64.whl.metadata (2.1 kB)

  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\urllib3\response.py", line 438, in _error_catcher







Collecting graphviz (from catboost)

    yield





  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\urllib3\response.py", line 561, in read

  Using cached graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)







    data = self._fp_read(amt) if not fp_closed else b""





           ^^^^^^^^^^^^^^^^^^








  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\urllib3\response.py", line 527, in _fp_read





    return self._fp.read(amt) if amt is not None else self._fp.read()








           ^^^^^^^^^^^^^^^^^^




  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\cachecontrol\filewrapper.py", line 98, in read









    data: bytes = self.__fp.read(amt)









                  ^^^^^^^^^^^^^^^^^^^




  File "C:\Users\KIIT\anaconda3\Lib\http\client.py", line 479, in read









    s = self.fp.read(amt)




        ^^^^^^^^^^^^^^^^^





  File "C:\Users\KIIT\anaconda3\Lib\socket.py", line 708, in readinto








    return self._sock.recv_into(b)




           ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\KIIT\anaconda3\Lib\ssl.py", line 1252, in recv_into





    return self.read(nbytes, buffer)








           ^^^^^^^^^^^^^^^^^^^^^^^^^




  File "C:\Users\KIIT\anaconda3\Lib\ssl.py", line 1104, in read


Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl (101.7 MB)


    return self._sslobj.read(len, buffer)


   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--


           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^







   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

TimeoutError: The read operation timed out








   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

During handling of the above exception, another exception occurred:


   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--








Traceback (most recent call last):

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--







  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\base_command.py", line 106, in _run_wrapper

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--







    status = _inner_run()

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--



             ^^^^^^^^^^^^

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--







  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\base_command.py", line 97, in _inner_run

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--





   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

    return self.run(options, args)







   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

           ^^^^^^^^^^^^^^^^^^^^^^^





  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\req_command.py", line 67, in wrapper

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--







    return func(self, options, args)

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--







           ^^^^^^^^^^^^^^^^^^^^^^^^^

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--







  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\commands\install.py", line 386, in run


   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

    requirement_set = resolver.resolve(







   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--


                      ^^^^^^^^^^^^^^^^^

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--


  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\resolution\resolvelib\resolver.py", line 179, in resolve







   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

    self.factory.preparer.prepare_linked_requirements_more(reqs)


   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--


  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\operations\prepare.py", line 554, in prepare_linked_requirements_more





    self._complete_partial_requirements(

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--





   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\operations\prepare.py", line 469, in _complete_partial_requirements







   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

    for link, (filepath, _) in batch_download:





                               ^^^^^^^^^^^^^^

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--



  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\network\download.py", line 184, in __call__

   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--





   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--

    for chunk in chunks:



   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

                 ^^^^^^





  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\progress_bars.py", line 55, in _rich_progress_bar

   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--





   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

    for chunk in iterable:







   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

                 ^^^^^^^^







   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--


  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_internal\network\utils.py", line 65, in response_chunks


   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--


    for chunk in response.raw.stream(


   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--


                 ^^^^^^^^^^^^^^^^^^^^

   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--


  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\urllib3\response.py", line 622, in stream


   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--







    data = self.read(amt=amt, decode_content=decode_content)

   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--



           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--





   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\urllib3\response.py", line 560, in read







   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

    with self._error_catcher():



   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

  File "C:\Users\KIIT\anaconda3\Lib\contextlib.py", line 158, in __exit__







   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

    self.gen.throw(value)







   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

  File "C:\Users\KIIT\AppData\Roaming\Python\Python312\site-packages\pip\_vendor\urllib3\response.py", line 443, in _error_catcher





    raise ReadTimeoutError(self._pool, None, "Read timed out.")

   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--





   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--

pip._vendor.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='files.pythonhosted.org', port=443): Read timed out.







   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   ---

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.7 MB 17.7 kB/s eta 1:28:00
   --- ------------------------------------ 8.4/101.











[14312:MainThread](2025-03-21 00:42:04,996) INFO - qlib.Initialization - [config.py:420] - default_conf: client.
[14312:MainThread](2025-03-21 00:42:05,013) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[14312:MainThread](2025-03-21 00:42:05,019) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': WindowsPath('C:/Users/KIIT/.qlib/qlib_data/cn_data')}


FileNotFoundError: [Errno 2] No such file or directory: 'all_stocks_historical_data.csv'