In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from hyperopt import hp
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import ASHAScheduler


In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("gpu")
else:
    device = torch.device('cpu')
print(torch.__version__)
print('CUDA available:', torch.cuda.is_available())
print('CUDA version:', torch.version.cuda)
print('cuDNN version:', torch.backends.cudnn.version())

gpu
2.1.2+cu121
CUDA available: True
CUDA version: 12.1
cuDNN version: 8902


In [3]:
data = pd.read_csv("Technology_Firm_Stock_Price.csv")
data = data.sort_values(by="Date")
data.iloc[-197:]

Unnamed: 0,Date,ADSK,ORCL,PTC,CDNS,FICO,CRM,SNPS,INTU,TYL,...,NOW,MSFT,PANW,CDW,NTAP,STX,HPQ,WDC,AAPL,HPE
809,2023-03-21,207.149994,86.258675,120.389999,207.880005,696.880005,188.679993,374.359985,419.189697,329.570007,...,445.459991,272.053467,194.490005,192.704956,62.480740,60.264462,27.854996,35.470001,158.636475,14.427283
810,2023-03-22,198.380005,86.573853,118.570000,204.000000,678.700012,186.509995,371.350006,411.422363,320.709991,...,433.510010,270.572845,190.889999,188.929932,59.911034,58.731163,27.115051,34.700001,157.192352,14.114285
811,2023-03-23,201.139999,86.455650,119.980003,207.000000,680.849976,187.440002,379.399994,429.579315,322.549988,...,440.470001,275.908997,193.889999,189.346069,59.891563,59.779240,26.988483,35.270000,158.287888,13.957786
812,2023-03-24,200.220001,86.682182,121.699997,204.479996,690.940002,190.059998,376.559998,426.678986,329.480011,...,432.899994,278.800659,191.550003,189.950470,59.434082,59.818062,26.988483,35.259998,159.602585,13.918660
813,2023-03-27,198.750000,88.780052,123.169998,205.270004,688.630005,191.259995,373.920013,424.066650,332.250000,...,431.309998,274.637054,192.529999,189.910843,60.018108,60.177120,27.153997,35.209999,157.640533,14.681595
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,2023-12-22,242.759995,105.790077,173.970001,275.820007,1168.880005,266.339996,524.460022,623.131348,415.000000,...,697.549988,374.579987,298.209991,226.630005,88.882095,85.160004,30.180000,52.660000,193.600006,17.090000
1002,2023-12-26,245.070007,105.780121,175.309998,274.959991,1164.619995,266.220001,520.250000,623.910156,416.410004,...,701.229980,374.660004,300.820007,228.729996,88.862213,85.870003,30.410000,52.759998,193.050003,17.230000
1003,2023-12-27,245.110001,105.531090,175.720001,274.640015,1170.609985,266.720001,518.099976,628.173767,415.600006,...,703.760010,374.070007,297.500000,228.550003,88.086731,85.680000,30.240000,52.419998,193.149994,17.090000
1004,2023-12-28,244.910004,105.839890,175.919998,273.239990,1169.339966,265.579987,517.409973,627.075439,418.290009,...,702.460022,375.279999,295.579987,228.699997,87.698990,86.790001,30.180000,52.930000,193.580002,17.170000


In [4]:
data = data.drop(columns=["Date"])
data

Unnamed: 0,ADSK,ORCL,PTC,CDNS,FICO,CRM,SNPS,INTU,TYL,ADBE,...,NOW,MSFT,PANW,CDW,NTAP,STX,HPQ,WDC,AAPL,HPE
0,187.830002,50.359100,76.370003,71.440002,382.920013,166.990005,142.869995,258.836151,306.239990,334.429993,...,291.239990,154.779541,78.470001,137.363739,55.579060,51.156185,18.257439,64.771545,73.152641,14.081444
1,184.949997,50.181740,75.430000,70.419998,381.920013,166.169998,141.759995,257.105133,306.670013,331.809998,...,291.100006,152.852264,78.943336,135.712097,54.631645,49.811756,18.029110,63.774597,72.441467,13.733110
2,187.119995,50.443100,76.269997,70.849998,384.000000,173.449997,141.979996,259.079346,310.209991,333.709991,...,292.869995,153.247345,80.086670,134.690536,54.498821,49.219864,18.099367,62.550629,73.018684,13.689568
3,187.500000,50.555126,75.919998,71.070000,388.489990,176.000000,142.960007,259.137634,311.269989,333.390015,...,292.910004,151.850082,80.546669,134.537781,54.853001,49.811756,18.143276,66.785164,72.675278,13.619902
4,189.949997,50.752010,76.980003,72.279999,391.329987,177.330002,145.860001,265.371277,310.989990,337.869995,...,295.649994,154.268814,79.523331,134.337265,55.906673,50.200710,18.380386,67.752487,73.844353,13.724401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,242.759995,105.790077,173.970001,275.820007,1168.880005,266.339996,524.460022,623.131348,415.000000,598.750000,...,697.549988,374.579987,298.209991,226.630005,88.882095,85.160004,30.180000,52.660000,193.600006,17.090000
1002,245.070007,105.780121,175.309998,274.959991,1164.619995,266.220001,520.250000,623.910156,416.410004,598.260010,...,701.229980,374.660004,300.820007,228.729996,88.862213,85.870003,30.410000,52.759998,193.050003,17.230000
1003,245.110001,105.531090,175.720001,274.640015,1170.609985,266.720001,518.099976,628.173767,415.600006,596.080017,...,703.760010,374.070007,297.500000,228.550003,88.086731,85.680000,30.240000,52.419998,193.149994,17.090000
1004,244.910004,105.839890,175.919998,273.239990,1169.339966,265.579987,517.409973,627.075439,418.290009,595.520020,...,702.460022,375.279999,295.579987,228.699997,87.698990,86.790001,30.180000,52.930000,193.580002,17.170000


In [5]:
scaler = MinMaxScaler(feature_range=(0,1))
data_reshape = data["ADSK"].values.reshape(-1,1)
scaled_data = pd.DataFrame(scaler.fit_transform(data_reshape))

scaled_data

Unnamed: 0,0
0,0.251273
1,0.237310
2,0.247831
3,0.249673
4,0.261550
...,...
1001,0.517574
1002,0.528773
1003,0.528967
1004,0.527997


In [6]:
company_dict = {}

for ticker in data.columns:
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = pd.DataFrame(scaler.fit_transform(data[ticker].values.reshape(-1,1)))
    company_dict[ticker] = {'scaler': scaler, 'scaled_data': scaled_data}
    
    print(f"{ticker} is added to the dictionary!")

ADSK is added to the dictionary!
ORCL is added to the dictionary!
PTC is added to the dictionary!
CDNS is added to the dictionary!
FICO is added to the dictionary!
CRM is added to the dictionary!
SNPS is added to the dictionary!
INTU is added to the dictionary!
TYL is added to the dictionary!
ADBE is added to the dictionary!
ANSS is added to the dictionary!
ANET is added to the dictionary!
CSCO is added to the dictionary!
MSI is added to the dictionary!
FFIV is added to the dictionary!
JNPR is added to the dictionary!
APH is added to the dictionary!
GLW is added to the dictionary!
TDY is added to the dictionary!
ROP is added to the dictionary!
TRMB is added to the dictionary!
KEYS is added to the dictionary!
ZBRA is added to the dictionary!
TEL is added to the dictionary!
JBL is added to the dictionary!
IT is added to the dictionary!
CTSH is added to the dictionary!
EPAM is added to the dictionary!
ACN is added to the dictionary!
IBM is added to the dictionary!
VRSN is added to the dic

In [7]:
company_dict["ADSK"]

{'scaler': MinMaxScaler(),
 'scaled_data':              0
 0     0.251273
 1     0.237310
 2     0.247831
 3     0.249673
 4     0.261550
 ...        ...
 1001  0.517574
 1002  0.528773
 1003  0.528967
 1004  0.527997
 1005  0.521065
 
 [1006 rows x 1 columns]}

In [8]:
def rolling_window(data):
    time_step = 22

    X, y = [], []

    for i in range(len(data) - time_step):
        X.append(data.iloc[i : (i+time_step)])
        y.append(data.iloc[i+time_step])
    
    X = np.array(X)
    y = np.array(y)

    print(X.shape)
    print(y.shape)

    return X, y

In [9]:
X, y = rolling_window(data["SNPS"])
y[-197:]

(984, 22)
(984,)


array([374.35998535, 371.3500061 , 379.3999939 , 376.55999756,
       373.92001343, 369.86999512, 375.04000854, 375.94000244,
       386.25      , 388.94000244, 384.92999268, 378.35998535,
       375.92001343, 379.98999023, 379.3999939 , 376.51998901,
       381.55999756, 381.67999268, 380.19000244, 380.42999268,
       378.76998901, 377.55999756, 377.26000977, 375.67001343,
       364.52999878, 365.1000061 , 368.39001465, 371.32000732,
       371.61999512, 368.82998657, 368.        , 366.61999512,
       371.55999756, 371.73999023, 366.95001221, 370.11999512,
       367.95999146, 368.83999634, 373.27999878, 375.14001465,
       377.07998657, 409.70999146, 408.5       , 401.67999268,
       393.08999634, 395.39001465, 434.20001221, 444.73001099,
       464.82998657, 454.95999146, 454.54998779, 449.35998535,
       450.16000366, 443.83999634, 435.32000732, 437.69000244,
       438.77999878, 446.04000854, 449.98001099, 446.04998779,
       446.36999512, 441.19000244, 435.5       , 426.33

In [10]:
X, y = rolling_window(company_dict["SNPS"]["scaled_data"])
y[1]

(984, 22, 1)
(984, 1)


array([0.0994974])

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train.shape
y_train.shape
print(f"x_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")

print(f"x_test: {X_test.shape}")
print(f"y_test: {y_test.shape}")

y_test_inverse = company_dict["SNPS"]["scaler"].inverse_transform(y_test)
y_test_inverse



x_train: (787, 22, 1)
y_train: (787, 1)
x_test: (197, 22, 1)
y_test: (197, 1)


array([[374.35998535],
       [371.3500061 ],
       [379.3999939 ],
       [376.55999756],
       [373.92001343],
       [369.86999512],
       [375.04000854],
       [375.94000244],
       [386.25      ],
       [388.94000244],
       [384.92999268],
       [378.35998535],
       [375.92001343],
       [379.98999023],
       [379.3999939 ],
       [376.51998901],
       [381.55999756],
       [381.67999268],
       [380.19000244],
       [380.42999268],
       [378.76998901],
       [377.55999756],
       [377.26000977],
       [375.67001343],
       [364.52999878],
       [365.1000061 ],
       [368.39001465],
       [371.32000732],
       [371.61999512],
       [368.82998657],
       [368.        ],
       [366.61999512],
       [371.55999756],
       [371.73999023],
       [366.95001221],
       [370.11999512],
       [367.95999146],
       [368.83999634],
       [373.27999878],
       [375.14001465],
       [377.07998657],
       [409.70999146],
       [408.5       ],
       [401

In [12]:
class RollingWindowDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float), torch.tensor(self.y[idx], dtype=torch.float)


In [13]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        self.lstm = nn.LSTM(input_size = input_dim, hidden_size = hidden_dim, num_layers = layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Initializing hidden state
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device) 

        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device) 

        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])

        return out

In [14]:
train_data = RollingWindowDataset(X_train, y_train)
test_data = RollingWindowDataset(X_test, y_test)

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = LSTMModel(input_dim=X.shape[2], hidden_dim=22, layer_dim=1, output_dim=y_train.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()

In [16]:
def train_lstm(config):

    batch_size = int(round(config["batch_size"]))
    epochs = int(round(config["epochs"]))
    hidden_dim = int(round(config["hidden_dim"]))
    layer_dim = int(round(config["layer_dim"]))
    learning_rate = config["learning_rate"]
    

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
    model = LSTMModel(input_dim=X.shape[2], hidden_dim=hidden_dim, layer_dim=layer_dim, output_dim=y_train.shape[1]).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        print('epochs {}/{}'.format(epoch+1,epochs))

        running_loss = .0

        model.train()

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad() # Clear the gradients
            output = model(data)  # Forward pass
            loss = criterion(output, target)
            loss.backward() # Compute gradient

            optimizer.step() # Update params
            running_loss += loss.item()


        train_loss = running_loss/len(train_loader)
        ray.train.report({"mse": train_loss})
        
        print(f'train_loss {train_loss}')
            # if batch_idx % 10 == 0:
            #     print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")

In [17]:
def Test(batch_size,test):
    all_predictions = []
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

    running_loss = .0
    
    model.eval()
    
    with torch.no_grad():

        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            preds = model(data)
            loss = criterion(preds,target)
            running_loss += loss.item()

            all_predictions.extend(preds.cpu().numpy())

            
        test_loss = running_loss/len(test_loader)

        print(f'test_loss {test_loss}')

    all_predictions = np.array(all_predictions)

    return all_predictions  # Return the NumPy array of predictions    

In [21]:
# Define hyperparameter configuration
config = {
    "learning_rate": tune.loguniform(1e-4, 1e-2),
    "batch_size": tune.uniform(32, 129),  # Convert to continuous
    "epochs": tune.uniform(50, 151),      # Convert to continuous
    "hidden_dim": tune.uniform(22, 76),   # Convert to continuous
    "layer_dim": tune.uniform(1, 4)
}

# Define scheduler and reporter
scheduler = ASHAScheduler(metric="mse", mode="min")
reporter = CLIReporter(metric_columns=["mse", "training_iteration"])
bayesopt = BayesOptSearch(metric="mse", mode="min")


# Start the tuning process
analysis = tune.run(
    train_lstm,
    resources_per_trial={"cpu": 1, "gpu": 1},  # adjust based on your GPU
    config=config,
    num_samples=30,
    scheduler=scheduler,
    search_alg=bayesopt,  # Use Bayesian optimization search
    progress_reporter=reporter
)


# Get the best hyperparameters
best_config = analysis.get_best_config(metric="mse", mode="min")
best_mse = analysis.get_best_trial(metric="mse", mode="min").last_result["mse"]

print("Best config: ", best_config)
print(f"Best MSE: {best_mse}")


2024-01-15 22:30:28,303	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-01-15 22:30:28 (running for 00:00:00.12)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: None | Iter 1.000: None
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/arda/ray_results/train_lstm_2024-01-15_22-30-28
Number of trials: 1/30 (1 PENDING)


[36m(train_lstm pid=44266)[0m epochs 1/146


Trial name,mse
train_lstm_0204f2e4,0.0383491
train_lstm_1e4479bc,0.0002396
train_lstm_2d498e46,0.0367006
train_lstm_373ac995,0.0347314
train_lstm_3e518c31,0.0238759
train_lstm_4ae6ab84,0.0134791
train_lstm_5d74cb28,0.0378146
train_lstm_7027d030,0.0276636
train_lstm_7147d8c2,0.058548
train_lstm_7557d3d0,0.0714388


[36m(train_lstm pid=44266)[0m train_loss 0.011323432127634684
[36m(train_lstm pid=44266)[0m epochs 2/146
[36m(train_lstm pid=44266)[0m train_loss 0.049105454119853675
[36m(train_lstm pid=44266)[0m epochs 3/146
[36m(train_lstm pid=44266)[0m train_loss 0.01875353842236412
[36m(train_lstm pid=44266)[0m epochs 4/146
[36m(train_lstm pid=44266)[0m train_loss 0.023409905707618844
[36m(train_lstm pid=44266)[0m epochs 5/146
[36m(train_lstm pid=44266)[0m train_loss 0.025049516077463824
[36m(train_lstm pid=44266)[0m epochs 6/146
[36m(train_lstm pid=44266)[0m train_loss 0.021086348531146843
[36m(train_lstm pid=44266)[0m epochs 7/146
[36m(train_lstm pid=44266)[0m train_loss 0.0202363688343515
[36m(train_lstm pid=44266)[0m epochs 8/146
[36m(train_lstm pid=44266)[0m train_loss 0.0166974315555611
[36m(train_lstm pid=44266)[0m epochs 9/146
[36m(train_lstm pid=44266)[0m train_loss 0.009520210088036643
[36m(train_lstm pid=44266)[0m epochs 10/146
[36m(train_lstm pid=44

2024-01-15 22:32:40,382	INFO tune.py:1042 -- Total run time: 132.08 seconds (132.04 seconds for the tuning loop).


[36m(train_lstm pid=45840)[0m train_loss 0.01136684336718948
Best config:  {'learning_rate': 0.006366818994423863, 'batch_size': 64.61777697625115, 'epochs': 141.6366927241757, 'hidden_dim': 68.603038592183, 'layer_dim': 1.1711154959957395}
Best MSE: 0.00023960028864586583


In [19]:
train_lstm(32, 30, train_data)

TypeError: train_lstm() takes 1 positional argument but 3 were given

In [None]:
preds = Test(32, test_data)

preds_inverse = company_dict["SNPS"]["scaler"].inverse_transform(preds)
print(preds_inverse)

mse = mean_squared_error(y_test_inverse, preds_inverse)
mape = mean_absolute_percentage_error(y_test_inverse, preds_inverse)*100

print(f"MSE: {mse}")
print(f"MAPE: {mape}")

In [None]:
preds_inverse = company_dict["SNPS"]["scaler"].inverse_transform(y_test)
preds_inverse


In [None]:
X, y = rolling_window(company_dict["CDNS"]["scaled_data"])
y[1]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train.shape
y_train.shape

y_test_inverse = company_dict["CDNS"]["scaler"].inverse_transform(y_test)
y_test_inverse

In [None]:
train_data = RollingWindowDataset(X_train, y_train)
test_data = RollingWindowDataset(X_test, y_test)

In [None]:
preds = Test(32, test_data)
preds_inverse = company_dict["CDNS"]["scaler"].inverse_transform(preds)


mse = mean_squared_error(y_test_inverse, preds_inverse)
mape = mean_absolute_percentage_error(y_test_inverse, preds_inverse)*100

print(f"MSE: {mse}")
print(f"MAPE: {mape}")