In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, Dataset

# Step 1: Prepare the data
# Load stock data
data = pd.read_csv("stockData.csv")
train_size = 1000
# Preprocess data
scaler = MinMaxScaler(feature_range=(-1, 1))
data_scaled = scaler.fit_transform(data.drop("date", axis=1))

# Split data into training and testing sets
train_data = data_scaled[:train_size]
test_data = data_scaled[train_size:1500]

# Define dataset class
class StockDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        return (
            torch.FloatTensor(self.data[idx:idx+self.seq_length]),
            torch.FloatTensor(self.data[idx+self.seq_length])
        )

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Step 2: Define the model parameters
input_size = len(data.columns) - 1  # Number of features (stocks)
hidden_size = 64  # Number of LSTM units
num_layers = 2  # Number of LSTM layers
output_size = input_size  # Number of output neurons (predicted returns)
seq_length = 50  # Sequence length for each input (number of days)
num_epochs = 100
learning_rate = 0.001

# Step 3: Train the model
model = LSTM(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_dataset = StockDataset(train_data, seq_length)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

for epoch in range(num_epochs):
    for batch_inputs, batch_targets in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()

# Step 4: Generate predictions
model.eval()
with torch.no_grad():
    test_inputs = torch.FloatTensor(test_data)
    print(test_inputs.shape)
    test_loader = DataLoader(test_inputs, batch_size=1)
    predictions = []
    for inputs in test_loader:
        inputs = inputs.unsqueeze(0)
        output = model(inputs)
        predictions.append(output.squeeze().cpu().numpy())

# Step 5: Calculate returns
predicted_prices = scaler.inverse_transform(predictions)
returns = (predicted_prices[1:] - predicted_prices[:-1]) / predicted_prices[:-1]

# Step 6: Evaluate the model
# You can evaluate the model based on various metrics such as mean squared error, mean absolute error, etc.

# Repeat steps 3-6 for 250 and 500 days by changing the seq_length parameter and adjusting the code accordingly.




torch.Size([500, 14])


In [19]:
training_last_price = data.iloc[999]
training_last_price = training_last_price.drop("date")
returns_0 = (predicted_prices[0] - training_last_price) / training_last_price
print(returns_0)
# add the first return to the returns array
returns = np.insert(returns, 0, returns_0, axis=0)

print(train_data.shape, test_data.shape)
print(predicted_prices.shape)
print(returns.shape)

ADANIPORTS    0.414589
ASIANPAINT    1.519454
AXISBANK      0.955531
HDFC          0.153588
ITC           0.011753
TCS           0.056512
HINDUNILVR    0.425456
INFY          1.387708
ICICIBANK     2.132541
RELIANCE      0.088234
KOTAKBANK      0.29487
HEROMOTOCO   -0.159982
WIPRO        -0.130756
HCLTECH       0.473538
Name: 999, dtype: object
(1000, 14) (500, 14)
(500, 14)
(500, 14)


In [21]:
# evaluate the model by calculating the mean squared error for first 5 days
print(np.mean((test_data - returns)**2))

0.2723491221907192


In [22]:
# returns for next 60 days, 250 days and 500 days
returns_60 = returns[:60]
returns_250 = returns[:250]
returns_500 = returns[:500]

In [62]:
def portfolio_return(weights, returns):
    return weights.T.dot(returns)

def portfolio_variance(weights, cov_matrix):
    return weights.T.dot(cov_matrix).dot(weights)

def objective_function(weights, cov_matrix, expected_returns, risk_aversion):
    portfolio_ret = portfolio_return(weights, expected_returns)
    portfolio_var = portfolio_variance(weights, cov_matrix)
    objective = portfolio_ret - risk_aversion * portfolio_var
    return objective

def gradient(mu, cov_matrix, alpha, weights):
    portfolio_var = portfolio_variance(weights, cov_matrix)
    risk_gradient = alpha * cov_matrix.dot(weights)/portfolio_var
    return mu + 4*risk_gradient

def gradient_descent(mu, cov_matrix, alpha, learning_rate, num_iterations, threshold,random_weights, weights_value=[], objective_value=[], gradient_value=[]):
    weights = random_weights
    for i in range(num_iterations):
        gradient_val = gradient(mu, cov_matrix, alpha, weights)
        gradient_value.append(gradient_val)
        weights = weights + learning_rate * gradient_val
        weights = weights / np.sum(weights)
        weights_value.append(weights)
        objective_value.append(objective_function(weights, cov_matrix, mu, alpha))
        if np.linalg.norm(gradient(mu, cov_matrix, alpha, weights)) < threshold:
            break
    return weights, objective_value, weights_value, gradient_value

In [53]:
alpha = 0.0001
learning_rate = 1
num_iterations = 5000
threshold = 1e-8
weights_value = []
objective_value = []
gradient_value = []
random_weights = np.random.rand(returns_60.shape[1])
random_weights /= np.sum(random_weights)
cov_matrix = np.cov(returns_60.T)
expected_returns_60 = np.mean(returns_60, axis=0)
weights_60, objective_value, weights_value, gradient_value = gradient_descent(expected_returns_60, np.cov(returns_60.T), alpha, learning_rate, num_iterations, threshold, random_weights, weights_value, objective_value, gradient_value)
print(weights_60)

[ 0.05577001  0.19820625  0.12485829  0.02151797  0.00233294  0.00810091
  0.05640963  0.17954146  0.27681047  0.01332208  0.03977903 -0.01966996
 -0.01843093  0.06145185]


In [55]:
# handling negative weights
weights_60[weights_60 < 0] = 0
weights_60 /= np.sum(weights_60)
print(weights_60)

[0.05372311 0.19093159 0.12027568 0.02072821 0.00224732 0.00780359
 0.05433925 0.17295184 0.26665083 0.01283313 0.03831904 0.
 0.         0.05919641]


In [56]:
# now testing this portfolio
risk_free_rate = 0.04
portfolio_test = data.iloc[train_size:train_size+60]
portfolio_test = portfolio_test.drop(portfolio_test.columns[0], axis=1)
returns = portfolio_test.pct_change().dropna()
cov_matrix = returns.cov()
portfolio_return = np.sum(returns.mean() * weights_60) * 60
portfolio_std_dev = np.sqrt(np.dot(weights_60.T, np.dot(cov_matrix, weights_60))) * np.sqrt(60)
sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std_dev
print(f'({portfolio_return:.2f}, {portfolio_std_dev:.2f}, {sharpe_ratio:.2f})')

(0.06, 0.06, 0.33)


In [59]:
# now testing this portfolio for 250 days
alpha = 0.0001
learning_rate = 1
num_iterations = 5000
threshold = 1e-8
weights_value = []
objective_value = []
gradient_value = []
random_weights = np.random.rand(returns_250.shape[1])
random_weights /= np.sum(random_weights)
cov_matrix = np.cov(returns_250.T)
expected_returns_250 = np.mean(returns_250, axis=0)
weights_250, objective_value, weights_value, gradient_value = gradient_descent(expected_returns_250, np.cov(returns_250.T), alpha, learning_rate, num_iterations, threshold, random_weights, weights_value, objective_value, gradient_value)
print(weights_250)

[ 0.05674053  0.195066    0.12320995  0.02202835  0.00181673  0.00968527
  0.05722763  0.17802889  0.27431229  0.01335853  0.0408033  -0.01850926
 -0.01678119  0.063013  ]


In [60]:
weights_250[weights_250 < 0] = 0
weights_250 /= np.sum(weights_250)
print(weights_250)

[0.05480638 0.18841669 0.11901003 0.02127746 0.0017548  0.00935512
 0.05527688 0.17196033 0.26496167 0.01290317 0.03941242 0.
 0.         0.06086504]


In [61]:
# now testing this portfolio
risk_free_rate = 0.075
portfolio_test = data.iloc[train_size:train_size+250]
portfolio_test = portfolio_test.drop(portfolio_test.columns[0], axis=1)
returns = portfolio_test.pct_change().dropna()
cov_matrix = returns.cov()
portfolio_return = np.sum(returns.mean() * weights_250) * 250
portfolio_std_dev = np.sqrt(np.dot(weights_250.T, np.dot(cov_matrix, weights_250))) * np.sqrt(250)
sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std_dev
print(f'({portfolio_return:.2f}, {portfolio_std_dev:.2f}, {sharpe_ratio:.2f}')

(0.15, 0.13, 0.58


In [63]:
# now testing this portfolio for 250 days
alpha = 0.0001
learning_rate = 1
num_iterations = 5000
threshold = 1e-8
weights_value = []
objective_value = []
gradient_value = []
random_weights = np.random.rand(returns_500.shape[1])
random_weights /= np.sum(random_weights)
cov_matrix = np.cov(returns_500.T)
expected_returns_500 = np.mean(returns_500, axis=0)
weights_500, objective_value, weights_value, gradient_value = gradient_descent(expected_returns_500, np.cov(returns_500.T), alpha, learning_rate, num_iterations, threshold, random_weights, weights_value, objective_value, gradient_value)
print(weights_500)

[ 0.0561912   0.19780221  0.12665545  0.02218915  0.00155957  0.00752957
  0.05658444  0.17933545  0.27604564  0.01379117  0.0403184  -0.02156492
 -0.01837442  0.06193709]


In [64]:
weights_500[weights_500 < 0] = 0
weights_500 /= np.sum(weights_500)
print(weights_500)

[0.05403315 0.19020553 0.12179119 0.02133697 0.00149967 0.0072404
 0.05441129 0.172448   0.26544398 0.01326151 0.03876995 0.
 0.         0.05955837]


In [65]:
# now testing this portfolio
risk_free_rate = 0.075
portfolio_test = data.iloc[train_size:train_size+500]
portfolio_test = portfolio_test.drop(portfolio_test.columns[0], axis=1)
returns = portfolio_test.pct_change().dropna()
cov_matrix = returns.cov()
portfolio_return = np.sum(returns.mean() * weights_500) * 500
portfolio_std_dev = np.sqrt(np.dot(weights_500.T, np.dot(cov_matrix, weights_500))) * np.sqrt(500)
sharpe_ratio = (portfolio_return - risk_free_rate) / portfolio_std_dev
print(f'({portfolio_return:.2f}, {portfolio_std_dev:.2f}, {sharpe_ratio:.2f}')


(0.32, 0.21, 1.19
