In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader

In [2]:
class TimeSeriesData(Dataset):
    """Dataset that pairs each input sequence with its target row.

    Both inputs are converted to float32 tensors once, at construction time,
    so ``__getitem__`` is a plain index lookup.

    Args:
        features: array-like of input sequences; indexed along its first axis.
        targets: array-like of targets, one entry per entry of ``features``.

    Raises:
        ValueError: if ``features`` and ``targets`` differ in length.
    """

    def __init__(self, features, targets):
        # __len__ is driven by `features` while __getitem__ also indexes
        # `targets`; a length mismatch would otherwise only surface as an
        # IndexError in the middle of an epoch (or silently drop targets).
        if len(features) != len(targets):
            raise ValueError(
                f'features and targets must have the same length, '
                f'got {len(features)} and {len(targets)}'
            )
        self.features = torch.FloatTensor(features)
        self.targets = torch.FloatTensor(targets)

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` tensor pair at position ``idx``."""
        return self.features[idx], self.targets[idx]

class PriceNet(nn.Module):
    """Stacked LSTM followed by a linear head that predicts the next row.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        layer_count: number of stacked LSTM layers.
        output_dim: number of values predicted per sequence. Defaults to 5,
            the width that was previously hard-coded in the linear head.
    """

    def __init__(self, input_dim=5, hidden_dim=128, layer_count=3, output_dim=5):
        super().__init__()
        # Attribute names are part of the saved state_dict keys; keep them
        # stable so previously saved checkpoints still load.
        self.lstm_net = nn.LSTM(input_dim, hidden_dim, layer_count, batch_first=True)
        self.predictor = nn.Linear(hidden_dim, output_dim)

    def forward(self, seq_input):
        """Predict one output row per input sequence.

        Args:
            seq_input: tensor laid out as (batch, seq_len, input_dim), since
                the LSTM is built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        lstm_result, _ = self.lstm_net(seq_input)
        # Only the LSTM output at the final time step feeds the linear head.
        pred = self.predictor(lstm_result[:, -1, :])
        return pred

In [3]:
def process_timeseries(raw_data, seq_len=10, scaler=None):
    """Normalize the market columns and cut them into sliding windows.

    Each sample is ``seq_len`` consecutive normalized rows; its target is the
    normalized row that immediately follows the window.

    Args:
        raw_data: DataFrame containing the columns 'Open', 'High', 'Low',
            'Close' and 'Volume'.
        seq_len: number of consecutive rows in each input window.
        scaler: optional, already-fitted scaler exposing ``transform``. When
            given, it is applied as-is instead of fitting a new one, so that
            held-out data can be normalized exactly like the training data.
            When ``None`` (the default) a new ``MinMaxScaler`` is fitted on
            ``raw_data``.

    Returns:
        Tuple ``(features, targets, scaler)``: ``features`` has shape
        (n_windows, seq_len, 5), ``targets`` has shape (n_windows, 5), and
        ``scaler`` is the scaler that produced the normalized values. When
        ``raw_data`` has no more than ``seq_len`` rows, both arrays are empty
        but keep their trailing dimensions.
    """
    # Extract market data
    market_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    market_data = raw_data[market_cols].values

    # Normalize: fit only when the caller did not supply a fitted scaler.
    if scaler is None:
        norm_tool = MinMaxScaler()
        norm_data = norm_tool.fit_transform(market_data)
    else:
        norm_tool = scaler
        norm_data = norm_tool.transform(market_data)

    # Create sequences
    n_windows = len(norm_data) - seq_len
    if n_windows <= 0:
        # Not enough rows for a single window: return well-shaped empties
        # rather than 1-D zero-length arrays.
        n_features = norm_data.shape[1]
        return np.empty((0, seq_len, n_features)), np.empty((0, n_features)), norm_tool

    seq_features = np.stack([norm_data[idx:idx + seq_len] for idx in range(n_windows)])
    # The target of window `idx` is row `idx + seq_len`, i.e. every row from
    # `seq_len` onward; copy so the result does not alias `norm_data`.
    seq_targets = np.array(norm_data[seq_len:])

    return seq_features, seq_targets, norm_tool

def run_training(data_loader, net, loss_fn, opt, epoch_num):
    """Train ``net`` for ``epoch_num`` passes over ``data_loader``.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches.
        net: model to optimize; switched to training mode at each epoch start.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor.
        opt: optimizer holding ``net``'s parameters.
        epoch_num: number of epochs to run.

    Returns:
        List with one entry per epoch: the unweighted mean of that epoch's
        per-batch losses. The same value is printed after every epoch.
    """
    def _optimize_batch(inputs, labels):
        # One optimizer step; returns the batch loss as a Python float.
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), labels)
        batch_loss.backward()
        opt.step()
        return batch_loss.item()

    history = []
    for epoch in range(epoch_num):
        net.train()
        epoch_losses = [_optimize_batch(inputs, labels) for inputs, labels in data_loader]

        mean_loss = sum(epoch_losses) / len(epoch_losses)
        history.append(mean_loss)
        print(f'Epoch [{epoch+1}/{epoch_num}], Loss: {mean_loss:.4f}')
    return history

# Data prep
def prep_data(filename):
    """Load a stock-price CSV and convert its market columns to floats.

    Values may contain thousands separators (e.g. ``"1,663.59"``), which make
    pandas read the column as text. Commas are stripped from 'Open', 'High',
    'Low', 'Close' and 'Volume' before casting to float.

    Args:
        filename: path of the CSV file to read.

    Returns:
        DataFrame with the five market columns as float64; any other columns
        are left as read.
    """
    df = pd.read_csv(filename)
    # Clean all five columns the same way. The regex replace only touches
    # string cells, so it also works when a column was already parsed as
    # numeric -- unlike the `.str` accessor, which raises on numeric columns.
    numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    df[numeric_cols] = df[numeric_cols].replace({',': ''}, regex=True).astype(float)
    return df

In [4]:
# Seed torch's global RNG before building the model and the loader so weight
# initialization and the shuffled batch order are repeatable across
# Restart & Run All (shuffling is assumed to draw from the global torch RNG,
# since no generator is passed to the DataLoader).
SEED = 42
torch.manual_seed(SEED)

# Training hyperparameters, named so they can be tuned in one place.
BATCH_SIZE = 32
LEARNING_RATE = 0.001
EPOCH_COUNT = 50

train_data = prep_data('Google_Stock_Price_Train.csv')
test_data = prep_data('Google_Stock_Price_Test.csv')

# Process training data; `scaler` is the normalizer fitted on the training
# rows and is needed later to map predictions back to price units.
X_seq, y_seq, scaler = process_timeseries(train_data)
ts_dataset = TimeSeriesData(X_seq, y_seq)
ts_loader = DataLoader(ts_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Initialize and train
price_model = PriceNet()
mse_loss = nn.MSELoss()
adam_opt = torch.optim.Adam(price_model.parameters(), lr=LEARNING_RATE)
train_hist = run_training(ts_loader, price_model, mse_loss, adam_opt, EPOCH_COUNT)

# Save trained model
torch.save(price_model.state_dict(), 'market_predictor.pth')

Epoch [1/50], Loss: 0.0619
Epoch [2/50], Loss: 0.0131
Epoch [3/50], Loss: 0.0077
Epoch [4/50], Loss: 0.0032
Epoch [5/50], Loss: 0.0031
Epoch [6/50], Loss: 0.0026
Epoch [7/50], Loss: 0.0025
Epoch [8/50], Loss: 0.0024
Epoch [9/50], Loss: 0.0025
Epoch [10/50], Loss: 0.0024
Epoch [11/50], Loss: 0.0024
Epoch [12/50], Loss: 0.0023
Epoch [13/50], Loss: 0.0023
Epoch [14/50], Loss: 0.0022
Epoch [15/50], Loss: 0.0022
Epoch [16/50], Loss: 0.0023
Epoch [17/50], Loss: 0.0021
Epoch [18/50], Loss: 0.0022
Epoch [19/50], Loss: 0.0020
Epoch [20/50], Loss: 0.0020
Epoch [21/50], Loss: 0.0022
Epoch [22/50], Loss: 0.0019
Epoch [23/50], Loss: 0.0019
Epoch [24/50], Loss: 0.0018
Epoch [25/50], Loss: 0.0018
Epoch [26/50], Loss: 0.0020
Epoch [27/50], Loss: 0.0017
Epoch [28/50], Loss: 0.0017
Epoch [29/50], Loss: 0.0017
Epoch [30/50], Loss: 0.0017
Epoch [31/50], Loss: 0.0016
Epoch [32/50], Loss: 0.0017
Epoch [33/50], Loss: 0.0016
Epoch [34/50], Loss: 0.0016
Epoch [35/50], Loss: 0.0016
Epoch [36/50], Loss: 0.0017
E

In [5]:
# Visualization functions
def viz_train_progress(loss_hist):
    """Plot the loss history and save it as 'train_progress.png'.

    Args:
        loss_hist: sequence of loss values, plotted in order against their
            index.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(loss_hist)
    ax.set_title('Training Progress')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('MSE Loss')
    fig.savefig('train_progress.png')
    # Close the figure once it is on disk so it does not stay registered
    # with pyplot.
    plt.close(fig)

def viz_market_correlation(df):
    """Save an annotated correlation heatmap as 'market_correlation.png'.

    Args:
        df: DataFrame with the columns 'Open', 'High', 'Low', 'Close' and
            'Volume'; their pairwise correlations are plotted.
    """
    correlations = df[['Open', 'High', 'Low', 'Close', 'Volume']].corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='viridis', ax=ax)
    ax.set_title('Market Metrics Correlation')
    fig.savefig('market_correlation.png')
    plt.close(fig)

def viz_price_dist(df):
    """Save a 2x2 grid of price histograms as 'price_distributions.png'.

    One panel each for 'Open', 'High', 'Low' and 'Close', every histogram
    overlaid with a KDE curve.

    Args:
        df: DataFrame containing the four price columns.
    """
    fig = plt.figure(figsize=(15, 10))
    for position, metric in enumerate(('Open', 'High', 'Low', 'Close'), start=1):
        ax = fig.add_subplot(2, 2, position)
        sns.histplot(df[metric], kde=True, ax=ax)
        ax.set_title(f'{metric} Distribution')
    fig.tight_layout()
    fig.savefig('price_distributions.png')
    plt.close(fig)

def viz_volume_trend(df):
    """Plot 'Volume' against the frame's index and save 'volume_trend.png'.

    Args:
        df: DataFrame with a 'Volume' column; its index is used as the x-axis.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df.index, df['Volume'])
    ax.set_title('Trading Volume Trend')
    ax.set_xlabel('Trading Days')
    ax.set_ylabel('Volume')
    fig.savefig('volume_trend.png')
    plt.close(fig)

def viz_market_trend(df):
    """Plot 'Close' and 'Open' over the index and save 'market_trend.png'.

    Args:
        df: DataFrame with 'Close' and 'Open' columns; its index is used as
            the x-axis.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    # Draw Close first, then Open, so line colors and legend order are stable.
    for column in ('Close', 'Open'):
        ax.plot(df.index, df[column], label=column)
    ax.set_title('Market Price Evolution')
    ax.set_xlabel('Trading Days')
    ax.set_ylabel('Price')
    ax.legend()
    fig.savefig('market_trend.png')
    plt.close(fig)

# Generate visualizations: the loss curve first, then every plot that is
# driven by the training DataFrame, in the same order as before.
viz_train_progress(train_hist)
for make_plot in (viz_market_correlation, viz_price_dist, viz_volume_trend, viz_market_trend):
    make_plot(train_data)

In [6]:
# Test predictions
price_model.eval()

# Normalize the test rows with the scaler fitted on the TRAINING data.
# Calling process_timeseries(test_data) would fit a fresh MinMaxScaler on the
# test rows, so the model would see inputs on a different scale than it was
# trained on, and inverse-transforming with `scaler` below would not give
# back real prices.
TEST_MARKET_COLS = ['Open', 'High', 'Low', 'Close', 'Volume']
TEST_SEQ_LEN = 10  # must equal the window length used to build the training sequences
test_norm = scaler.transform(test_data[TEST_MARKET_COLS].values)

test_window_count = len(test_norm) - TEST_SEQ_LEN
if test_window_count <= 0:
    raise ValueError(
        f'test data has {len(test_norm)} rows; need more than {TEST_SEQ_LEN} to build a window'
    )
test_X = np.stack([test_norm[i:i + TEST_SEQ_LEN] for i in range(test_window_count)])
# The target for window i is row i + TEST_SEQ_LEN.
test_y = test_norm[TEST_SEQ_LEN:]

test_dataset = TimeSeriesData(test_X, test_y)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

pred_prices = []
real_prices = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        pred = price_model(X_batch)
        pred_prices.append(pred.numpy())
        real_prices.append(y_batch.numpy())

# Concatenate along the batch axis so the result stays 2-D (n_samples, 5)
# even with a single test window; squeeze() would collapse that case to 1-D,
# which inverse_transform rejects.
pred_prices = scaler.inverse_transform(np.concatenate(pred_prices, axis=0))
real_prices = scaler.inverse_transform(np.concatenate(real_prices, axis=0))

In [7]:
def viz_prediction_accuracy(pred, real):
    """Plot actual vs. predicted values and save 'prediction_accuracy.png'.

    Only column index 3 of each array is drawn; that is the 'Close' column
    when rows follow the Open/High/Low/Close/Volume order used by
    process_timeseries.

    Args:
        pred: 2-D array of predicted rows.
        real: 2-D array of actual rows, aligned with ``pred``.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    # Actual first, then predicted, to keep line colors and legend order.
    for series, legend_label in ((real, 'Actual'), (pred, 'Predicted')):
        ax.plot(series[:, 3], label=legend_label)
    ax.set_title('Price Prediction Performance')
    ax.set_xlabel('Time Steps')
    ax.set_ylabel('Price')
    ax.legend()
    fig.savefig('prediction_accuracy.png')
    plt.close(fig)

# Save the actual-vs-predicted comparison plot for the test-set predictions.
viz_prediction_accuracy(pred_prices, real_prices)