In [None]:
import yfinance as yf
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
from tqdm import tqdm
import matplotlib.pyplot as plt

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Download five years of AAPL history and keep only the closing price,
# turning the date index back into an ordinary column
apple = yf.Ticker("AAPL")
data = apple.history(period="5y")[['Close']].reset_index()

# Rescale the closing price with a min-max scaler (fitted on the whole
# series) and store the result next to the raw values
scaler = MinMaxScaler()
data['Close_scaled'] = scaler.fit_transform(data[['Close']])

# Define a custom dataset for time series forecasting
class StockPriceDataset(Dataset):
    """Sliding-window dataset that renders numeric windows as text pairs.

    Each sample pairs ``input_length`` consecutive values with the
    ``output_length`` values that immediately follow them. Both windows are
    serialized as space-separated strings using ``str()`` on every element.

    Args:
        data: Sliceable sequence of values (e.g. a list or a 1-D array).
        input_length: Number of values in each input window.
        output_length: Number of values in each target window.
    """

    def __init__(self, data, input_length, output_length):
        self.data = data
        self.input_length = input_length
        self.output_length = output_length

    def __len__(self):
        """Return the number of complete (input, target) windows."""
        window = self.input_length + self.output_length
        # The last valid start index is len(data) - window, which gives
        # len(data) - window + 1 windows. Clamp at zero because len() must
        # never be negative when the series is shorter than one window.
        return max(0, len(self.data) - window + 1)

    def __getitem__(self, idx):
        """Return the text pair for the window starting at ``idx``.

        Args:
            idx: Window start position; negative values count from the end.

        Returns:
            A dict with ``"input_text"`` and ``"target_text"`` strings.

        Raises:
            IndexError: If ``idx`` does not address a complete window.
        """
        size = len(self)
        if idx < 0:
            idx += size
        # Slicing never fails on its own, so without this check an
        # out-of-range index would yield truncated or empty windows and
        # plain iteration over the dataset would never stop.
        if not 0 <= idx < size:
            raise IndexError("StockPriceDataset index out of range")

        split = idx + self.input_length
        stop = split + self.output_length
        input_sequence = self.data[idx:split]
        target_sequence = self.data[split:stop]
        return {
            "input_text": " ".join(map(str, input_sequence)),
            "target_text": " ".join(map(str, target_sequence)),
        }

# Set the input and output sequence length for prediction
input_length = 60
output_length = 30

# Hold out the final output_length points: they are the ground truth the
# forecast is compared against below, so they must not appear in training.
scaled_values = data['Close_scaled'].values
train_values = scaled_values[:-output_length]

# Prepare dataset and DataLoader
dataset = StockPriceDataset(train_values, input_length, output_length)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# Load a pre-trained T5 tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

# Move model to the GPU if available
model = model.to(device)

# Fine-tuning setup. The AdamW shipped with transformers is deprecated in
# favour of torch.optim.AdamW; eps and weight_decay are passed explicitly to
# mirror the defaults of the deprecated implementation.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)

# Fine-tune the model with GPU support
model.train()
for epoch in range(5):  # Adjust the number of epochs as needed
    epoch_loss = 0
    for batch in tqdm(dataloader):
        input_sequences = batch['input_text']
        target_sequences = batch['target_text']

        # Tokenize input and target sequences
        encoded_inputs = tokenizer(input_sequences, return_tensors="pt", padding=True, truncation=True)
        labels = tokenizer(target_sequences, return_tensors="pt", padding=True, truncation=True).input_ids

        # Padding positions in the labels must be -100 so the loss ignores
        # them instead of rewarding the model for predicting pad tokens.
        labels[labels == tokenizer.pad_token_id] = -100

        # Move input data to GPU
        input_ids = encoded_inputs.input_ids.to(device)
        attention_mask = encoded_inputs.attention_mask.to(device)
        labels = labels.to(device)

        # Forward pass; the attention mask keeps the encoder from attending
        # to the padding added for batching.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backward pass and optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1} completed. Loss: {epoch_loss / len(dataloader)}")

# Save the fine-tuned model
model.save_pretrained("./fine_tuned_t5_stock_model")

# Predict on a test sample
model.eval()
# Use the input_length points that precede the held-out tail, so that the
# forecast covers exactly the last output_length prices plotted as "actual".
test_sequence = scaled_values[-(input_length + output_length):-output_length].reshape(1, -1)
test_input = " ".join(map(str, test_sequence[0]))
# Truncate exactly as during training so the model sees the same input format
encoded_test = tokenizer(test_input, return_tensors="pt", truncation=True)
input_ids = encoded_test.input_ids.to(device)  # Move input to GPU
attention_mask = encoded_test.attention_mask.to(device)

# Every number is spelled with several tokens, so the generation budget must
# be counted in tokens rather than in values. Training labels are truncated
# to the tokenizer's maximum length, so longer outputs were never learned.
# NOTE(review): the 512 cap assumes t5-small's usual limit - confirm.
max_target_tokens = min(tokenizer.model_max_length, 512)

# Generate predictions
with torch.no_grad():
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_target_tokens,
    )

decoded_prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Keep only the pieces that parse as numbers: generation may stop in the
# middle of a value or emit stray text, which would make float() raise.
predicted_sequence = []
for piece in decoded_prediction.split():
    try:
        predicted_sequence.append(float(piece))
    except ValueError:
        continue
predicted_sequence = predicted_sequence[:output_length]
if not predicted_sequence:
    raise RuntimeError(f"Model output contained no parseable numbers: {decoded_prediction!r}")

# Inverse scale the predicted values. The scaler expects one column, so each
# value becomes its own row (built without numpy, which this file does not import).
predicted_sequence = scaler.inverse_transform([[value] for value in predicted_sequence])

# Plot the predicted and actual values
plt.figure(figsize=(10, 6))
plt.plot(data['Close'].values[-output_length:], label="Actual Prices")
plt.plot(predicted_sequence, label="Predicted Prices")
plt.title("Apple Stock Price Prediction")
plt.legend()
plt.show()


Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
  0%|          | 0/73 [00:00<?, ?it/s]