In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Read the full price table, keep only the rows whose Ticker is 'NVDA', and
# renumber the surviving rows from zero.
df = (
    pd.read_csv('sp500_data.csv')
    .loc[lambda frame: frame['Ticker'] == 'NVDA']
    .reset_index(drop=True)
)

# Report how many rows survived the filter, then show the first few of them.
print(
    "Filtered dataframe for NVDA:",
    f"Total rows for NVDA: {len(df)}",
    sep="\n",
)
df.head()


Filtered dataframe for NVDA:
Total rows for NVDA: 6463


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,1999-01-22,0.04375,0.048828,0.038802,0.041016,0.037618,2714688000.0,NVDA
1,1999-01-25,0.044271,0.045833,0.041016,0.045313,0.041559,510480000.0,NVDA
2,1999-01-26,0.045833,0.046745,0.041146,0.041797,0.038334,343200000.0,NVDA
3,1999-01-27,0.041927,0.042969,0.039583,0.041667,0.038215,244368000.0,NVDA
4,1999-01-28,0.041667,0.041927,0.041276,0.041536,0.038095,227520000.0,NVDA


In [None]:
# Build the feature matrix (X) and the target (y), then split the rows into
# train / validation / test sets.
TARGET_COLUMN = 'Close'

# Columns that are numeric but must not be fed to the model:
#   * 'Adj Close'  - a rescaled copy of the target (constant ratio to 'Close'
#                    in the displayed rows), so keeping it leaks the answer.
#   * 'Unnamed: 0' - unnamed index column carried in from the CSV, not a
#                    market feature.
EXCLUDED_COLUMNS = ['Adj Close', 'Unnamed: 0']

# Keep numeric columns only: 'Date' and 'Ticker' are strings and cannot be
# converted to a float tensor later on.
X = (
    df.drop(columns=[TARGET_COLUMN, *EXCLUDED_COLUMNS], errors='ignore')
    .select_dtypes(include='number')
)
y = df[TARGET_COLUMN]

assert X.shape[1] > 0, "no numeric feature columns left after filtering"
assert len(X) == len(y)

# First hold out 20% of the rows as the test set ...
# NOTE(review): train_test_split shuffles by default, so rows from all dates
# are mixed across the three sets. Pass shuffle=False if a chronological
# hold-out is wanted instead.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ... then carve 20% of the remainder out as the validation set
# (overall roughly 64% train / 16% validation / 20% test).
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.2, random_state=42
)

print(f"Train set shape: {X_train.shape}")
print(f"Validation set shape: {X_val.shape}")
print(f"Test set shape: {X_test.shape}")

# Plain linear regression expressed as a PyTorch module
class SimpleRegressionModel(nn.Module):
    """Linear regression model made of a single ``nn.Linear`` layer.

    Args:
        input_size: number of input features per sample.
        output_size: number of values predicted per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.linear(x)
        return prediction

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch so that weight initialisation and DataLoader shuffling are
# repeatable from run to run (same value as the random_state of the split).
torch.manual_seed(42)


def _to_float_tensor(data):
    """Convert a pandas DataFrame/Series to a float32 tensor on ``device``.

    Raises if ``data`` holds values that cannot be cast to float32 (for
    example string columns).
    """
    return torch.tensor(data.to_numpy(dtype=np.float32), device=device)


# Convert data to PyTorch tensors; targets get a trailing dimension so they
# line up with the model's (batch, 1) output.
X_train_tensor = _to_float_tensor(X_train)
y_train_tensor = _to_float_tensor(y_train).unsqueeze(1)
X_val_tensor = _to_float_tensor(X_val)
y_val_tensor = _to_float_tensor(y_val).unsqueeze(1)

# Standardise the features with statistics from the TRAINING rows only.
# The raw columns live on very different scales, which makes gradient-based
# fitting of a linear layer ill-conditioned. Any later input to `model`
# (e.g. test data) must be transformed with the same feature_mean/feature_std.
feature_mean = X_train_tensor.mean(dim=0, keepdim=True)
feature_std = X_train_tensor.std(dim=0, keepdim=True)
# Constant (or undefined) spread -> divide by 1 instead of 0/NaN.
feature_std = torch.where(feature_std > 0, feature_std, torch.ones_like(feature_std))
X_train_tensor = (X_train_tensor - feature_mean) / feature_std
X_val_tensor = (X_val_tensor - feature_mean) / feature_std

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Initialize the model
input_size = X_train_tensor.shape[1]
# hidden_size / num_layers are not used by SimpleRegressionModel; they are
# kept because they are notebook-level names that other cells may read.
hidden_size = 64
num_layers = 2
output_size = 1

model = SimpleRegressionModel(input_size, output_size).to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * batch_X.size(0)

    # Mean training loss over the whole epoch, not just the last mini-batch.
    train_loss = running_loss / len(train_dataset)

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor)

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss.item():.4f}')

# Evaluate on validation set
model.eval()
with torch.no_grad():
    # Flatten (n, 1) -> (n,) so predictions have the same shape as y_val.
    val_predictions = model(X_val_tensor).squeeze(1).cpu().numpy()

val_mse = mean_squared_error(y_val, val_predictions)
val_r2 = r2_score(y_val, val_predictions)

print(f"Validation MSE: {val_mse:.4f}")
print(f"Validation R2 Score: {val_r2:.4f}")

# Plot actual vs predicted values for validation set; the dashed red diagonal
# marks where a perfect prediction would fall.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_val, val_predictions, alpha=0.5)
ax.plot([y_val.min(), y_val.max()], [y_val.min(), y_val.max()], 'r--', lw=2)
ax.set_xlabel("Actual Close Price")
ax.set_ylabel("Predicted Close Price")
ax.set_title("Actual vs Predicted Close Prices (Validation Set)")
plt.show()
