# Amazon Stock Price Prediction: Linear Regression with Gradient Descent

## Setup
First, let's import the necessary libraries and load the Amazon stock data.

In [None]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gradientdescent as gd

print("Gradient Descent module loaded successfully!")

In [None]:
# Read the Amazon price history CSV (the path is relative to this notebook)
csv_path = '../../data/Amazon.csv'
amazon_data = pd.read_csv(csv_path)

# Leave the preview as the last expression so the notebook renders it
amazon_data.head()

## Data Exploration
Let's explore the Amazon stock data to understand its structure and characteristics.

In [None]:
# Dimensions of the table as a (rows, columns) tuple
dataset_shape = amazon_data.shape
print("Dataset shape:", dataset_shape)

# DataFrame.info() writes its own report (columns, non-null counts, dtypes)
print("\nDataset info:")
amazon_data.info()

# Keep the describe() result as the final expression so the notebook displays it
print("\nSummary statistics:")
summary_stats = amazon_data.describe()
summary_stats

In [None]:
# Parse the Date column into datetime values so matplotlib draws a real time axis
amazon_data['Date'] = pd.to_datetime(amazon_data['Date'])

# Derive the year span shown in the title from the data itself instead of
# hard-coding it, so the title stays correct if the CSV covers another period
first_year = amazon_data['Date'].min().year
last_year = amazon_data['Date'].max().year

# Plot the closing price over time
plt.figure(figsize=(12, 6))
plt.plot(amazon_data['Date'], amazon_data['Close'], 'b-', linewidth=1)
plt.title(f'Amazon Stock Closing Price ({first_year}-{last_year})')
plt.xlabel('Date')
plt.ylabel('Closing Price ($)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## Data Preparation
Let's prepare the data for our linear regression model. We'll create features based on previous days' closing prices to predict the next day's closing price.

In [None]:
# Create features: previous n days' closing prices
def create_features(data, n_prev_days=5):
    """Build lagged-price features and next-day targets from closing prices.

    Row ``k`` of the feature matrix holds the ``n_prev_days`` consecutive
    closing prices at positions ``k .. k + n_prev_days - 1`` (oldest first),
    and ``targets[k]`` is the closing price at position ``k + n_prev_days``.
    Rows are addressed by position, not by index label.

    Args:
        data: DataFrame with a ``'Close'`` column.
        n_prev_days: Number of preceding closing prices used per sample.

    Returns:
        A ``(features, targets)`` tuple of NumPy arrays with shapes
        ``(n_samples, n_prev_days)`` and ``(n_samples,)``, where
        ``n_samples = max(len(data) - n_prev_days, 0)``. The feature matrix
        stays two-dimensional even when there are no samples.

    Raises:
        ValueError: If ``n_prev_days`` is negative.
    """
    if n_prev_days < 0:
        raise ValueError("n_prev_days must be non-negative")

    closes = data['Close'].to_numpy()
    n_samples = max(len(closes) - n_prev_days, 0)

    # window_idx[k, j] == k + j: one row of consecutive positions per sample.
    # Indexing with it gathers all windows at once instead of slicing the
    # DataFrame row by row, and yields shape (0, n_prev_days) when empty.
    window_idx = np.arange(n_samples)[:, None] + np.arange(n_prev_days)[None, :]
    features = closes[window_idx]

    # Copy so the returned targets never alias the DataFrame's own buffer
    targets = closes[n_prev_days:].copy()

    return features, targets

# Window length: each sample is made of this many preceding closing prices
n_prev_days = 5

# Build the feature matrix and the matching target vector
X, y = create_features(amazon_data, n_prev_days=n_prev_days)

# Report the array shapes, then show the first sample next to its target
print(f"Features shape: {X.shape}")
print(f"Targets shape: {y.shape}")

first_sample, first_target = X[0], y[0]
print(f"\nSample feature (previous {n_prev_days} days' closing prices):")
print(first_sample)
print(f"Corresponding target (next day's closing price): {first_target}")

In [None]:
# Chronological split: the first 80% of the samples are used for training and
# the remaining 20% for testing (no shuffling, so the test set is the most recent data)
train_size = int(0.8 * len(X))

X_train = X[:train_size]
X_test = X[train_size:]

y_train = y[:train_size]
y_test = y[train_size:]

print(f"Training set: {X_train.shape[0]} samples")
print(f"Testing set: {X_test.shape[0]} samples")

### Feature Scaling
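Let's standardize our features and targets (zero mean, unit variance) using statistics computed from the training set only. This improves the convergence of gradient descent and keeps test-set information out of the model.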

In [None]:
# Standardize features column by column, using statistics from the training
# split only so that nothing from the test period leaks into the model
X_mean = np.mean(X_train, axis=0)
X_std = np.std(X_train, axis=0)

# A constant column has zero standard deviation; dividing by it would produce
# NaN/inf values that poison every gradient step, so scale such columns by 1
X_std[X_std == 0] = 1.0

X_train_norm = (X_train - X_mean) / X_std
X_test_norm = (X_test - X_mean) / X_std

# Standardize targets the same way (again with training statistics only)
y_mean = np.mean(y_train)
y_std = np.std(y_train)

# Same zero-variance guard for the (scalar) target scale
if y_std == 0:
    y_std = 1.0

y_train_norm = (y_train - y_mean) / y_std
y_test_norm = (y_test - y_mean) / y_std

print("Normalized training features (first sample):", X_train_norm[0])
print("Normalized training target (first sample):", y_train_norm[0])

## Linear Regression with Gradient Descent
Now, let's implement linear regression using our gradient descent library.

In [None]:
# The gradient descent library is fed plain nested Python lists rather than NumPy arrays
X_train_list = X_train_norm.tolist()
y_train_list = y_train_norm.tolist()

# Fix the seed so the small random starting parameters are reproducible
np.random.seed(42)

# One trainable weight per lagged price, drawn one at a time
# (second argument to Variable.create is presumably a "trainable" flag - confirm against the library)
w = []
for _ in range(n_prev_days):
    w.append(gd.Variable.create(np.random.randn() * 0.1, True))

# The bias term is drawn after the weights
b = gd.Variable.create(np.random.randn() * 0.1, True)

initial_weight_values = [w_i.value for w_i in w]
print(f"Initial weights: {initial_weight_values}")
print(f"Initial bias: {b.value}")

# Mean-squared-error loss object and the optimizer from the library
loss_fn = gd.MSE()
optimizer = gd.Vanilla()

In [None]:
# Hyperparameters
learning_rate = 0.01
n_epochs = 1000

# Per-epoch records, used later for the convergence plots
losses = []
weights_history = []

# The optimizer gets a single parameter list with the bias appended last
all_weights = w + [b]

for epoch in range(n_epochs):
    # Perform one training step; weights and bias are passed together
    optimizer.train(all_weights, X_train_list, y_train_list, loss_fn, learning_rate)

    # Rebuild the predictions from the current parameters to monitor the loss
    y_pred = []
    for sample in X_train_list:
        pred = gd.Variable.create(0.0)
        for w_j, x_val in zip(w, sample):
            pred = pred + w_j * gd.Variable.create(x_val)
        pred = pred + b  # Add bias
        y_pred.append(pred)

    loss = loss_fn.compute(y_pred, y_train_list)
    losses.append(loss.value)

    # Snapshot of the parameters after this epoch (weights first, bias last)
    snapshot = [w_i.value for w_i in w]
    snapshot.append(b.value)
    weights_history.append(snapshot)

    # Report every 100th epoch and always the final one
    is_last_epoch = epoch == n_epochs - 1
    if is_last_epoch or epoch % 100 == 0:
        print(f"Epoch {epoch}: Loss = {loss.value:.6f}")

final_weight_values = [w_i.value for w_i in w]
print(f"\nFinal weights: {final_weight_values}")
print(f"Final bias: {b.value}")

## Model Evaluation
Let's evaluate our model on the test set.

In [None]:
# Predict on the test set in normalized space with one matrix-vector product
# instead of nested per-sample / per-feature Python loops
weight_values = np.array([w_j.value for w_j in w])
y_test_pred_norm = X_test_norm @ weight_values + b.value

# Undo the target standardization so predictions are back on the original price scale
y_test_pred = np.asarray(y_test_pred_norm) * y_std + y_mean

# Error metrics on the test set: mean squared error, its square root,
# and mean absolute error
residuals = y_test - y_test_pred
mse = np.mean(residuals ** 2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(residuals))

print(f"Test MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE: {mae:.4f}")

In [None]:
# Dates belonging to the test samples: sample k is predicted from the
# n_prev_days rows before data row k + n_prev_days, so the test period
# starts train_size + n_prev_days rows into the DataFrame
test_dates = amazon_data['Date'].iloc[train_size + n_prev_days:]

# Overlay the actual and the predicted prices on one set of axes
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test, 'b-', label='Actual', linewidth=1)
ax.plot(test_dates, y_test_pred, 'r-', label='Predicted', linewidth=1)

ax.set_title('Amazon Stock Price: Actual vs Predicted')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price ($)')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Training Visualization
Let's visualize the training process.

In [None]:
# Turn the per-epoch snapshots into a 2-D array: one row per epoch,
# one column per weight, with the bias in the last column
weights_history = np.array(weights_history)

# Two panels side by side: loss curve on the left, parameter traces on the right
fig, (loss_ax, weight_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training loss per epoch
loss_ax.plot(losses, 'b-', linewidth=2)
loss_ax.set_title('Training Loss Over Time')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss (MSE)')
loss_ax.grid(True, alpha=0.3)

# Right panel: one trace per weight, plus the bias as a dashed black line
for i in range(n_prev_days):
    weight_ax.plot(weights_history[:, i], label=f'w[{i}]')
weight_ax.plot(weights_history[:, -1], 'k--', label='bias')
weight_ax.set_title('Weight Convergence')
weight_ax.set_xlabel('Epoch')
weight_ax.set_ylabel('Weight Value')
weight_ax.legend()
weight_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Feature Importance
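Let's analyze the importance of each feature (previous days' closing prices) in our model. Because the features were standardized, the learned weights are on a comparable scale; keep in mind, though, that consecutive closing prices are typically strongly correlated, so individual weights should be interpreted with caution.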

In [None]:
# Learned weight of each lagged closing price (bias excluded)
final_weights = np.array([w_i.value for w_i in w])

# Feature 0 is the oldest price in the window, so labels count down
# from 'Day -n_prev_days' to 'Day -1'
day_labels = [f'Day -{lag}' for lag in range(n_prev_days, 0, -1)]

# Bar chart with one bar per lag
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(day_labels, final_weights)
ax.set_title('Feature Importance: Impact of Previous Days on Stock Price Prediction')
ax.set_xlabel('Previous Days')
ax.set_ylabel('Weight Value')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Same numbers as text, followed by the bias
print("Feature importance (weights):")
for label, weight in zip(day_labels, final_weights):
    print(f"{label}: {weight:.4f}")
print(f"Bias: {b.value:.4f}")