# Machine Learning Analysis of NVIDIA Stock Price

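In this notebook, we predict NVIDIA's next-day stock return from price and volume data using three regression models (linear regression, random forest, and support vector regression) and compare their performance on training and held-out test data.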

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

## Data Loading and Preprocessing

In [None]:
# Load the data and index it by date.
# sort_index() enforces oldest-to-newest row order: both the shifted target
# and the chronological train/test split below depend on it.
df = pd.read_csv('NVDA.csv')
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date').sort_index()

# Create features and target
# Returns: row-over-row fractional change of the closing price.
# Target:  the NEXT row's return (shift(-1)), i.e. the value to be predicted
#          from the current row's features.
df['Returns'] = df['Close'].pct_change()
df['Target'] = df['Returns'].shift(-1)
# Removes the first row (no previous close), the last row (no next return),
# and any other row containing a missing value.
df = df.dropna()

# Create feature matrix X and target vector y
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Returns']
X = df[features]
y = df['Target']

# Split the data chronologically: first 80% of rows for training, last 20% for
# testing. shuffle=False is essential for time-series data; a shuffled split
# would let the model train on rows dated after the rows it is tested on,
# leaking future information into the evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Scale the features. The scaler is fitted on the training rows only, so no
# statistics from the test period influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Model Training and Evaluation

In [None]:
# Initialize models
# SVR's default epsilon (0.1) is an absolute tolerance measured in target
# units. The target here is a fractional return, which is presumably much
# smaller than 0.1 for nearly all rows, so an unscaled SVR would see almost
# every sample inside its no-penalty tube and degenerate to a near-constant
# prediction. TransformedTargetRegressor standardises y before fitting the SVR
# and inverse-transforms predictions back to return units, so the metrics
# below stay comparable across models.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'SVR': TransformedTargetRegressor(
        regressor=SVR(kernel='rbf'),
        transformer=StandardScaler(),
    ),
}

# Train and evaluate models
results = {}      # model name -> dict of train/test R2 and MSE
predictions = {}  # model name -> predictions on the test set

for name, model in models.items():
    # Train the model on the scaled training features
    model.fit(X_train_scaled, y_train)

    # Make predictions on both splits so train and test scores can be
    # compared side by side
    train_pred = model.predict(X_train_scaled)
    test_pred = model.predict(X_test_scaled)
    predictions[name] = test_pred

    # Calculate metrics
    results[name] = {
        'Train R2': r2_score(y_train, train_pred),
        'Test R2': r2_score(y_test, test_pred),
        'Train MSE': mean_squared_error(y_train, train_pred),
        'Test MSE': mean_squared_error(y_test, test_pred)
    }

## Results Visualization

In [None]:
# Tabulate the metrics: one row per model, one column per metric
results_df = pd.DataFrame(results).transpose()
print("Model Performance Metrics:")
print(results_df.round(4))

# Grouped bar chart of R2: a train bar and a test bar for every model
model_names = list(models)
x = np.arange(len(model_names))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
# Each metric's bars are shifted half a bar-width left/right of the tick
for offset, metric in ((-width / 2, 'Train R2'), (width / 2, 'Test R2')):
    scores = [results[model_name][metric] for model_name in model_names]
    ax.bar(x + offset, scores, width, label=metric)

ax.set_xlabel('Models')
ax.set_ylabel('R2 Score')
ax.set_title('Model Performance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(model_names, rotation=45)
ax.legend()
fig.tight_layout()
plt.show()

## Prediction Visualization

In [None]:
# Plot actual vs predicted values for each model, one panel per model.
# squeeze=False makes plt.subplots always return a 2-D array of Axes; without
# it a single-model run gets a bare Axes object and the zip below fails.
# ravel() flattens the (n_models, 1) grid back to a 1-D sequence of panels.
fig, axes = plt.subplots(
    len(models), 1, figsize=(12, 4*len(models)), squeeze=False
)
axes = axes.ravel()
fig.suptitle('Actual vs Predicted Returns')

for (name, pred), ax in zip(predictions.items(), axes):
    ax.scatter(y_test, pred, alpha=0.5)
    # Dashed red y = x reference line across the range of actual values:
    # points on this line are perfect predictions.
    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
    ax.set_xlabel('Actual Returns')
    ax.set_ylabel('Predicted Returns')
    ax.set_title(f'{name}')

plt.tight_layout()
plt.show()