# NVIDIA Stock Price Time Series Analysis with ARIMA Model

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

# Silence all warnings so the cell outputs stay uncluttered
warnings.simplefilter('ignore')

# Plot appearance: ggplot style first, then seaborn's whitegrid,
# and a (12, 6) default figure size
plt.style.use('ggplot')
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Data Loading and Exploration

In [None]:
# Read the NVIDIA price history from CSV
df = pd.read_csv('NVDA.csv')

# Parse the Date column as datetimes and promote it to the index
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')

# Preview the first rows (last expression, so the notebook renders it)
df.head()

In [None]:
# Dataset dimensions as (rows, columns)
dataset_shape = df.shape
print(f"Dataset Shape: {dataset_shape}")

# Per-column overview; DataFrame.info() prints its report itself
print("\nDataset Info:")
df.info()

# Summary statistics, left as the final expression so the notebook renders the table
print("\nDescriptive Statistics:")
df.describe()

## 2. Time Series Visualization

In [None]:
# Line chart of the closing price over the whole date index
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df.index, df['Close'], color='blue')
ax.set_title('NVIDIA Stock Closing Price (1999-2006)', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Closing Price ($)', fontsize=14)
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Bar chart of the traded volume for every row of the dataset
fig, ax = plt.subplots(figsize=(14, 5))
ax.bar(df.index, df['Volume'], color='green', alpha=0.7)
ax.set_title('NVIDIA Trading Volume Over Time', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Volume', fontsize=14)
fig.tight_layout()
plt.show()

In [None]:
# Row-over-row percentage change of the close, expressed in percent
# (the first row has no predecessor and is NaN)
df['Daily_Return'] = df['Close'].pct_change().mul(100)

# Line chart of the daily returns
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df.index, df['Daily_Return'], color='purple', alpha=0.8)
ax.set_title('NVIDIA Daily Returns (%)', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Daily Return (%)', fontsize=14)
fig.tight_layout()
plt.show()

## 3. Stationarity Testing

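ARIMA assumes that the series is stationary once it has been differenced (the "I" in ARIMA), so we first need to know whether the raw data is already stationary and, if not, how much differencing is required. Let's test this using the Augmented Dickey-Fuller (ADF) test.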

In [None]:
def test_stationarity(timeseries, window=12, significance_level=0.05):
    """Plot rolling statistics of a series and report an ADF test on it.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to examine. Missing values are dropped before the ADF test;
        the rolling statistics are computed on the series as passed in.
    window : int, default 12
        Number of observations in the rolling mean / standard deviation
        window.
    significance_level : float, default 0.05
        The unit-root null hypothesis is rejected (series reported as
        stationary) when the ADF p-value is below this threshold.

    Returns
    -------
    None
        The plot is shown and the test results are printed.
    """
    # Rolling statistics over the requested window
    rolling = timeseries.rolling(window=window)
    rolling_mean = rolling.mean()
    rolling_std = rolling.std()

    # Plot the series together with its rolling mean and standard deviation
    plt.figure(figsize=(14, 7))
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='green', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation', fontsize=16)
    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Price', fontsize=14)
    plt.tight_layout()
    plt.show()

    # Augmented Dickey-Fuller test on the series without missing values
    result = adfuller(timeseries.dropna())
    print('Augmented Dickey-Fuller Test Results:')
    labels = ['ADF Test Statistic', 'p-value', '# Lags Used', '# Observations']
    # zip stops after the four labelled entries; result[4] (critical values)
    # is reported separately below
    for value, label in zip(result, labels):
        print(f'{label}: {value}')

    print('Critical Values:')
    for key, value in result[4].items():
        print(f'\t{key}: {value}')

    # result[1] is the p-value
    if result[1] < significance_level:
        print("\nResult: The series is stationary (reject H0)")
    else:
        print("\nResult: The series is non-stationary (fail to reject H0)")

# Run the stationarity check on the raw closing-price series
closing_prices = df['Close']
test_stationarity(closing_prices)

In [None]:
# First-order difference of the closing price (the first row becomes NaN)
df['Close_diff'] = df['Close'].diff(periods=1)

# Re-run the stationarity check on the differenced values, NaN removed
differenced_close = df['Close_diff'].dropna()
test_stationarity(differenced_close)

## 4. ACF and PACF Plots for Model Order Determination

In [None]:
# Differenced closing prices without the leading NaN, shared by both plots
close_diff = df['Close_diff'].dropna()

# Two panels side by side: ACF on the left, PACF on the right
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))
ax1, ax2 = axes

plot_acf(close_diff, ax=ax1, lags=30)
ax1.set_title('Autocorrelation Function (ACF)', fontsize=14)

plot_pacf(close_diff, ax=ax2, lags=30, method='ywm')
ax2.set_title('Partial Autocorrelation Function (PACF)', fontsize=14)

fig.tight_layout()
plt.show()

## 5. Auto ARIMA for Optimal Model Selection

In [None]:
# Use auto_arima to find the optimal ARIMA order, searching on the training
# portion of the series only. Running the search on the full series would let
# the hold-out observations influence the selected order and bias the later
# test-set evaluation. The 0.8 fraction matches the 80/20 train/test split
# used in section 6.
selection_cutoff = int(len(df) * 0.8)
auto_model = auto_arima(df['Close'].iloc[:selection_cutoff],
                        seasonal=False,
                        test='adf',
                        trace=True,
                        error_action='ignore',
                        suppress_warnings=True,
                        stepwise=True)

In [None]:
# Show the summary table of the model chosen by auto_arima
auto_summary = auto_model.summary()
print(auto_summary)

## 6. Training and Testing ARIMA Model

In [None]:
# Order reported by the auto_arima search
optimal_order = auto_model.order
print(f"Optimal ARIMA order: {optimal_order}")

# Positional 80/20 split: the first 80% of rows train the model,
# the remaining rows are held out for testing
close_series = df['Close']
train_size = int(len(df) * 0.8)
train_data = close_series.iloc[:train_size]
test_data = close_series.iloc[train_size:]

print(f"Training data points: {len(train_data)}")
print(f"Testing data points: {len(test_data)}")

In [None]:
# Build the ARIMA model on the training series with the selected order, then fit it
model = ARIMA(endog=train_data, order=optimal_order)
model_fit = model.fit()

# Print the fitted model's summary table
fit_summary = model_fit.summary()
print(fit_summary)

## 7. Model Performance Evaluation

In [None]:
# Forecast one value per observation in the test set
predictions = model_fit.forecast(steps=len(test_data))

# Pair actual and predicted values by position. When the training index
# carries no frequency information (typical for trading-day dates),
# statsmodels may return the forecast with an integer index; passing that
# Series together with index=test_data.index would make pandas align on
# labels and fill 'Predicted' with NaN. Converting to a plain array attaches
# the forecasts to the test dates positionally instead.
forecast_df = pd.DataFrame({
    'Actual': test_data.to_numpy(),
    'Predicted': np.asarray(predictions),
}, index=test_data.index)

# Calculate error metrics
actual = forecast_df['Actual']
predicted = forecast_df['Predicted']
mse = mean_squared_error(actual, predicted)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, predicted)
r2 = r2_score(actual, predicted)
# Mean absolute percentage error, in percent (relative to the actual values)
mape = np.mean(np.abs((actual - predicted) / actual)) * 100

# Display the performance metrics in a table
metrics_df = pd.DataFrame({
    'Metric': ['Mean Squared Error (MSE)', 'Root Mean Squared Error (RMSE)',
               'Mean Absolute Error (MAE)', 'R-squared', 'Mean Absolute Percentage Error (MAPE)'],
    'Value': [mse, rmse, mae, r2, mape]
})

print("ARIMA Model Performance Metrics:")
# Final expression so the notebook renders the table
metrics_df

In [None]:
# Training data, held-out actuals and forecasts on a single axis
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(train_data, label='Training Data', color='blue')
ax.plot(test_data, label='Actual Test Data', color='green')
ax.plot(forecast_df['Predicted'], label='Predictions', color='red', linestyle='--')
ax.set_title('NVIDIA Stock Price: Actual vs Predicted', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Stock Price', fontsize=14)
ax.legend(loc='best')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Same comparison restricted to the test period
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(test_data, label='Actual', color='green', marker='o', markersize=2)
ax.plot(forecast_df['Predicted'], label='Predicted', color='red', linestyle='--')
ax.set_title('NVIDIA Stock Price (Test Period): Actual vs Predicted', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Stock Price', fontsize=14)
ax.legend(loc='best')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Forecast errors on the test set: actual minus predicted
residuals = forecast_df['Actual'].sub(forecast_df['Predicted'])

# Errors over time, with a dashed zero reference line
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(residuals, color='purple')
ax.axhline(y=0, color='black', linestyle='--')
ax.set_title('ARIMA Model Residuals', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Residual Value', fontsize=14)
ax.grid(True)
fig.tight_layout()
plt.show()

# Histogram of the errors with a KDE overlay
fig, ax = plt.subplots(figsize=(14, 5))
sns.histplot(residuals, kde=True, color='purple', ax=ax)
ax.set_title('Distribution of Residuals', fontsize=16)
ax.set_xlabel('Residual Value', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.grid(True)
fig.tight_layout()
plt.show()

## 8. Forecasting Future Values

In [None]:
# Fit ARIMA model on the entire dataset
full_model = ARIMA(df['Close'], order=optimal_order)
full_model_fit = full_model.fit()

# Forecast for the next 30 business days
forecast_steps = 30
forecast = full_model_fit.forecast(steps=forecast_steps)

# Business days strictly after the last observed date. Starting one business
# day past the last observation (rather than generating one extra date and
# dropping the first) stays correct even if the last observation is not
# itself a business day.
forecast_dates = pd.date_range(start=df.index[-1] + pd.offsets.BDay(1),
                               periods=forecast_steps, freq='B')

# Create a DataFrame for the forecast. The values are attached by position:
# the forecast Series may carry an integer index when the date index has no
# frequency, and label alignment against forecast_dates would then produce
# a column of NaN.
forecast_df_future = pd.DataFrame({
    'Forecasted_Close': np.asarray(forecast)
}, index=forecast_dates)

# Display the forecasted values
print("Forecasted NVIDIA Stock Prices for Next 30 Business Days:")
# Final expression so the notebook renders the table
forecast_df_future

In [None]:
# Plot the historical data and the forecast
plt.figure(figsize=(16, 8))

# Plot historical data
plt.plot(df.index, df['Close'], label='Historical Data', color='blue')

# Plot forecast
plt.plot(forecast_df_future.index, forecast_df_future['Forecasted_Close'],
         label='Forecast', color='red', linestyle='--')

# Add a vertical line to separate historical data from forecast
plt.axvline(x=df.index[-1], color='black', linestyle=':', alpha=0.5)

# Shade the model's own 95% interval for the forecast horizon instead of an
# arbitrary +/-5% band. conf_int(alpha=0.05) returns lower and upper bounds
# as two columns; they are used positionally so they line up with the
# forecast dates regardless of the index statsmodels assigns to them.
interval_bounds = np.asarray(
    full_model_fit.get_forecast(steps=forecast_steps).conf_int(alpha=0.05)
)
plt.fill_between(forecast_df_future.index,
                 interval_bounds[:, 0],
                 interval_bounds[:, 1],
                 color='red', alpha=0.2, label='95% Confidence Interval')

plt.title('NVIDIA Stock Price Forecast', fontsize=18)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Stock Price', fontsize=14)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## 9. Conclusion

In this analysis, we've built an ARIMA model to forecast NVIDIA stock prices. The model shows reasonable performance as demonstrated by the evaluation metrics and plots. The forecasting capability provides insight into potential future price movements based on historical patterns.

Key findings:
1. The ARIMA model with optimal order parameters helps capture time series patterns in stock price data
2. The residuals analysis indicates how well our model fits the actual data
3. The performance metrics quantify the accuracy of our predictions
4. The forecasted values offer a perspective on future price movements

It's important to note that while time series models like ARIMA are useful for stock price prediction, actual market performance is influenced by many external factors not captured in historical price data alone.