# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Read the price table from CSV and preview its first rows.
data = pd.read_csv("AMZNtrain.csv")
print("Original Stock Market Data:")
print(data.head())

# Plot the full 'Close' column against the DataFrame index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(data['Close'])
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
ax.set_title('Original Stock Prices')
plt.show()

# Order-preserving 80/20 split of the 'Close' column: the first 80% of rows
# train the model, the remaining rows are held out for evaluation.
train_size = int(len(data) * 0.8)
close_prices = data['Close']
train_data = close_prices.iloc[:train_size]
test_data = close_prices.iloc[train_size:]

# Fit a SARIMA model on the training prices. The order and seasonal_order
# values are examples and can be tuned.
model = SARIMAX(
    train_data,
    order=(3, 1, 2),
    seasonal_order=(1, 0, 1, 12),
)
model_fit = model.fit()

# Forecast one step for every held-out observation.
y_pred = model_fit.forecast(steps=len(test_data))

# Evaluate the forecast against the held-out prices.
#
# All metrics are computed positionally on plain float arrays. The sklearn
# metrics already ignore index labels, but subtracting two pandas Series
# aligns on labels first; if the forecast index ever differed from the test
# index, the MAPE would silently be computed on NaN/partial data. Converting
# once up front keeps every metric comparing the same (actual, predicted)
# pairs.
actual_values = np.asarray(test_data, dtype=float)
predicted_values = np.asarray(y_pred, dtype=float)

# Calculate the R-squared score
r2 = r2_score(actual_values, predicted_values)
print("R-squared score:", r2)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(actual_values, predicted_values)
print("Mean Absolute Error (MAE):", mae)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(actual_values, predicted_values)
print("Mean Squared Error (MSE):", mse)

# Calculate the Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
print("Root Mean Squared Error (RMSE):", rmse)

# Calculate the Mean Absolute Percentage Error (MAPE), expressed in percent
# of the actual value.
mape = np.mean(np.abs((actual_values - predicted_values) / actual_values)) * 100
print("Mean Absolute Percentage Error (MAPE):", mape)

# Visualize the predicted vs actual stock prices
plt.figure(figsize=(10, 6))
# Draw the held-out actual prices alongside the forecast; without this line
# the "Predicted vs Actual" chart would only show the prediction.
plt.plot(test_data.index, test_data, label='Actual')
# The forecast is plotted against the test index so both series share the
# same x positions.
plt.plot(test_data.index, y_pred, label='Predicted')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.title('Predicted vs Actual Stock Prices')
plt.legend()
plt.show()
