# Time Series Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Read the Microsoft stock CSV into a DataFrame; the bare name on the last
# line makes the notebook render the table as the cell output.
data = pd.read_csv(filepath_or_buffer="MicrosoftStock.csv")
data

In [None]:
# Fix NumPy's global RNG seed so repeated runs are reproducible.
np.random.seed(0)

# Convert the "Date" column to datetime values.
parsed_dates = pd.to_datetime(data['Date'])
data['Date'] = parsed_dates

In [None]:
 # Order the rows chronologically (no change if they are already sorted)
data = data.sort_values('Date')

In [None]:
 # Use the Date column as the index, then discard rows with missing values
data = data.set_index('Date').dropna()

In [None]:
# Decompose the Open price series with an additive model and a seasonal
# period of 12 observations.
result = seasonal_decompose(data['Open'], model='additive', period=12)

# One row per component; all rows share the x-axis.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(15, 12), sharex=True)
components = (
    ('Observed', result.observed),
    ('Trend', result.trend),
    ('Seasonal', result.seasonal),
    ('Residual', result.resid),
)
for axis, (name, component) in zip((ax1, ax2, ax3, ax4), components):
    component.plot(ax=axis, title=name)
    axis.set_ylabel(name)

# The series is indexed by Date, so the shared x-axis shows dates
# (it was previously mislabelled 'Open').
ax4.set_xlabel('Date')
plt.tight_layout()
plt.show()

In [None]:
# Line chart of the Open price over the date index.
price_fig, price_ax = plt.subplots(figsize=(12, 6))
price_ax.plot(data['Open'], label='Open Price')
price_ax.set_xlabel('Year')
price_ax.set_ylabel('Open Price')
price_ax.legend()
price_ax.set_title('Open Price Data')
plt.show()

In [None]:
# Keep the Open column as a standalone Series and show it as the cell output.
open_prices = data.loc[:, 'Open']
open_prices

In [None]:
# Adding lag features to the DataFrame
# for i in range(1, 13): # Creating lag features up to 13 days
# 	data[f'Lag_{i}'] = data['Open'].shift(i)

# data

In [None]:
# Build lag features Lag_1 .. Lag_134 of the Open price.
# All lag columns are assembled in a single concat: inserting 134 columns one
# at a time fragments the DataFrame and triggers pandas' PerformanceWarning.
lag_features = pd.concat(
    {f'Lag_{i}': open_prices.shift(i) for i in range(1, 135)},
    axis=1,
)
# Drop any lag columns left over from a previous run of this cell so that
# re-running it does not create duplicate column names.
data = pd.concat(
    [data.drop(columns=lag_features.columns, errors='ignore'), lag_features],
    axis=1,
)

# The first 134 rows have incomplete lag history (NaN); remove them.
data = data.dropna()

# Chronological 80/20 split into training and testing sets.
train_size = int(0.8 * len(data))
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

# Target variable for each split. The lag columns stay in train_data /
# test_data; no separate feature matrix is built here.
y_train = train_data['Open']
y_test = test_data['Open']

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation plot of the Open price series, up to 200 lags.
series = open_prices
acf_figure = plot_acf(series, lags=200)
plt.xlabel('Lags')
plt.ylabel('ACF')
plt.show()

In [None]:
# Print the correlation between the Open price and each of its first 19
# lagged copies (columns Lag_1 .. Lag_19).
for i in range(1, 20):
    correlation = data['Open'].corr(data[f'Lag_{i}'])
    # The series is the Open price; the old message said "passengers".
    print(f"AutoCorrelation between Open price and lag {i}: {correlation:.4f}")

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.ar_model import AutoReg

# Number of autoregressive lags; tune this using the ACF plot.
lag_order = 135

# Build the autoregressive model on the training target and fit it.
ar_model = AutoReg(endog=y_train, lags=lag_order)
ar_results = ar_model.fit()

In [None]:
from sklearn.metrics import r2_score

# Predict one value for every row of the test set, starting at the first
# position after the training data.
n_train = len(train_data)
y_pred = ar_results.predict(
    start=n_train,
    end=n_train + len(test_data) - 1,
    dynamic=False,
)

# Error metrics on the test set. The R^2 value is stored as `r2` so that the
# imported `r2_score` function is not overwritten by a float.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f'Mean Absolute Error: {mae:.2f}')
print(f'Root Mean Squared Error: {rmse:.2f}')
print(f'R Squared Score: {r2:.2f}')

In [None]:
# Replace the test set's date index with a 0..n-1 positional index so actual
# and predicted values are drawn against the same x positions.
test_data = test_data.reset_index(drop=True)
positions = test_data.index

# Actual vs. predicted Open price on the test set.
pred_fig, pred_ax = plt.subplots(figsize=(12, 6))
pred_ax.plot(positions, y_test, label='Actual Open Price')
pred_ax.plot(positions, y_pred, label='Predicted Open Price', linestyle='--')
pred_ax.set_xlabel('Index')
pred_ax.set_ylabel('Open Price')
pred_ax.legend()
pred_ax.set_title('Open Price Prediction with Autoregressive Model')
plt.show()

In [None]:
# # Define the number of future time steps you want to predict
# forecast_steps = 365

# # Generate future indices
# future_indices = range(len(y_test), len(y_test) + forecast_steps)

# # Generate future predictions
# future_predictions = ar_results.predict(start=len(train_data), end=len(train_data) + forecast_steps - 1, dynamic=False)

# # Plot the actual data, existing predictions, and future predictions
# plt.figure(figsize=(12, 6))
# plt.plot(range(len(y_test)), y_test, label='Actual Open Price')
# plt.plot(range(len(y_test)), y_pred, label='Predicted Open Price', linestyle='--')
# plt.plot(future_indices, future_predictions[-forecast_steps:], label='Future Predictions', linestyle='--', color='red')
# plt.xlabel('Index')
# plt.ylabel('Open Price')
# plt.legend()
# plt.title('Open Price Forecast with Autoregressive Model')
# plt.show()

# Moving Average

In [None]:
# Reload the raw CSV with the parsed Date column as the index.
data = pd.read_csv("MicrosoftStock.csv", parse_dates=['Date'], index_col='Date')

open_prices = data['Open']

# Positional 80/20 split of the Open price series.
train_size = int(len(open_prices) * 0.8)
train = open_prices.iloc[:train_size]
test = open_prices.iloc[train_size:]

In [None]:
# 3-observation simple moving average (SMA) of the Open price.
rolling_open = data['Open'].rolling(window=3)
data['SMA3'] = rolling_open.mean()

# The first two rows have no complete window (NaN); drop rows with missing
# values, then show the frame as the cell output.
data.dropna(inplace=True)
data

In [None]:
# Plot the Open price together with its 3-day simple moving average (SMA3).
# Legend entries come from the column names, so no `label` is passed; the
# previous label named a different stock than the one in this dataset.
data[['Open', 'SMA3']].plot(figsize=(16, 8))

In [None]:
# columns_to_keep = ['Open', 'SMA30']
# data = data[columns_to_keep]
# data

In [None]:
# plt.figure(figsize=(10, 5))
# plt.plot(data, label='Original Series')
# plt.title('Time Series Plot')
# plt.xlabel('Date')
# plt.ylabel('Stock Prices')
# plt.legend()
# plt.show()

In [None]:
# data.isnull().sum()

In [None]:
# Start again from the raw CSV, indexed by the parsed Date column.
data = pd.read_csv("MicrosoftStock.csv", parse_dates=['Date'], index_col='Date')

open_prices = data['Open']

# First 80% of observations for training, remaining 20% for testing.
train_size = int(len(open_prices) * 0.8)
train = open_prices.iloc[:train_size]
test = open_prices.iloc[train_size:]

In [None]:
from statsmodels.tsa.stattools import adfuller

# First difference of the Open price; the leading NaN produced by diff() is
# removed once here.
differenced_series = open_prices.diff().dropna()

# Augmented Dickey-Fuller test on the differenced series.
result_diff = adfuller(differenced_series)
adf_statistic = result_diff[0]
adf_p_value = result_diff[1]
print(f'ADF Statistic (Differenced): {adf_statistic}')
print(f'p-value (Differenced): {adf_p_value}')

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the raw Open prices.
result = adfuller(data['Open'])
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# The original analysis recorded a p-value above 0.05 for the raw series
# (i.e. non-stationary), so the data is first-differenced before re-testing.
data_diff = data.diff(1).dropna()

# Repeat the test on the differenced Open prices.
result = adfuller(data_diff['Open'])
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# Plot the differenced series computed in this cell, so the cell does not
# depend on a variable created by a different cell.
plt.figure(figsize=(10, 5))
plt.plot(data_diff['Open'])
plt.title('Differenced Open Prices')
plt.xlabel('Year')
plt.ylabel('Open Prices')
plt.show()

In [None]:
# train_len = int(0.8 * len(data_diff))
# arma_train = data_diff[:train_len]
# arma_test = data_diff[train_len:]
# arma_train = arma_train['Open']
# arma_test = arma_test['Open']

In [None]:
# from statsmodels.tsa.arima.model import ARIMA

# # Fit the ARMA(1, 1) model
# model = ARIMA(arma_train, order=(12, 0, 1))
# model_fit = model.fit()

# # Print the model summary
# print(model_fit.summary())

In [None]:
# # Make predictions
# start = len(arma_train)
# end = len(arma_train) + len(arma_test) -1
# predictions = model_fit.predict(start=start, end=end)

# # Plot the results
# plt.figure(figsize=(10, 5))
# plt.plot(data_diff, label='Differenced Original Series')
# plt.plot(predictions, label='Predictions', color='red')
# plt.legend()
# plt.title('ARMA Model Predictions on Stocks Data')
# plt.xlabel('Year')
# plt.ylabel('Stocks')
# plt.show()

In [None]:
# from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# r2_score(arma_test, predictions), mean_squared_error(arma_test, predictions), mean_absolute_error(arma_test, predictions)

In [None]:
# train_len = int(0.8 * len(data))
# arima_train = data[:train_len]
# arima_test =data[train_len:]
# arima_train = arima_train['Open']
# arima_test = arima_test['Open']

In [None]:
# Positional 80/20 split of the Open price series for the ARIMA model.
train_size = int(len(open_prices) * 0.8)
train = open_prices.iloc[:train_size]
test = open_prices.iloc[train_size:]

In [None]:
%%time
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model with order (p=130, d=2, q=13) on the training split.
# Author's note on the order: the autocorrelation "was good until 130" lags.
# NOTE(review): the original note also mentions 100 and a 30-day rolling
# window, which match neither this order nor the 3-day SMA computed earlier
# in the notebook; confirm the intended order.
model = ARIMA(train, order=(130, 2, 13))
model_fit = model.fit()

# Print the model summary
print(model_fit.summary())

In [None]:
import seaborn as sns

# Residuals of the fitted ARIMA model.
residuals = model_fit.resid

# Side-by-side panels: residuals over time (left) and their distribution (right).
resid_fig, (resid_ax, hist_ax) = plt.subplots(1, 2, figsize=(12, 6))

resid_ax.plot(residuals)
resid_ax.set_title('Residuals')

sns.histplot(residuals, kde=True, ax=hist_ax)
hist_ax.set_title('Residuals Histogram')

plt.show()

In [None]:
# Number of autoregressive lags; tune this using the ACF plot.
lag_order = 135

# Autoregressive model on the Open price training split.
ar_model = AutoReg(endog=train, lags=lag_order)
ar_results = ar_model.fit()

In [None]:
# Predict one value per test observation, beginning right after the training data.
forecast_start = len(train)
forecast_end = forecast_start + len(test) - 1
forecast = ar_results.predict(start=forecast_start, end=forecast_end, dynamic=False)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Cell output: (R^2, mean squared error, mean absolute error) of the AR forecast.
tuple(
    metric(test, forecast)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

In [None]:
# `train_len` and `arima_test` were only ever assigned in commented-out code,
# so this cell raised NameError. Define them from the live train/test split,
# which is the split the ARIMA model was fitted on.
train_len = len(train)
arima_test = test

# Predict from the first position after the training data through the last
# row of the dataset, i.e. one value per test observation.
start = train_len
end = len(data) - 1
arima_predictions = model_fit.predict(start=start, end=end)

# Plot the full Open price series with the predictions over the test dates.
plt.figure(figsize=(10, 5))
plt.plot(data['Open'], label='Original Series')
plt.plot(arima_test.index, arima_predictions, label='Predictions', color='red')
plt.legend()
plt.title('ARIMA Model Predictions on Stocks Data')
plt.xlabel('Year')
plt.ylabel('Stocks')
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Cell output: (R^2, mean squared error, mean absolute error) of the ARIMA predictions.
tuple(
    metric(arima_test, arima_predictions)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

In [None]:
# Sanity check: the actual and predicted series should have matching sizes.
for checked in (arima_test, arima_predictions):
    print(checked.shape)

n_actual = len(arima_test)
n_predicted = len(arima_predictions)
print(f'Length of arima_test: {n_actual}')
print(f'Length of arima_predictions: {n_predicted}')