In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

In [None]:
# Read the portfolio CSV from the attached Kaggle dataset into a DataFrame
# and preview its first rows.
csv_path = '/kaggle/input/amzn-dpz-btc-ntfx-adjusted-may-2013may2019/portfolio_data.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple
data.shape

In [None]:
# Per-column dtypes, non-null counts and memory usage
data.info()

In [None]:
# Number of missing entries in each column
data.isna().sum()

In [None]:
# Column labels available in the frame
data.columns

In [None]:
# 1. Data Loading and Exploration
# Parse 'Date' as datetime and make it the index so that the plots and the
# time-series tools used later operate on a DatetimeIndex.
# The guard keeps this cell idempotent: once 'Date' has been moved into the
# index it is no longer a column, so re-running the unguarded conversion
# raised KeyError: 'Date'.
if 'Date' in data.columns:
    data = data.assign(Date=pd.to_datetime(data['Date'])).set_index('Date')

# Display the first few rows of the dataset
data.head()

In [None]:
# 2. Missing Values
# Count the NaNs in every column and report them
missing_values = data.isna().sum(axis=0)
print(missing_values)

# Keep only the rows in which every column is populated
data = data.dropna(axis='index', how='any')

In [None]:
# 3. Data Visualization
# One stacked panel per column, all drawn on a single tall figure.
fig = plt.figure(figsize=(14, 10))
n_panels = len(data.columns)
for position, column in enumerate(data.columns, start=1):
    ax = fig.add_subplot(n_panels, 1, position)
    ax.plot(data[column])
    ax.set_title(f'Time Series for {column}')
fig.tight_layout()
plt.show()

In [None]:
# 4. Decomposition of Time Series
# Split every series into its trend, seasonal and residual components and keep
# the result objects in `decompositions`, keyed by column name.
# NOTE(review): period=365 assumes one observation per calendar day; if the rows
# are trading days only, a year is closer to 252 observations — confirm.
decompositions = {}
for column in data.columns:
    # The multiplicative model requires strictly positive observations.
    decomposition = seasonal_decompose(data[column], model='multiplicative', period=365)
    decompositions[column] = decomposition
    # DecomposeResult.plot() returns a multi-panel Figure. plt.title() only labels
    # the current axes (the last panel drawn), so title the whole figure instead.
    fig = decomposition.plot()
    fig.suptitle(f'Decomposition of {column}')
    fig.tight_layout()
    plt.show()

In [None]:
# 5. Statistical Analysis
# Descriptive statistics for every column
summary_statistics = data.describe()
print(summary_statistics)

# Overlay a 30-observation rolling mean on each series, one panel per column
window = 30
n_panels = len(data.columns)
fig = plt.figure(figsize=(14, 10))
for position, column in enumerate(data.columns, start=1):
    series = data[column]
    ax = fig.add_subplot(n_panels, 1, position)
    ax.plot(series, label='Original')
    ax.plot(series.rolling(window=window).mean(), label='30-Day Moving Average')
    ax.set_title(f'Moving Averages for {column}')
    ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# 6. Stationarity Testing
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the outcome.

    Prints the test statistic, the p-value and every critical value reported
    by ``adfuller``. Nothing is returned.

    Parameters
    ----------
    series : array-like
        Observations passed straight to ``adfuller``.
    """
    outcome = adfuller(series)
    statistic, p_value = outcome[0], outcome[1]
    critical_values = outcome[4]  # mapping of significance level -> threshold

    print(f'ADF Statistic: {statistic}')
    print(f'p-value: {p_value}')
    for level, threshold in critical_values.items():
        print(f'Critical Value {level}: {threshold}')

# Run the ADF test on every column, separating the reports with blank lines
for name, series in data.items():
    print(f'Stationarity Test for {name}')
    adf_test(series)
    print('\n')

In [None]:
# 7. Autocorrelation and Partial Autocorrelation Analysis
# Grid with one row per column: ACF on the left, PACF on the right.
fig = plt.figure(figsize=(14, 10))
n_rows = len(data.columns)
for row, column in enumerate(data.columns):
    series = data[column]
    acf_ax = fig.add_subplot(n_rows, 2, 2 * row + 1)
    plot_acf(series, ax=acf_ax, title=f'ACF for {column}')
    pacf_ax = fig.add_subplot(n_rows, 2, 2 * row + 2)
    plot_pacf(series, ax=pacf_ax, title=f'PACF for {column}')
fig.tight_layout()
plt.show()


In [None]:
# 8. Time Series Modeling
# Positional 80/20 split: the first 80% of rows train, the remainder tests
train_size = int(len(data) * 0.8)
train = data.iloc[:train_size]
test = data.iloc[train_size:]

# Fit one ARIMA(p=5, d=1, q=0) model per column and print its summary
arima_order = (5, 1, 0)
models = {}
for column in train.columns:
    fitted = ARIMA(train[column], order=arima_order).fit()
    models[column] = fitted
    print(f'{column} ARIMA Model Summary')
    print(fitted.summary())


In [None]:
# 9. Forecasting
# Forecast the held-out period for every column, plot it against the actual
# values and score it with RMSE.
forecasts = {}
horizon = len(test)
for column in data.columns:
    # Out-of-sample forecast covering exactly the test span. The `typ` argument
    # belonged to the legacy ARIMA API; statsmodels.tsa.arima.model.ARIMA always
    # predicts in levels, so the keyword is dropped.
    predicted = models[column].forecast(steps=horizon)
    # When the training index carries no frequency, statsmodels returns the
    # forecast on an integer index. Re-label it positionally with the test index
    # so that it lines up with the actual values on the x-axis.
    forecasts[column] = pd.Series(np.asarray(predicted), index=test.index, name=column)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(test[column], label='Actual')
    ax.plot(forecasts[column], label='Forecast')
    ax.set_title(f'Forecast vs Actual for {column}')
    ax.legend()
    plt.show()

    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(test[column], forecasts[column]))
    print(f'{column} RMSE: {rmse}')