# NASDAQ Data Exploration 

# Import Python Packages

In [None]:
import pandas as pd
from pandas.plotting import autocorrelation_plot
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA


from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [None]:
import warnings
import itertools
import numpy as np

# Silence all warnings for the rest of the session and select the plot theme.
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')

import statsmodels.api as sm
import matplotlib

# Global matplotlib defaults: axis-label size, tick-label sizes and text colour.
matplotlib.rcParams.update({
    'axes.labelsize': 18,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'text.color': 'k',
})

# Import and Clean Data

In [None]:
# Relative path of the NASDAQ CSV file that feeds the whole analysis.
nasdaq = "NASDAQ.csv"
# Load the raw file into a DataFrame with pandas' default parsing options.
df_nasdaq = pd.read_csv(filepath_or_buffer=nasdaq)


In [None]:
# Drop unnecessary columns: High, Low and Volume are not referenced by any
# later step of this analysis, so they are removed from the frame in place.
df_nasdaq.drop(columns=['High', 'Low', 'Volume'], inplace=True)

In [None]:
# Check the dataframe: print its current contents for a visual sanity check.
print(df_nasdaq)

# Drop Rows with Missing Values

In [None]:
# Drop rows with missing values, triggered by nulls in the Open column.
null_open = bool(df_nasdaq['Open'].isnull().any())
if null_open:
    print("Removing null entries from Open Stocks")
    # dropna() without arguments removes every row that has a null in ANY column.
    df_nasdaq = df_nasdaq.dropna()

# Verify against the cleaned frame. The flag above was computed before the
# drop, so asserting on it would fail exactly when cleaning actually ran.
assert not df_nasdaq['Open'].isnull().any()

In [None]:
# Drop rows with missing values, triggered by nulls in the Close column.
null_close = bool(df_nasdaq['Close'].isnull().any())
if null_close:
    print("Removing null entries from Close Stocks")
    # dropna() without arguments removes every row that has a null in ANY column.
    df_nasdaq = df_nasdaq.dropna()

# Verify against the cleaned frame. The flag above was computed before the
# drop, so asserting on it would fail exactly when cleaning actually ran.
assert not df_nasdaq['Close'].isnull().any()

In [None]:
# Drop rows with missing values, triggered by nulls in the Adj Close column.
null_adjclose = bool(df_nasdaq['Adj Close'].isnull().any())
if null_adjclose:
    print("Removing null entries from Adj Close Stocks")
    # dropna() without arguments removes every row that has a null in ANY column.
    df_nasdaq = df_nasdaq.dropna()

# Verify against the cleaned frame. The flag above was computed before the
# drop, so asserting on it would fail exactly when cleaning actually ran.
assert not df_nasdaq['Adj Close'].isnull().any()

In [None]:
# Summary statistics of the frame; the result is not assigned, so it is only
# visible as cell output when this runs in a notebook.
df_nasdaq.describe()

# Setting Indexes and Labels

In [None]:
# Use the Date column as the row index so every price is keyed by its date.
# NOTE(review): the dates are not converted with pd.to_datetime here, so the
# index keeps whatever dtype read_csv produced (presumably strings). Confirm
# that this is intended for the date-axis plots and the freq='W' models below.
df_nasdaq = df_nasdaq.set_index('Date')
print(df_nasdaq.index)


In [None]:
# Line chart of the Open column against the frame's index.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(df_nasdaq.index, df_nasdaq['Open'])
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
plt.show()

In [None]:
# Line chart of the Close column against the frame's index.
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(df_nasdaq.index, df_nasdaq['Close'])
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
plt.show()

In [None]:
# Line chart of the Adj Close column against the frame's index.
plt.figure(figsize=(20, 10))
# Date was moved into the index earlier in this file, so the x values come
# from df_nasdaq.index, exactly as in the Open and Close charts.
plt.plot(df_nasdaq.index, df_nasdaq['Adj Close'])
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()

# Autocorrelation Plot

In [None]:
# Autocorrelation plot for the Open column, drawn with
# pandas.plotting.autocorrelation_plot.
autocorrelation_plot(df_nasdaq['Open'])


In [None]:
# Autocorrelation plot for the Close column, drawn with
# pandas.plotting.autocorrelation_plot.
autocorrelation_plot(df_nasdaq['Close'])

In [None]:
# Autocorrelation plot for the Adj Close column, drawn with
# pandas.plotting.autocorrelation_plot.
autocorrelation_plot(df_nasdaq['Adj Close'])

In [None]:
# Augmented Dickey-Fuller test on the Open column; prints the test statistic
# and its p-value.
# Code idea from https://www.machinelearningplus.com/time-series/arima-model-time-series-forecasting-python/
from statsmodels.tsa.stattools import adfuller
from numpy import log

result = adfuller(df_nasdaq['Open'].dropna())
# The first two entries of the result are the statistic and the p-value.
adf_statistic, p_value = result[0], result[1]
print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)

# Differencing Plots

In [None]:
import numpy as np
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt

# This changes the global defaults, so it also affects every figure created
# after this point, not only the one below.
plt.rcParams.update({'figure.figsize': (15, 7), 'figure.dpi': 120})

# One row per series (original, 1st difference): the left column shows the
# series, the right column its autocorrelation. The grid has exactly as many
# rows as are drawn, so no blank axes are left at the bottom of the figure.
fig, axes = plt.subplots(2, 2, sharex=True)

# Original series
axes[0, 0].plot(df_nasdaq['Open'])
axes[0, 0].set_title('Original Series')
plot_acf(df_nasdaq['Open'], ax=axes[0, 1])

# 1st differencing; diff() leaves a leading NaN, so it is dropped before the
# autocorrelation is computed.
axes[1, 0].plot(df_nasdaq['Open'].diff())
axes[1, 0].set_title('1st Order Differencing')
plot_acf(df_nasdaq['Open'].diff().dropna(), ax=axes[1, 1])

plt.show()

# ARIMA Models

In [None]:
# Fit an ARIMA model of order (1, 1, 1) to the Open column, passing freq='W',
# and print the fit summary.
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model at the top
# of this file; newer statsmodels releases provide it as
# statsmodels.tsa.arima.model.ARIMA instead. Confirm the installed version.
model = ARIMA(
    endog=df_nasdaq['Open'],
    order=np.array([1, 1, 1]),
    freq='W',
)
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Residual diagnostics for the Open model: line plot, kernel density estimate
# and summary statistics of the fitted model's residuals.
residuals = pd.DataFrame(model_fit.resid)
residuals.plot()
# matplotlib.pyplot is imported as plt in this file; the bare name `pyplot`
# is never bound, so calling it would raise NameError.
plt.show()
residuals.plot(kind='kde')
plt.show()
print(residuals.describe())

In [None]:
# Fit an ARIMA model of order (1, 1, 1) to the Close column, passing freq='W',
# and print the fit summary.
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model at the top
# of this file; newer statsmodels releases provide it as
# statsmodels.tsa.arima.model.ARIMA instead. Confirm the installed version.
model2 = ARIMA(
    endog=df_nasdaq['Close'],
    order=np.array([1, 1, 1]),
    freq='W',
)
model2_fit = model2.fit()
print(model2_fit.summary())

In [None]:
# Residual diagnostics for the Close model: line plot, kernel density estimate
# and summary statistics of the fitted model's residuals.
residuals2 = pd.DataFrame(model2_fit.resid)
residuals2.plot()
# matplotlib.pyplot is imported as plt in this file; the bare name `pyplot`
# is never bound, so calling it would raise NameError.
plt.show()
residuals2.plot(kind='kde')
plt.show()
print(residuals2.describe())

In [None]:
# Fit an ARIMA model of order (1, 1, 1) to the Adj Close column, passing
# freq='W', and print the fit summary.
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model at the top
# of this file; newer statsmodels releases provide it as
# statsmodels.tsa.arima.model.ARIMA instead. Confirm the installed version.
model3 = ARIMA(
    endog=df_nasdaq['Adj Close'],
    order=np.array([1, 1, 1]),
    freq='W',
)
model3_fit = model3.fit()
print(model3_fit.summary())

In [None]:
# Residual diagnostics for the Adj Close model: line plot, kernel density
# estimate and summary statistics of the fitted model's residuals.
residuals3 = pd.DataFrame(model3_fit.resid)
residuals3.plot()
# matplotlib.pyplot is imported as plt in this file; the bare name `pyplot`
# is never bound, so calling it would raise NameError.
plt.show()
residuals3.plot(kind='kde')
plt.show()
print(residuals3.describe())