# NASDAQ Data Exploration 

In [1]:
import pandas as pd
from pandas import Series
from pandas.plotting import autocorrelation_plot
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA


from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [2]:
import warnings
import itertools
import numpy as np
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')

import statsmodels.api as sm
import matplotlib
matplotlib.rcParams['axes.labelsize'] = 18
matplotlib.rcParams['xtick.labelsize'] = 16
matplotlib.rcParams['ytick.labelsize'] = 16
matplotlib.rcParams['text.color'] = 'k'

Load NASDAQ file into DataFrame

In [3]:
nasdaq = "NASDAQ.csv"
df_nasdaq = pd.read_csv(nasdaq)


In [4]:
#drop uneccesary columns

df_nasdaq.drop('High', axis = 1, inplace = True)
df_nasdaq.drop('Low', axis = 1, inplace = True)
df_nasdaq.drop('Volume', axis = 1, inplace = True)

In [5]:
#check  dataframe
print(df_nasdaq)

             Date         Open        Close    Adj Close
0      1971-02-05   100.000000   100.000000   100.000000
1      1971-02-08   100.839996   100.839996   100.839996
2      1971-02-09   100.760002   100.760002   100.760002
3      1971-02-10   100.690002   100.690002   100.690002
4      1971-02-11   101.449997   101.449997   101.449997
5      1971-02-12   102.050003   102.050003   102.050003
6      1971-02-16   102.190002   102.190002   102.190002
7      1971-02-17   101.739998   101.739998   101.739998
8      1971-02-18   101.419998   101.419998   101.419998
9      1971-02-19   100.699997   100.699997   100.699997
10     1971-02-22    99.680000    99.680000    99.680000
11     1971-02-23    99.720001    99.720001    99.720001
12     1971-02-24   100.639999   100.639999   100.639999
13     1971-02-25   101.230003   101.230003   101.230003
14     1971-02-26   101.339996   101.339996   101.339996
15     1971-03-01   101.779999   101.779999   101.779999
16     1971-03-02   101.839996 

Drop empty Columns

In [6]:
null_open = any(df_nasdaq['Open'].isnull())
if null_open:
    print("Removing null entries from Open Stocks")
    df_nasdaq = df_nasdaq.dropna()  
    
assert not null_open

In [7]:
null_close= any(df_nasdaq['Close'].isnull())
if null_close:
    print("Removing null entries from Close Stocks")
    df_nasdaq = df_nasdaq.dropna()  
    
assert not null_close

In [8]:
null_adjclose= any(df_nasdaq['Adj Close'].isnull())
if null_adjclose:
    print("Removing null entries from Adj Close Stocks")
    df_nasdaq = df_nasdaq.dropna()  
    
assert not null_adjclose

In [None]:
df_nasdaq.describe()

Unnamed: 0,Open,Close,Adj Close
count,12168.0,12168.0,12168.0
mean,1633.183298,1632.965611,1632.965611
std,1807.721775,1807.523459,1807.523459
min,54.869999,54.869999,54.869999
25%,237.900002,237.900002,237.900002
50%,798.429993,799.279999,799.279999
75%,2388.755005,2380.71991,2380.71991
max,8150.850098,8164.0,8164.0


In [None]:
#setting index as date
df_nasdaq.set_index('Date', inplace = True)
print(df_nasdaq.index)


Index(['1971-02-05', '1971-02-08', '1971-02-09', '1971-02-10', '1971-02-11',
       '1971-02-12', '1971-02-16', '1971-02-17', '1971-02-18', '1971-02-19',
       ...
       '2019-04-22', '2019-04-23', '2019-04-24', '2019-04-25', '2019-04-26',
       '2019-04-29', '2019-04-30', '2019-05-01', '2019-05-02', '2019-05-03'],
      dtype='object', name='Date', length=12168)


In [None]:
plt.figure(figsize=(20, 10))
plt.plot(df_nasdaq.index, df_nasdaq['Open'])
#df_nasdaq['Open'].plot(figsize=(15, 6))
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()

In [None]:
plt.figure(figsize=(20, 10))
plt.plot(df_nasdaq.index, df_nasdaq['Close'])

#df_nasdaq['Open'].plot(figsize=(15, 6))
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()

In [None]:
plt.figure(figsize=(20, 10))
plt.plot(df_nasdaq.index, df_nasdaq['Adj Close'])

#df_nasdaq['Open'].plot(figsize=(15, 6))
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()

In [None]:
autocorrelation_plot(df_nasdaq['Open'])


In [None]:
autocorrelation_plot(df_nasdaq['Close'])

In [None]:
autocorrelation_plot(df_nasdaq['Adj Close'])

In [None]:
#code idea from https://www.machinelearningplus.com/time-series/arima-model-time-series-forecasting-python/
from statsmodels.tsa.stattools import adfuller
from numpy import log
result = adfuller(df_nasdaq['Open'].dropna())
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])

In [None]:
#code idea from https://www.machinelearningplus.com/time-series/arima-model-time-series-forecasting-python/
import numpy as np, pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.figsize':(15,7), 'figure.dpi':120})

fig, axes = plt.subplots(3, 2, sharex=True)
axes[0, 0].plot(df_nasdaq['Open']); axes[0, 0].set_title('Original Series')
plot_acf(df_nasdaq['Open'], ax=axes[0, 1])

# 1st Differencing
axes[1, 0].plot(df_nasdaq['Open'].diff()); axes[1, 0].set_title('1st Order Differencing')
plot_acf(df_nasdaq['Open'].diff().dropna(), ax=axes[1, 1])

plt.show()

In [None]:
series = Series.df_nasdaq['Open']
series.plot()
pyplot.show()

In [None]:
#model = ARIMA(df_nasdaq['Open'], order=(,,), dates=df_nasdaq.index)
#model_ARIMA = model.fit()
#print(model_ARIMA.summary())

In [None]:
#residuals = DataFrame(model_ARIMA.resid)
#residuals.plot()
#pyplot.show()
#residuals.plot(kind='kde')
#pyplot.show()
#print(residuals.describe())