# Exploring the Sharadar sample data set

In [None]:
import pandas as pd
import nasdaqdatalink as nasdaq

## Advertised sample tickers

In [None]:
# The advertised sample tickers, held as a ticker -> company-name mapping
# (insertion order is preserved) and then laid out as a two-column table.
advertised_names = {
    "AAPL": "APPLE INC",
    "AXP": "AMER EXPRESS CO",
    "BA": "BOEING CO",
    "CAT": "CATERPILLAR INC",
    "CSCO": "CISCO SYSTEMS",
    "CVX": "CHEVRON CORP",
    "DD": "DU PONT (EI) DE",
    "DIS": "DISNEY WALT",
    "GE": "GENL ELECTRIC",
    "GS": "GOLDMAN SACHS",
    "HD": "HOME DEPOT",
    "IBM": "INTL BUS MACH",
    "INTC": "INTEL CORP",
    "JNJ": "JOHNSON & JOHNS",
    "JPM": "JPMORGAN CHASE",
    "KO": "COCA COLA CO",
    "MCD": "MCDONALDS CORP",
    "MMM": "3M CO",
    "MRK": "MERCK & CO INC",
    "MSFT": "MICROSOFT CORP",
    "NKE": "NIKE INC-B",
    "PFE": "PFIZER INC",
    "PG": "PROCTER & GAMBL",
    "TRV": "TRAVELERS COS",
    "UNH": "UNITEDHEALTH GP",
    "UTX": "UTD TECHS CORP",
    "V": "VISA INC-A",
    "VZ": "VERIZON COMM",
    "WMT": "WALMART INC",
    "XOM": "EXXON MOBIL CRP",
}
sample_tickers = pd.DataFrame(
    list(advertised_names.items()),
    columns=['Ticker', 'Company Name'],
)
sample_tickers

In [None]:
# Download the SHARADAR/SEP table.
# paginate=True makes the client keep following the API's result cursor;
# without it get_table stops after the first page (10,000 rows), which
# would silently truncate the price history analysed in the cells below.
price_data = nasdaq.get_table("SHARADAR/SEP", paginate=True)
price_data

In [None]:
# Distinct ticker symbols present in the downloaded prices
ticker_column = price_data['ticker']
tickers_fetched = ticker_column.unique()
tickers_fetched

In [None]:
# How many distinct tickers came back from the download
number_tickers_fetched = len(tickers_fetched)
number_tickers_fetched # 30, matches the sample they indicate they offer

In [None]:
# Earliest and latest values of the 'date' column across all tickers
trading_dates = price_data['date']
min_date, max_date = trading_dates.min(), trading_dates.max()

(min_date, max_date)

## Checking quality

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Order rows by ticker, then date, so each ticker's prices are contiguous
# and chronological before day-over-day returns are computed.
price_data = price_data.sort_values(by=['ticker', 'date'])

# Percentage change of the closing price, computed within each ticker so the
# first row of one ticker is never compared with the last row of another.
price_data['asset_return'] = price_data.groupby('ticker')['close'].pct_change()

# Remove only the rows that have no return (e.g. each ticker's first row).
# A bare dropna() would also throw away rows with a NaN in any other column.
price_data = price_data.dropna(subset=['asset_return'])

# Histogram with a KDE (Kernel Density Estimate) overlay of daily returns,
# pooled across all tickers
plt.figure(figsize=(14, 7))
sns.histplot(price_data['asset_return'], kde=True, bins=50, color='skyblue', stat="density", linewidth=0)
plt.title('Distribution of Daily Returns for All Tickers')
plt.xlabel('Daily Return')
plt.ylabel('Density')
plt.show()

In [None]:
# Per-ticker volatility: the standard deviation of each ticker's daily returns
volatility = price_data.groupby('ticker')['asset_return'].std()

# Bar chart of the tickers ordered from least to most volatile
fig, ax = plt.subplots(figsize=(14, 7))
ranked_volatility = volatility.sort_values()
ranked_volatility.plot.bar(ax=ax, color='skyblue')
ax.set_title('Volatility (Standard Deviation) of Daily Returns for Each Ticker')
ax.set_xlabel('Ticker')
ax.set_ylabel('Volatility (Std. Dev.)')
plt.show()

In [None]:
# Compound the daily returns within each ticker: the running product of
# (1 + return), minus 1, gives the cumulative return to date.
growth_factor = 1 + price_data['asset_return']
compounded = growth_factor.groupby(price_data['ticker']).cumprod()
price_data['cumulative_return'] = compounded - 1

# One line per ticker on a shared set of axes
fig, ax = plt.subplots(figsize=(14, 7))
for ticker in price_data['ticker'].unique():
    ticker_data = price_data.loc[price_data['ticker'] == ticker]
    ax.plot(ticker_data['date'], ticker_data['cumulative_return'], label=ticker)

ax.set_title('Cumulative Returns Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative Return')
# Anchor the legend's upper-left corner to the axes' upper-right corner so
# the two-column legend sits beside the plot rather than over the lines.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=2, fontsize=8)
plt.show()