# Data Wrangling with Quandl

### Import the relevant Modules

Detailed Quandl API instructions here: https://docs.quandl.com/docs/time-series

In [1]:
import requests
import json

In [2]:
# Request configuration - every value below is stored as a string.
# API key: obtained by registering at http://www.quandl.com (left empty here)
API_KEY = ''
# Ticker of the stock to analyse
STOCK_TICKER = 'AFX_X'
# First and last day of the period to analyse, written as YYYY-MM-DD
START_DATE = '2017-01-01'
END_DATE = '2017-12-31'

### Getting a glimpse at the data

In [3]:
# Glimpse at the structure of the response (the API returns JSON).
# The Quandl API reads the key from the `api_key` query parameter (underscore, not hyphen).
# requests leaves out parameters whose value is None, so an empty API_KEY sends no key
# at all instead of an empty one.
# The single-day window keeps the response small; it is passed through `params`
# so requests builds and encodes the query string.
params = {
    'api_key': API_KEY or None,
    'start_date': '2021-09-01',
    'end_date': '2021-09-01',
}
req = requests.get(
    f'https://www.quandl.com/api/v3/datasets/FSE/{STOCK_TICKER}.json',
    params=params
)
req_json = req.json()
req_json

{'dataset': {'id': 10095370,
  'dataset_code': 'AFX_X',
  'database_code': 'FSE',
  'name': 'Carl Zeiss Meditec (AFX_X)',
  'description': 'Stock Prices for Carl Zeiss Meditec (2020-11-02) from the Frankfurt Stock Exchange.<br><br>Trading System: Xetra<br><br>ISIN: DE0005313704',
  'refreshed_at': '2020-12-01T14:48:09.907Z',
  'newest_available_date': '2020-12-01',
  'oldest_available_date': '2000-06-07',
  'column_names': ['Date',
   'Open',
   'High',
   'Low',
   'Close',
   'Change',
   'Traded Volume',
   'Turnover',
   'Last Price of the Day',
   'Daily Traded Units',
   'Daily Turnover'],
  'frequency': 'daily',
  'type': 'Time Series',
  'premium': False,
  'limit': None,
  'transform': None,
  'column_index': None,
  'start_date': '2021-09-01',
  'end_date': '2020-12-01',
  'data': [],
  'collapse': None,
  'order': None,
  'database_id': 6129}}

In [4]:
# Check the status code of the glimpse request (200 / requests.codes.ok means success)
code = req.status_code
if code != requests.codes.ok:
    # Look up the short name requests associates with this status code
    print(f'Requests Error {code}: {requests.status_codes._codes[code][0]}')
else:
    print(f'{STOCK_TICKER} - Data properly imported')

AFX_X - Data properly imported



### 1. Collect data from the Frankfurt Stock Exchange, for the ticker AFX_X, for the whole year 2017


In [5]:
# Collect the stock exchange data using the start date, end date, and stock ticker.
# The Quandl API reads the key from the `api_key` query parameter (underscore, not hyphen).
# requests leaves out parameters whose value is None, so an empty API_KEY sends no key
# at all instead of an empty one.
stock_params = {'api_key': API_KEY or None, 'start_date': START_DATE, 'end_date': END_DATE}
stock_req = requests.get(
    f'https://www.quandl.com/api/v3/datasets/FSE/{STOCK_TICKER}.json',
    params=stock_params
)

In [6]:
# Check the status code of the 2017 data request (200 / requests.codes.ok means success)
stock_code = stock_req.status_code
if stock_code == requests.codes.ok:
    print(f'{STOCK_TICKER} - Data properly imported')
else:
    # Report the name of *this* request's status code (stock_code), not the
    # status of the earlier glimpse request.
    print(f'Requests Error {stock_code}: {requests.status_codes._codes[stock_code][0]}')

AFX_X - Data properly imported


### 2. Convert the returned JSON object into a Python dictionary.

In [7]:
# Parse the JSON body of the response with the response object's own .json() method,
# then confirm that the parsed result is a Python dict
stock_json = stock_req.json()
type(stock_json)

dict

### 3. Calculate what the highest and lowest opening prices were for the stock in this period.

In [8]:
# Show the column names; a column's position in this list is used to index into each data row
stock_json['dataset']['column_names']

['Date',
 'Open',
 'High',
 'Low',
 'Close',
 'Change',
 'Traded Volume',
 'Turnover',
 'Last Price of the Day',
 'Daily Traded Units',
 'Daily Turnover']

In [9]:
# Position of the 'Open' column within each data row
open_idx = stock_json['dataset']['column_names'].index('Open')

# Every data row returned for the requested time period
data_points = stock_json['dataset']['data']

# Opening price of each row, leaving out rows whose opening price is missing (None)
opening = [row[open_idx] for row in data_points if row[open_idx] is not None]
# display the first 20 elements of opening
opening[:20]

[51.76,
 51.65,
 51.45,
 51.05,
 51.16,
 51.88,
 52.73,
 52.37,
 52.7,
 53.11,
 52.64,
 52.29,
 52.28,
 51.5,
 50.89,
 50.8,
 51.21,
 49.5,
 49.52,
 48.64]

In [10]:
# Highest opening price: the maximum of the non-missing opening prices in `opening`
high_opening = max(opening)
print(f'Highest Opening Price: {high_opening}')

Highest Opening Price: 53.11


In [11]:
# Lowest opening price: the minimum of the non-missing opening prices in `opening`
low_opening = min(opening)
print(f'Lowest Opening Price: {low_opening}')

Lowest Opening Price: 34.0


### 4. What was the largest change in any one day (based on High and Low price)?

In [12]:
# Positions of the 'High' and 'Low' columns within each data row
high_idx = stock_json['dataset']['column_names'].index('High')
low_idx = stock_json['dataset']['column_names'].index('Low')

# One (high, low) pair per data row; missing values (None) are kept at this stage
high_low = [(row[high_idx], row[low_idx]) for row in data_points]
# display the first 20 elements of high_low
high_low[:20]

[(51.94, 51.45),
 (51.82, 51.43),
 (51.89, 50.76),
 (51.5, 50.92),
 (51.52, 50.9),
 (52.04, 51.2),
 (52.73, 51.07),
 (52.75, 51.61),
 (52.7, 51.64),
 (53.54, 52.15),
 (53.35, 52.48),
 (53.1, 51.82),
 (52.45, 51.26),
 (52.83, 51.28),
 (51.47, 50.81),
 (51.11, 50.39),
 (51.38, 50.4),
 (51.23, 49.5),
 (50.49, 49.17),
 (49.84, 48.28)]

In [13]:
# Absolute high-low difference for every row that has both values,
# formatted as a string with two decimals
daily_swing = [
    '%.2f' % abs(high - low)
    for high, low in high_low
    if high is not None and low is not None
]
# display the first 20 elements of daily_swing
daily_swing[:20]

['0.49',
 '0.39',
 '1.13',
 '0.58',
 '0.62',
 '0.84',
 '1.66',
 '1.14',
 '1.06',
 '1.39',
 '0.87',
 '1.28',
 '1.19',
 '1.55',
 '0.66',
 '0.72',
 '0.98',
 '1.73',
 '1.32',
 '1.56']

In [14]:
# Get the largest daily swing.
# daily_swing holds '%.2f'-formatted strings, so the entries are compared by their
# numeric value; a plain max() would compare them as text and rank e.g. '9.50'
# above '10.20'.
max_swing = max(daily_swing, key=float)
print(f'Largest change in one day: {max_swing}')

Largest change in one day: 2.81


### 5. What was the largest change between any two days (based on Closing Price)?

In [15]:
# Position of the 'Close' column within each data row
close_idx = stock_json['dataset']['column_names'].index('Close')

# Closing price of each row, leaving out rows whose closing price is missing (None)
closing = [row[close_idx] for row in data_points if row[close_idx] is not None]
# display the first 20 elements of closing
closing[:20]

[51.76,
 51.6,
 51.82,
 51.32,
 51.4,
 51.27,
 51.66,
 52.62,
 52.01,
 52.67,
 53.09,
 52.43,
 52.14,
 52.12,
 51.47,
 50.89,
 51.25,
 51.14,
 49.86,
 49.7]

In [16]:
# Absolute difference between each closing price and the one before it in `closing`,
# formatted as a string with two decimals.
# Rows without a closing price were already removed from `closing`, so a pair
# can span such a gap.
daily_close_change = [
    '%.2f' % abs(current - previous)
    for previous, current in zip(closing, closing[1:])
]
# display the first 20 elements of daily_close_change
daily_close_change[:20]

['0.16',
 '0.22',
 '0.50',
 '0.08',
 '0.13',
 '0.39',
 '0.96',
 '0.61',
 '0.66',
 '0.42',
 '0.66',
 '0.29',
 '0.02',
 '0.65',
 '0.58',
 '0.36',
 '0.11',
 '1.28',
 '0.16',
 '0.95']

In [17]:
# Get the largest change between two consecutive closing prices.
# daily_close_change holds '%.2f'-formatted strings, so the entries are compared by
# their numeric value; a plain max() would compare them as text and rank e.g. '9.50'
# above '10.20'.
largest_change = max(daily_close_change, key=float)
print(f'Largest change between two days: {largest_change}')

Largest change between two days: 2.56


### 6. What was the average daily trading volume during this year?

In [18]:
# Position of the 'Traded Volume' column within each data row
traded_volume_idx = stock_json['dataset']['column_names'].index('Traded Volume')

# Traded volume of each row, leaving out rows whose volume is missing (None)
trading_volume = [x[traded_volume_idx] for x in data_points if x[traded_volume_idx] is not None]
# display the first 20 elements of trading_volume
trading_volume[0:20]

In [19]:
# Average (mean) daily trading volume - helper that averages a list of numbers
def lst_mean(lst: list) -> float:
    """Return the arithmetic mean of the items in ``lst``.

    Every item is converted with ``float()`` before averaging, so numeric
    strings are accepted as well as numbers.

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
        ValueError / TypeError: if an item cannot be converted to float.
    """
    values = list(map(float, lst))
    return sum(values) / len(values)

# Mean of the non-missing daily traded volumes, printed with two decimals
avg_trading = lst_mean(trading_volume)
print(
    f'Average daily trading volume: '
    + '%.2f' % avg_trading
)

Average daily trading volume: 89124.34


### 7. What was the median trading volume during this year? (Note: you may need to implement your own function for calculating the median.)

In [20]:
# Helper that returns the median of a list of numbers
def lst_median(lst: list) -> float:
    """Return the median of the items in ``lst``.

    Every item is converted with ``float()`` and the values are sorted.
    For an odd number of items the middle value is returned; for an even
    number, the mean of the two middle values.

    Raises:
        IndexError: if ``lst`` is empty.
        ValueError / TypeError: if an item cannot be converted to float.
    """
    ordered = sorted(float(item) for item in lst)
    count = len(ordered)
    middle = count // 2
    if count % 2:
        # odd length: a single middle element
        return ordered[middle]
    # even length: average the two elements around the midpoint
    return (ordered[middle - 1] + ordered[middle]) / 2
    
# Median of the non-missing daily traded volumes, printed with two decimals
median_trading = lst_median(trading_volume)
print(
    f'Median daily trading volume: '
    + '%.2f' % median_trading
)

Median daily trading volume: 76286.00
