In [2]:
import plotly as plotly
import plotly.plotly as py
import plotly.graph_objs as go
import json
import requests
from requests.auth import HTTPBasicAuth

In [3]:
username = 'doddahonnaiah.d' 
api_key = 'CRJtoZiMSTUMAqoQFBi2'

auth = HTTPBasicAuth(username, api_key)
headers = {'Plotly-Client-Platform': 'python'}

plotly.tools.set_credentials_file(username=username, api_key=api_key)

def get_pages(username, page_size):
    url = 'https://api.plot.ly/v2/folders/all?user='+username+'&page_size='+str(page_size)
    response = requests.get(url, auth=auth, headers=headers)
    if response.status_code != 200:
        return
    page = json.loads(response.content)
    yield page
    while True:
        resource = page['children']['next'] 
        if not resource:
            break
        response = requests.get(resource, auth=auth, headers=headers)
        if response.status_code != 200:
            break
        page = json.loads(response.content)
        yield page
        
def permanently_delete_files(username, page_size=500, filetype_to_delete='plot'):
    for page in get_pages(username, page_size):
        for x in range(0, len(page['children']['results'])):
            fid = page['children']['results'][x]['fid']
            res = requests.get('https://api.plot.ly/v2/files/' + fid, auth=auth, headers=headers)
            res.raise_for_status()
            if res.status_code == 200:
                json_res = json.loads(res.content)
                if json_res['filetype'] == filetype_to_delete:
                    requests.post('https://api.plot.ly/v2/files/'+fid+'/trash', auth=auth, headers=headers)
                    requests.delete('https://api.plot.ly/v2/files/'+fid+'/permanent_delete', auth=auth, headers=headers)

permanently_delete_files(username, filetype_to_delete='plot')

In [4]:
import pandas as pd

In [6]:
prices = pd.read_csv('./nyse/prices.csv')

In [7]:
prices.head()

Unnamed: 0,date,symbol,open,close,low,high,volume
0,2016-01-05 00:00:00,WLTW,123.43,125.839996,122.309998,126.25,2163600.0
1,2016-01-06 00:00:00,WLTW,125.239998,119.980003,119.940002,125.540001,2386400.0
2,2016-01-07 00:00:00,WLTW,116.379997,114.949997,114.93,119.739998,2489500.0
3,2016-01-08 00:00:00,WLTW,115.480003,116.620003,113.5,117.440002,2006300.0
4,2016-01-11 00:00:00,WLTW,117.010002,114.970001,114.089996,117.330002,1408600.0


In [8]:
prices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 851264 entries, 0 to 851263
Data columns (total 7 columns):
date      851264 non-null object
symbol    851264 non-null object
open      851264 non-null float64
close     851264 non-null float64
low       851264 non-null float64
high      851264 non-null float64
volume    851264 non-null float64
dtypes: float64(5), object(2)
memory usage: 45.5+ MB


* date - key
* symbol - key
* open - value
* close - value
* low - value
* high - value
* volume - value

`date` and `symbol` are the keys for this data. There are duplicate occurance of both, but only one occurance of a  combination of date and symbol. The others are all value attritubes. 

### TASK: to explore volatility of stock prices

One task that uses the data could be to explore how much the stock prices change as an exploration task. Comparing changes in stock for different companies gives us an idea as to how much fluctuation there is. A boxplot would be appropriate to learn how various stocks compare. Here, the target is the distribution of change. Change is the difference between the opening and closing prices of a stock on a given day. 

* actions: enjoy, summarize
* target: distribution

In [13]:
companies = ['GOOG', 'AAPL', 'MSFT', 'FB', 'ADBE']
sub_prices = prices[prices.symbol.isin(companies)]
sub_prices['change'] = sub_prices.open - sub_prices.close



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [16]:
traces = []
name_map = {
    'GOOG': 'Google',
    'AAPL': 'Apple',
    'MSFT': 'Microsoft',
    'FB': 'Facebook',
    'ADBE': 'Adobe',
}
for cmp in companies:
    cmp_df = sub_prices[sub_prices.symbol == cmp]
    trace = go.Box(
        y = cmp_df['change'],
        name = name_map[cmp]
    )
    traces.append(trace)
    
layout = dict(
    title = "Boxplot of change",
    xaxis = dict(title="Company"),
    yaxis = dict(title="Change (open - close) in $")
)

In [15]:
py.iplot(dict(data=traces, layout=layout))

For this visualization, 
* marks : lines and area
* channels: color (for companies), area (of box) (for IQR), position (vertical) 

From the visualization, we can see that the IQR for Google is much larger than that of any other company, showing that there is more fluctuation in Google's stock.

This visualization can use the pan feature with plotly when there are a lot of companies to show, where panning will help declutter the plot and allow the user to clearly see the distribution for each company. The user can also click on the legend to enable or disable a company from the plot, making comarisions more flexible.

In [17]:
traces = []
for cmp in companies:
    cmp_df = sub_prices[sub_prices.symbol == cmp]
    trace = go.Scatter(
        x = cmp_df['volume'],
        y = cmp_df['change'],
        name = name_map[cmp],
        mode = 'markers'
    )
    traces.append(trace)

layout = dict(
    title = "Volume vs Change",
    xaxis = dict(title="Volume of trade ($)"),
    yaxis = dict(title="Change (open - close) in $")
)

In [18]:
py.iplot(dict(data=traces, layout=layout))

In this visualization we can see the relation between volume and change. This would aid the user to make better choicesin stock. A stock with less change but high volume would be a less volatile stock to trade on. 
* marks - points
* channels - position (both), color

I use position as it is the most effective to communicate the trend. And color to help distinguish between the stocks. 

As seen earlier Google's stock seem to change a lot, but also it is less traded. Apple, however has a high volume, and lesser change each day. We can notice that as volume of stock increases, the change increases for apple. 

Like the previous viz, zoom and pan will help user dig deeper into the viz to see how volume affects change. Using filter, the user can pick only the companies that interest him.