## Market Analysis

### Load Libraries

In [1]:
import pandas as pd
import numpy as np
import os
from os import listdir
from datetime import timedelta
import matplotlib.pyplot as plt
from utils.util import get_data, plot_data, compute_daily_returns, fit_scatter, compute_sharpe_ratio
%matplotlib inline 

### Initializations

In [None]:
# Folder that is scanned for the per-ticker CSV files
folder_name = 'download'

# Analysis window, both end points included
date_start, date_end = '2016-07-01', '2017-07-08'

# One entry per calendar day across the analysis window
dates = pd.date_range(start=date_start, end=date_end)

# Alternative universe: NYSE - Most Active Stocks
# symbols = ['BAC', 'RAD', 'GE', 'CHK', 'WFT', 'ABEV', 'WLL', 'VALE', 'AKS',
#             'X', 'ESV', 'F', 'T', 'CX', 'VRX', 'PBR', 'ITUB', 'SWN', 'PFE', 'MRO']

### Load Stock Data

In [None]:
# Build the list of tickers from the file names found in `folder_name`.
# Only '*.csv' files are considered, so stray files in the folder (hidden
# OS files, temporary files, ...) do not turn into bogus ticker symbols.
files_list = listdir(folder_name)
symbols = [os.path.splitext(file_name)[0]
           for file_name in files_list
           if file_name.endswith('.csv')]

# Remove SPY from the list so that it becomes the first element processed
# by get_data.
# NOTE(review): presumably get_data adds SPY itself -- confirm in utils.util.
if 'SPY' in symbols:
    symbols.remove('SPY')

df = get_data(symbols, dates, price='Close', folder=folder_name)

# Single pre-formatted string: prints the same on Python 2 and Python 3.
print("Total Stocks: %d" % df.shape[1])

### Clean DataFrame

In [None]:
# Drop every column that is entirely NaN over the last `days` calendar days
# of the data, or that is NaN for more than `max_nan_fraction` of all rows.
days = 20
max_nan_fraction = 0.35

# The look-back window is driven by `days` (it used to be hard-coded here,
# which silently ignored the variable above).
date_temp = df.index[-1] - timedelta(days=days)
# Label-based slice; `.loc` replaces the `.ix` indexer removed in pandas 1.0.
df_sub = df.loc[date_temp:date_end]

# Per-column flags, both indexed by column name.
recent_all_nan = df_sub.isnull().all()
nan_fraction = df.isnull().mean()
drop_mask = recent_all_nan | (nan_fraction > max_nan_fraction)

# Collect the names first, then drop them in one go (still in place).
dropped = df.columns[drop_mask.values].tolist()
df.drop(dropped, axis=1, inplace=True)

# Report the removed tickers on one line, followed by the remaining count.
# Pre-formatted strings print the same on Python 2 and Python 3.
print(' '.join(str(name) for name in dropped))
print("Total Stocks: %d" % df.shape[1])

### Compute statistics

In [None]:
# Normalize all stock prices by the first row, so every series starts at 1.
# `.iloc` is the positional replacement for the `.ix` indexer removed in
# pandas 1.0.
# NOTE(review): a column whose first row is NaN becomes all-NaN here --
# confirm whether get_data fills missing values.
df_n = df / df.iloc[0]

# Compute cumulative return (from beginning to end)
cum_ret = df_n.iloc[-1] - df_n.iloc[0]

# Compute daily return, mean and standard deviation (volatility)
daily_returns = compute_daily_returns(df)
avg_daily_ret = daily_returns.mean()
std_daily_ret = daily_returns.std()

# Compute Sharpe ratio
sharpe_ratio = compute_sharpe_ratio(df)

# Group statistics: one row per stock, one column per statistic
stat = pd.concat(
    [cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio],
    keys=['cum_ret', 'avg_daily_ret', 'std_daily_ret', 'sharpe_ratio'],
    axis=1,
)

In [None]:
# Scatter of average daily return (x) against daily-return standard
# deviation (y), one point per stock.
plt.figure(figsize=(12, 8))
plt.plot(avg_daily_ret, std_daily_ret, 'o')

# Label every point with its column name. Names and values are paired by
# position; iterating the Series directly avoids integer `Series[i]` lookups,
# which are deprecated on a label-indexed Series.
for txt, x_val, y_val in zip(df.columns, avg_daily_ret, std_daily_ret):
    plt.annotate(txt, (x_val, y_val))

plt.title('AVG daily returns vs STD daily returns')
plt.xlabel('avg_daily_ret')
plt.ylabel('std_daily_ret')
# Fixed axis limits: points outside this window are not shown.
plt.xlim((-0.002, 0.003))
plt.ylim((0.005, 0.016))
plt.grid()
plt.show()

In [None]:
# Sort the statistics by Sharpe ratio (best first), then show the row for BA.
# `.loc` is the label-based replacement for the `.ix` indexer removed in
# pandas 1.0.
stat = stat.sort_values(['sharpe_ratio'], ascending=False)
stat.loc['BA']

### Perform visual check

In [None]:
# Tickers selected for the visual check
stock = [
    'SPY', 'BA', 'BAC', 'CMA',
    'GD', 'GLW', 'STT',
]

# Daily returns are recomputed from df; the following cells read them
daily_returns = compute_daily_returns(df)

# Plot the normalized prices of the selected tickers
plot_data(df_n[stock])

* **Daily Returns**

In [None]:
# Hand the SPY and BA daily-return columns to plot_data
plot_data(
    daily_returns[['SPY', 'BA']],
    title="Daily Returns",
)

* **Scatter Plot**

In [None]:
# BA daily returns (y) against SPY daily returns (x)
# NOTE(review): presumably also draws a fitted line -- see utils.util.fit_scatter
fit_scatter(
    daily_returns,
    x='SPY',
    y='BA',
)

In [None]:
# Pairwise Pearson correlation between the daily returns of all columns
correl = daily_returns.corr(method='pearson')

# Correlation of every column with SPY, strongest first
correl.loc[:, 'SPY'].sort_values(ascending=False)