In [1]:
import pandas as pd
import numpy as np
import datetime
import quandl
import config


# settings to observe x amount of rows in dataframe
pd.set_option('max_rows', 500)
pd.set_option('max_columns', 1000)

# API Key
key = quandl.ApiConfig.api_key

In [2]:
tickers_data = pd.read_csv('/Users/Alfredo/Desktop/clustering-stocks/WIKI-datasets-codes.csv', header=-1)
tickers_data.columns = ['Ticker', 'Description']
tickers_data.head()

Unnamed: 0,Ticker,Description
0,WIKI/AAPL,"Apple Inc (AAPL) Prices, Dividends, Splits and..."
1,WIKI/AA,"Alcoa Inc. (AA) Prices, Dividends, Splits and ..."
2,WIKI/ABBV,"AbbVie Inc. (ABBV) Prices, Dividends, Splits a..."
3,WIKI/ABC,"AmerisourceBergen Corp. (ABC) Prices, Dividend..."
4,WIKI/ABT,"Abbott Laboratories (ABT) Prices, Dividends, S..."


In [3]:
# Loop through tickers given
tickers = tickers_data.Ticker[:3].tolist()

symbols = []

for sym in tickers:
    
    if sym in tickers:
        sym = sym.replace(".", "_")
    
    start = "2016-01-01"
    end = "2016-01-15"
    
    mydata = quandl.get(sym, start_date=start, end_date=end, api_key = key)
    mydata['Symbol'] = sym
    symbols.append(mydata)
    
    
# attach tickers tables
mydata = pd.concat(symbols)

# index 'Date' to column
mydata.reset_index(inplace=True)

# Change location of column 'Symbol'
symb = mydata['Symbol']
mydata.drop(labels=['Symbol'], axis=1,inplace = True)
mydata.insert(1, 'Symbol', symb)
mydata.head(3)

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
0,2016-01-04,WIKI/AAPL,102.61,105.368,102.0,105.35,67649387.0,0.0,1.0,99.136516,101.801154,98.547165,101.783763,67649387.0
1,2016-01-05,WIKI/AAPL,105.75,105.85,102.41,102.71,55790992.0,0.0,1.0,102.170223,102.266838,98.943286,99.233131,55790992.0
2,2016-01-06,WIKI/AAPL,100.56,102.37,99.87,100.7,68457388.0,0.0,1.0,97.155911,98.90464,96.489269,97.291172,68457388.0


In [4]:
# Slices WIKI out of symbol column for tickers
mydata['Symbol'] = mydata['Symbol'].map(lambda x: str(x)[5:])
#mydata.head(3)

# resetting indexes
mydata = mydata.set_index(['Date','Symbol']).unstack(0).stack(dropna=False).groupby(level='Symbol').ffill().bfill()
mydata.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AAPL,2016-01-04,102.61,105.368,102.0,105.35,67649387.0,0.0,1.0,99.136516,101.801154,98.547165,101.783763,67649387.0
AAPL,2016-01-05,105.75,105.85,102.41,102.71,55790992.0,0.0,1.0,102.170223,102.266838,98.943286,99.233131,55790992.0
AAPL,2016-01-06,100.56,102.37,99.87,100.7,68457388.0,0.0,1.0,97.155911,98.90464,96.489269,97.291172,68457388.0
AAPL,2016-01-07,98.68,100.13,96.43,96.45,81094428.0,0.0,1.0,95.339552,96.740467,93.165717,93.18504,81094428.0
AAPL,2016-01-08,98.55,99.11,96.76,96.96,70798016.0,0.0,1.0,95.213952,95.754996,93.484546,93.677776,70798016.0


In [5]:
# Builds daily return column
mydata['Daily_Return'] = mydata['Adj. Close'].pct_change(1)
#mydata.head()

# Builds overall percentage on return or Cumulitive return(return difference from start to current price)
mydata['Cumulitive_Return'] = mydata.groupby('Symbol')['Adj. Close'].transform(lambda x: x[-1]/x[0] -1)
#mydata.head(15)

# Calculates daily average high, low, volume trader for each stock
mydata['Avg_High'] = mydata.groupby('Symbol').High.transform('mean')
mydata['Avg_Low'] = mydata.groupby('Symbol').Low.transform('mean')
mydata['Avg_Vol_Trader'] = mydata.groupby('Symbol').Volume.transform('mean')
mydata['Avg_Dividend'] = mydata.groupby('Symbol')['Ex-Dividend'].transform('mean')


# Mean Daily Return
d_mean_return = mydata.groupby('Symbol').Daily_Return.transform('mean').to_frame()
mydata['Avg_Daily_Return'] = d_mean_return

# Std Daily
d_std_return = mydata.groupby('Symbol').Daily_Return.transform('std').to_frame()
mydata['Std_Daily_Return'] = d_std_return

# Calculates Daily Sharpe Ratio(Calculates risk adjusted return)
# mean return - risk free rate / standard deviation
# For this formula we are assuming a risk free rate of 0(different people,different opinions)
sr = d_mean_return/d_std_return
mydata['Daily_Sharpe_Ratio'] = sr


mydata.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume,Daily_Return,Cumulitive_Return,Avg_High,Avg_Low,Avg_Vol_Trader,Avg_Dividend,Avg_Daily_Return,Std_Daily_Return,Daily_Sharpe_Ratio
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAPL,2016-01-04,102.61,105.368,102.0,105.35,67649387.0,0.0,1.0,99.136516,101.801154,98.547165,101.783763,67649387.0,,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-05,105.75,105.85,102.41,102.71,55790992.0,0.0,1.0,102.170223,102.266838,98.943286,99.233131,55790992.0,-0.025059,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-06,100.56,102.37,99.87,100.7,68457388.0,0.0,1.0,97.155911,98.90464,96.489269,97.291172,68457388.0,-0.01957,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-07,98.68,100.13,96.43,96.45,81094428.0,0.0,1.0,95.339552,96.740467,93.165717,93.18504,81094428.0,-0.042205,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-08,98.55,99.11,96.76,96.96,70798016.0,0.0,1.0,95.213952,95.754996,93.484546,93.677776,70798016.0,0.005288,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-11,98.97,99.06,97.34,98.53,49739377.0,0.0,1.0,95.619735,95.706688,94.044912,95.194629,49739377.0,0.016192,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-12,100.55,100.69,98.8399,99.96,49154227.0,0.0,1.0,97.14625,97.281511,95.494039,96.576222,49154227.0,0.014513,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-13,100.32,101.19,97.3,97.39,62439631.0,0.0,1.0,96.924036,97.764585,94.006267,94.09322,62439631.0,-0.02571,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-14,97.96,100.48,95.74,99.52,63170127.0,0.0,1.0,94.643925,97.078619,92.499075,96.151117,63170127.0,0.021871,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
AAPL,2016-01-15,96.2,97.71,95.36,97.13,79833891.0,0.0,1.0,92.943503,94.402387,92.131938,93.842021,79833891.0,-0.024015,-0.078026,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236


In [6]:
mydata1 = mydata.drop(mydata.columns[:14], axis=1)
mydata2 = mydata1.reset_index(level=1, drop=True)
mydata3 = mydata2.groupby('Symbol').first()
mydata3

Unnamed: 0_level_0,Avg_High,Avg_Low,Avg_Vol_Trader,Avg_Dividend,Avg_Daily_Return,Std_Daily_Return,Daily_Sharpe_Ratio
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AAPL,101.1958,98.20499,64812746.4,0.0,-0.008744,0.02324,-0.376236
ABBV,56.95896,54.2315,11486529.4,0.057,-0.041795,0.141421,-0.295538
