In [15]:
import pandas as pd
import numpy as np
import datetime
import quandl
import config


# settings to observe x amount of rows in dataframe
pd.set_option('max_rows', 500)
pd.set_option('max_columns', 1000)

# API Key
key = quandl.ApiConfig.api_key

In [16]:
tickers_data = pd.read_csv('/Users/Alfredo/Desktop/clustering-stocks/WIKI-datasets-codes.csv', header=-1)
tickers_data.columns = ['Ticker', 'Description']
tickers_data.head()

Unnamed: 0,Ticker,Description
0,WIKI/AAPL,"Apple Inc (AAPL) Prices, Dividends, Splits and..."
1,WIKI/AA,"Alcoa Inc. (AA) Prices, Dividends, Splits and ..."
2,WIKI/ABBV,"AbbVie Inc. (ABBV) Prices, Dividends, Splits a..."
3,WIKI/ABC,"AmerisourceBergen Corp. (ABC) Prices, Dividend..."
4,WIKI/ABT,"Abbott Laboratories (ABT) Prices, Dividends, S..."


In [318]:
# Loop through tickers given
tickers = tickers_data[:3].Ticker.tolist()

symbols = []

for sym in tickers:
    
    if sym in tickers:
        sym = sym.replace(".", "_")
    
    start = ""
    end = ""
    
    mydata = quandl.get(sym, start_date=start, end_date=end, api_key = key)
    mydata['Symbol'] = sym
    symbols.append(mydata)
    
    
# attach tickers tables
mydata = pd.concat(symbols)

# index 'Date' to column
mydata.reset_index(inplace=True)

# Change location of column 'Symbol'
symb = mydata['Symbol']
mydata.drop(labels=['Symbol'], axis=1,inplace = True)
mydata.insert(1, 'Symbol', symb)
mydata.head(1)

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
0,1980-12-12,WIKI/AAPL,28.75,28.87,28.75,28.75,2093900.0,0.0,1.0,0.422706,0.42447,0.422706,0.422706,117258400.0


In [366]:
# SAVES FILE WITH A TABLE INCLUDING THE YEARS COMPANIES HAVE BEEN PUBLICLY TRADED

# Finds the number of years each company has been publicly traded
df_years = mydata.set_index('Symbol')

min_data = df_years.groupby('Symbol')['Date'].min() 
max_data = df_years.groupby('Symbol')['Date'].max()
new_df = (max_data - min_data).to_frame() / 365.25
#new_df.head()
new_df['Date'] = new_df['Date'].map(lambda x: str(x)[:-21])
new_df = new_df.rename(index=str, columns={"Date":"Yrs_Publ_Trded"}).reset_index()
#new_df

mydata_yrs = mydata.merge(new_df)
mydata_yrs = mydata_yrs.groupby(['Symbol','Yrs_Publ_Trded']).size().to_frame().reset_index()
mydata_yrs = mydata_yrs.drop(mydata_yrs.columns[2],axis=1)
#mydata_yrs.head()

mydata_yrs.to_csv("years_publ_traded.csv")
ndf = pd.read_csv('/Users/Alfredo/Desktop/clustering-stocks/years_publ_traded.csv')
ndf = ndf.drop(ndf.columns[0], axis=1)
ndf.head(1)

Unnamed: 0,Symbol,Yrs_Publ_Trded
0,WIKI/AA,1


In [368]:
mydata = mydata.merge(ndf)
mydata.head(1)

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume,Yrs_Publ_Trded
0,1980-12-12,WIKI/AAPL,28.75,28.87,28.75,28.75,2093900.0,0.0,1.0,0.422706,0.42447,0.422706,0.422706,117258400.0,37


In [313]:
# Slices WIKI out of symbol column for tickers
mydata['Symbol'] = mydata['Symbol'].map(lambda x: str(x)[5:])
#mydata.head(3)

# resetting indexes
mydata = mydata.set_index(['Date','Symbol']).unstack(0).stack(dropna=False).groupby(level='Symbol').ffill().bfill()
mydata.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume,Yrs_Publ_Trded
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AA,1980-12-12,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,1
AA,1980-12-15,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,1
AA,1980-12-16,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,1
AA,1980-12-17,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,1
AA,1980-12-18,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,1


In [314]:
# Builds daily return column
mydata['Daily_Return'] = mydata['Adj. Close'].pct_change(1)
#mydata.head(1)

# Builds overall percentage on return or Cumulitive return(return difference from start to current price)
mydata['Cumulitive_Return'] = mydata.groupby('Symbol')['Adj. Close'].transform(lambda x: x[-1]/x[0] -1)
#mydata.head(2)

# Calculates daily average high, low, volume trader for each stock
mydata['Avg_High'] = mydata.groupby('Symbol').High.transform('mean')
mydata['Avg_Low'] = mydata.groupby('Symbol').Low.transform('mean')
mydata['Avg_Vol_Trader'] = mydata.groupby('Symbol').Volume.transform('mean')
mydata['Avg_Dividend'] = mydata.groupby('Symbol')['Ex-Dividend'].transform('mean')
#mydata.head(2)

# Mean Daily Return
d_mean_return = mydata.groupby('Symbol').Daily_Return.transform('mean').to_frame()
mydata['Avg_Daily_Return'] = d_mean_return
#mydata.head(2)

# Std Daily
d_std_return = mydata.groupby('Symbol').Daily_Return.transform('std').to_frame()
mydata['Std_Daily_Return'] = d_std_return
#mydata.head(2)

# Calculates Daily Sharpe Ratio(Calculates risk adjusted return)
# mean return - risk free rate / standard deviation
# For this formula we are assuming a risk free rate of 0(different people,different opinions)
sr = d_mean_return/d_std_return
mydata['Daily_Sharpe_Ratio'] = sr

mydata.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume,Yrs_Publ_Trded,Daily_Return,Cumulitive_Return,Avg_High,Avg_Low,Avg_Vol_Trader,Avg_Dividend,Avg_Daily_Return,Std_Daily_Return,Daily_Sharpe_Ratio
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
AA,1980-12-12,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,1,,0.937826,23.964408,22.212787,31367230.0,0.0,8e-05,0.004264,0.018733


In [315]:
# Builds list to easy access to reorder
cols = list(mydata.columns.values)
mydata = mydata[['Open','High','Low','Close','Volume','Ex-Dividend','Split Ratio','Adj. Open','Adj. High','Adj. Low',
                 'Adj. Close','Adj. Volume','Daily_Return','Cumulitive_Return','Yrs_Publ_Trded','Avg_High','Avg_Low',
                 'Avg_Vol_Trader','Avg_Dividend','Avg_Daily_Return','Std_Daily_Return','Daily_Sharpe_Ratio']]
mydata.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume,Daily_Return,Cumulitive_Return,Yrs_Publ_Trded,Avg_High,Avg_Low,Avg_Vol_Trader,Avg_Dividend,Avg_Daily_Return,Std_Daily_Return,Daily_Sharpe_Ratio
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
AA,1980-12-12,22.1,23.55,21.78,23.0,32216510.0,0.0,1.0,22.1,23.55,21.78,23.0,32216510.0,,0.937826,1,23.964408,22.212787,31367230.0,0.0,8e-05,0.004264,0.018733


In [316]:
mydata1 = mydata.drop(mydata.columns[:14], axis=1)
mydata2 = mydata1.reset_index(level=1, drop=True)
mydata3 = mydata2.groupby('Symbol').first()
mydata3

Unnamed: 0_level_0,Yrs_Publ_Trded,Avg_High,Avg_Low,Avg_Vol_Trader,Avg_Dividend,Avg_Daily_Return,Std_Daily_Return,Daily_Sharpe_Ratio
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AA,1,23.964408,22.212787,31367230.0,0.0,8e-05,0.004264,0.018733
AAPL,37,101.951188,99.385222,11821520.0,0.003658,0.000972,0.030862,0.031484
ABBV,4,38.655125,37.363113,12995260.0,0.001084,5.8e-05,0.010474,0.005503
ABC,22,42.110383,40.89456,2866444.0,0.001114,0.00042,0.019307,0.021771
ABT,34,47.652312,46.683037,6234715.0,0.007528,0.000502,0.018583,0.027026
