# Data Preparation
The data, overall, is pretty clean. 

There are two things I need to handle in preparation for analysis:
* First, I account for stock splits. 
* Second, I transform the data into log-returns. 

In [1]:
import pandas as pd
import numpy as np
import quandl

Unnamed: 0.1,Unnamed: 0,MMM,ABT,ABBV,ACN,ATVI,AYI,ADBE,AMD,AAP,...,WYNN,XEL,XRX,XLNX,XL,XYL,YUM,ZBH,ZION,ZTS
0,1999-01-04,23.747541,13.822417,,,0.828795,,6.1858,14.0,,...,,11.457631,177.511958,12.447396,45.418931,,6.343066,,47.061397,
1,1999-01-05,23.884369,13.839429,,,0.823463,,6.054503,13.845,,...,,11.482684,180.827122,12.983069,45.231674,,6.465631,,46.967758,
2,1999-01-06,24.998082,13.73452,,,0.837936,,5.83774,13.625,,...,,11.536966,184.292975,12.867348,42.700681,,6.564474,,46.967758,
3,1999-01-07,24.422133,13.555892,,,0.833366,,5.961605,13.72,,...,,11.482684,183.539529,12.774026,43.491994,,6.531526,,46.624415,
4,1999-01-08,24.543051,13.504856,,,0.823463,,6.045833,14.0,,...,,11.403349,182.635393,12.785224,41.679828,,6.548659,,46.725857,


In [2]:
# Promote the first column (the trading dates) to the index and label it
# "date". The guard keeps this cell idempotent: without it, re-running the
# cell would promote the next column (the first ticker) to the index and
# silently drop it from the price columns.
if data.index.name != "date":
    data = data.set_index(data.columns[0]).rename_axis("date")
data.head()

Unnamed: 0_level_0,MMM,ABT,ABBV,ACN,ATVI,AYI,ADBE,AMD,AAP,AES,...,WYNN,XEL,XRX,XLNX,XL,XYL,YUM,ZBH,ZION,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-01-04,23.747541,13.822417,,,0.828795,,6.1858,14.0,,18.803813,...,,11.457631,177.511958,12.447396,45.418931,,6.343066,,47.061397,
1999-01-05,23.884369,13.839429,,,0.823463,,6.054503,13.845,,18.73081,...,,11.482684,180.827122,12.983069,45.231674,,6.465631,,46.967758,
1999-01-06,24.998082,13.73452,,,0.837936,,5.83774,13.625,,18.73081,...,,11.536966,184.292975,12.867348,42.700681,,6.564474,,46.967758,
1999-01-07,24.422133,13.555892,,,0.833366,,5.961605,13.72,,18.250533,...,,11.482684,183.539529,12.774026,43.491994,,6.531526,,46.624415,
1999-01-08,24.543051,13.504856,,,0.823463,,6.045833,14.0,,17.578145,...,,11.403349,182.635393,12.785224,41.679828,,6.548659,,46.725857,


## 1) Accounting for Stock Splits
Stock splits increase the number of shares outstanding while the company's value remains the same. Typically this is done 2-for-1: if you hold 10 shares of AAPL priced at 150 each, you will hold 20 shares priced at 75 each afterwards. Left unaccounted for, a split shows up in the price series as a drastic one-day drop (about -50% for a 2-for-1 split) even though the value of the holding has not changed.

## 2) Log-Returns
Most analyses of stock returns are done in log-returns because log-returns are additive over time: the sum of 10 consecutive one-period log-returns equals the log-return over the whole 10-period span, $\sum_{t=1}^{10} \ln(P_t / P_{t-1}) = \ln(P_{10} / P_0)$.

In [24]:
def log_return(stock):
    """Return the one-period log-returns of a price series.

    Each value is ln(price_t) - ln(price_{t-1}). The first row has no
    predecessor, so its result is NaN; missing prices propagate as NaN.

    Parameters
    ----------
    stock : pandas.Series or pandas.DataFrame
        Prices ordered in time, one row per period.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Log-returns with the same index (and columns) as ``stock``.
    """
    log_price = np.log(stock)
    # diff() subtracts the previous row, i.e. log_price - log_price.shift(1).
    return log_price.diff()

# log_return is built from vectorised operations (np.log, shifting by one
# row), so it accepts the whole price frame at once; a column-by-column
# .apply produces the same values with extra per-column overhead.
logret = log_return(data)
logret.head()

Unnamed: 0_level_0,MMM,ABT,ABBV,ACN,ATVI,AYI,ADBE,AMD,AAP,AES,...,WYNN,XEL,XRX,XLNX,XL,XYL,YUM,ZBH,ZION,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1999-01-04,,,,,,,,,,,...,,,,,,,,,,
1999-01-05,0.005745,0.00123,,,-0.006455,,-0.021454,-0.011133,,-0.00389,...,,0.002184,0.018503,0.042135,-0.004131,,0.019138,,-0.001992,
1999-01-06,0.045575,-0.007609,,,0.017424,,-0.036459,-0.016018,,0.0,...,,0.004716,0.018985,-0.008953,-0.057583,,0.015172,,0.0,
1999-01-07,-0.023309,-0.013091,,,-0.005469,,0.020996,0.006948,,-0.025975,...,,-0.004716,-0.004097,-0.007279,0.018362,,-0.005032,,-0.007337,
1999-01-08,0.004939,-0.003772,,,-0.011954,,0.01403,0.020203,,-0.037538,...,,-0.006933,-0.004938,0.000876,-0.04256,,0.00262,,0.002173,


pandas.core.indexes.base.Index