In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Reload all modules imported with %aimport
%load_ext autoreload
%autoreload 1

Source: https://pythonprogramming.net/combining-stock-prices-into-one-dataframe-python-programming-for-finance/

In [2]:
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web

import numpy as np

import pickle
import requests

from sklearn.base import TransformerMixin, BaseEstimator, clone
from sklearn.pipeline import Pipeline, make_pipeline

import re

idx = pd.IndexSlice

In [3]:
def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia and cache them.

    The first column of every data row of the first
    ``wikitable sortable`` table on the page is taken as the ticker symbol.
    The resulting list is pickled to ``sp500tickers.pickle`` in the current
    working directory.

    Returns
    ----------
    tickers : list of str
        Ticker symbols in page order, with surrounding whitespace removed.

    Raises
    ----------
    requests.HTTPError
        If Wikipedia answers with an error status code.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies',
                        timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})

    tickers = []
    # The first <tr> is the header row, hence the [1:].
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows made only of <th> cells carry no ticker.
            continue
        # Cell text may carry a trailing newline; strip it so the symbol can
        # be used directly in file names and data-reader calls.
        tickers.append(cells[0].text.strip())

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)

    return tickers

In [4]:
def get_data_from_yahoo(reload_sp500=False):
    """Download daily price history for every cached S&P 500 ticker.

    One CSV per ticker is written to ``stock_dfs/<ticker>.csv``. Tickers
    whose CSV already exists are skipped, so an interrupted run can simply
    be restarted.

    Parameters
    ----------
    reload_sp500 : bool, default False
        If True the ticker list is re-scraped with ``save_sp500_tickers``;
        otherwise it is read from ``sp500tickers.pickle``.

    Returns
    ----------
    None
    """
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    # Drop the two share-class tickers. Filtering (rather than list.remove)
    # does not raise when a symbol is not in the list.
    excluded = {'BRK.B', 'BF.B'}
    tickers = [ticker for ticker in tickers if ticker not in excluded]

    os.makedirs('stock_dfs', exist_ok=True)

    start = dt.datetime(2000, 1, 1)
    end = dt.datetime(2017, 12, 31)

    for ticker in tickers:
        csv_path = 'stock_dfs/{}.csv'.format(ticker)

        # just in case your connection breaks, we'd like to save our progress!
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue

        df = web.DataReader(ticker, "yahoo", start, end)
        df.to_csv(csv_path)

In [5]:
def compile_data():
    """Join the adjusted close of every cached ticker into one CSV.

    Reads the ticker list from ``sp500tickers.pickle`` and, for each ticker,
    ``stock_dfs/<ticker>.csv``. The 'Adj Close' column is renamed to the
    ticker symbol, the other price/volume columns are dropped, and the
    frames are outer-joined on 'Date'. The head of the result is printed and
    the full table is written to ``sp500_joined_closes.csv``.

    Returns
    ----------
    None
    """
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    # None marks "no frame joined yet". Testing ``main_df.empty`` instead
    # would silently discard a first CSV that happens to have no rows.
    main_df = None

    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker), index_col='Date')

        # Keyword ``columns=`` instead of a positional axis, which newer
        # pandas versions no longer accept.
        df = (df.rename(columns={'Adj Close': ticker})
                .drop(columns=['Open', 'High', 'Low', 'Close', 'Volume']))

        if main_df is None:
            main_df = df
        else:
            main_df = main_df.join(df, how='outer')

        if count % 10 == 0:
            print(count)

    if main_df is None:
        # Empty ticker list: still emit an (empty) CSV.
        main_df = pd.DataFrame()

    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')



In [191]:
sp500_tickers = save_sp500_tickers()

In [114]:
# Tickers carrying a ".B" share-class suffix (the kind get_data_from_yahoo excludes).
# Raw string so that `\.` is a regex escape rather than an invalid string escape.
[ticker for ticker in sp500_tickers if re.match(r'.*\.B$', ticker)]

['BF.B']

In [47]:
sp500_tickers

['MMM',
 'ABT',
 'ABBV',
 'ACN',
 'ATVI',
 'AYI',
 'ADBE',
 'AMD',
 'AAP',
 'AES',
 'AET',
 'AMG',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALXN',
 'ALGN',
 'ALLE',
 'AGN',
 'ADS',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'APC',
 'ADI',
 'ANDV',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'AIV',
 'AAPL',
 'AMAT',
 'APTV',
 'ADM',
 'ARNC',
 'AJG',
 'AIZ',
 'T',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'BHGE',
 'BLL',
 'BAC',
 'BK',
 'BAX',
 'BBT',
 'BDX',
 'BRK.B',
 'BBY',
 'BIIB',
 'BLK',
 'HRB',
 'BA',
 'BWA',
 'BXP',
 'BSX',
 'BHF',
 'BMY',
 'AVGO',
 'BF.B',
 'CHRW',
 'CA',
 'COG',
 'CDNS',
 'CPB',
 'COF',
 'CAH',
 'CBOE',
 'KMX',
 'CCL',
 'CAT',
 'CBG',
 'CBS',
 'CELG',
 'CNC',
 'CNP',
 'CTL',
 'CERN',
 'CF',
 'SCHW',
 'CHTR',
 'CHK',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'XEC',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CTXS',
 'CLX',
 'CME',
 'CMS',
 'KO',
 

In [6]:
get_data_from_yahoo()

Already have MMM
Already have ABT
Already have ABBV
Already have ACN
Already have ATVI
Already have AYI
Already have ADBE
Already have AMD
Already have AAP
Already have AES
Already have AET
Already have AMG
Already have AFL
Already have A
Already have APD
Already have AKAM
Already have ALK
Already have ALB
Already have ARE
Already have ALXN
Already have ALGN
Already have ALLE
Already have AGN
Already have ADS
Already have LNT
Already have ALL
Already have GOOGL
Already have GOOG
Already have MO
Already have AMZN
Already have AEE
Already have AAL
Already have AEP
Already have AXP
Already have AIG
Already have AMT
Already have AWK
Already have AMP
Already have ABC
Already have AME
Already have AMGN
Already have APH
Already have APC
Already have ADI
Already have ANDV
Already have ANSS
Already have ANTM
Already have AON
Already have AOS
Already have APA
Already have AIV
Already have AAPL
Already have AMAT
Already have APTV
Already have ADM
Already have ARNC
Already have AJG
Already have AI

In [193]:
compile_data()

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
                  MMM        ABT  ABBV  ACN      ATVI  AYI       ADBE     AMD  \
Date                                                                            
2000-01-03  30.614885  10.008811   NaN  NaN  1.251975  NaN  16.274672  15.500   
2000-01-04  29.398392   9.722847   NaN  NaN  1.213892  NaN  14.909399  14.625   
2000-01-05  30.249941   9.704972   NaN  NaN  1.218653  NaN  15.204174  15.000   
2000-01-06  32.682892  10.044556   NaN  NaN  1.194851  NaN  15.328290  16.000   
2000-01-07  33.331699  10.151793   NaN  NaN  1.228173  NaN  16.072987  16.250   

            AAP        AES ...   WYNN       XEL        XRX       XLNX  \
Date                       ...                                          
2000-01-03  NaN  31.310091 ...    NaN  8.401768  49.551979  34.724194   
2000-01-04  NaN  30.

In [6]:
df = pd.read_csv('sp500_joined_closes.csv')

In [7]:
df.shape

(4528, 504)

In [8]:
df.columns

Index(['Date', 'MMM', 'ABT', 'ABBV', 'ACN', 'ATVI', 'AYI', 'ADBE', 'AMD',
       'AAP',
       ...
       'WYNN', 'XEL', 'XRX', 'XLNX', 'XL', 'XYL', 'YUM', 'ZBH', 'ZION', 'ZTS'],
      dtype='object', length=504)

In [7]:
def compile_data_all(tickers=None):
    """Combine every attribute of the cached ticker CSVs into one wide frame.

    Parameters
    ----------
    tickers : iterable of str, optional
        Symbols to load from ``stock_dfs/<ticker>.csv``. When None the list
        is read from ``sp500tickers.pickle``.

    Returns
    ----------
    df_big : pandas DataFrame
        Indexed by 'Date', with two-level columns: the ticker symbol on the
        outer level and the CSV's own column names on the inner level.
    """
    if tickers is None:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    # The symbols are used twice (loop below and ``keys=``); materialise
    # them so a one-shot iterable such as a generator works as well.
    tickers = list(tickers)

    dfs = []
    for count, ticker in enumerate(tickers):
        dfs.append(pd.read_csv('stock_dfs/{}.csv'.format(ticker), index_col='Date'))

        if count % 10 == 0:
            print(count)

    # keys= puts the ticker symbol on the outer column level.
    df_big = pd.concat(dfs, axis=1, keys=tickers)
    df_big.index.name = "Date"

    return df_big



In [None]:
dfA = pd.read_csv('stock_dfs/{}.csv'.format('AAPL'))
dfA = dfA.set_index('Date')
dfB = pd.read_csv('stock_dfs/{}.csv'.format('GOOG'))
dfB = dfB.set_index('Date')

In [54]:
# One frame per symbol, keyed by ticker, fetched from the "yahoo" reader.
data = {sym: web.DataReader(sym, "yahoo") for sym in ['AAPL', 'GOOG']}

In [58]:
dfA = data['AAPL']
dfB = data[ 'GOOG']

In [60]:
pd.concat( [ dfA, dfB], axis=1, keys=['A', 'B'], names=["ticker", "attr"]).head()
                
                  

ticker,A,A,A,A,A,A,B,B,B,B,B,B
attr,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2010-01-04,30.49,30.642857,30.34,30.572857,27.406532,123432400,311.44931,312.721039,310.103088,311.349976,311.349976,3937800
2010-01-05,30.657143,30.798571,30.464285,30.625713,27.453915,150476200,311.563568,311.891449,308.76181,309.978882,309.978882,6048500
2010-01-06,30.625713,30.747143,30.107143,30.138571,27.017223,138040000,310.907837,310.907837,301.220856,302.164703,302.164703,8009000
2010-01-07,30.25,30.285715,29.864286,30.082857,26.967278,119282800,302.731018,303.029083,294.410156,295.130463,295.130463,12912000
2010-01-08,30.042856,30.285715,29.865715,30.282858,27.146566,111902700,294.08725,299.675903,292.651581,299.06488,299.06488,9509900


In [8]:
# Load every ticker's full attribute table into one wide DataFrame.
# NOTE(review): the name `all` shadows Python's builtin all() for the rest
# of the kernel session; later cells rely on this name.
all = compile_data_all()

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500


In [12]:
with open("sp500_allAttrs.pickle","wb") as fp:
        pickle.dump(all,fp)

In [14]:
with open("sp500_allAttrs.pickle", "rb") as fp:
    all = pickle.load(fp)

In [15]:
all.shape
all.columns.values
all.columns.get_level_values(0).unique()
all.columns.get_level_values(1).unique()

(4528, 3018)

array([('MMM', 'Open'), ('MMM', 'High'), ('MMM', 'Low'), ...,
       ('ZTS', 'Close'), ('ZTS', 'Adj Close'), ('ZTS', 'Volume')], dtype=object)

Index(['MMM', 'ABT', 'ABBV', 'ACN', 'ATVI', 'AYI', 'ADBE', 'AMD', 'AAP', 'AES',
       ...
       'WYNN', 'XEL', 'XRX', 'XLNX', 'XL', 'XYL', 'YUM', 'ZBH', 'ZION', 'ZTS'],
      dtype='object', length=503)

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [84]:
all.tail()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,GOOG,GOOG,GOOG,GOOG,GOOG,GOOG
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-12-22,174.679993,175.419998,174.5,175.009995,175.009995,16349400,1061.109985,1064.199951,1059.439941,1060.119995,1060.119995,755100.0
2017-12-26,170.800003,171.470001,169.679993,170.570007,170.570007,33185500,1058.069946,1060.119995,1050.199951,1056.73999,1056.73999,760600.0
2017-12-27,170.100006,170.779999,169.710007,170.600006,170.600006,21498200,1057.390015,1058.369995,1048.050049,1049.369995,1049.369995,1271900.0
2017-12-28,171.0,171.850006,170.479996,171.080002,171.080002,16480200,1051.599976,1054.75,1044.77002,1048.140015,1048.140015,837100.0
2017-12-29,170.520004,170.589996,169.220001,169.229996,169.229996,25999900,1046.719971,1049.699951,1044.900024,1046.400024,1046.400024,887500.0


In [81]:
all.shape
all.columns
all.index
all.tail()

(4528, 12)

MultiIndex(levels=[['AAPL', 'GOOG'], ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']],
           labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]])

Index(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
       '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13', '2000-01-14',
       ...
       '2017-12-15', '2017-12-18', '2017-12-19', '2017-12-20', '2017-12-21',
       '2017-12-22', '2017-12-26', '2017-12-27', '2017-12-28', '2017-12-29'],
      dtype='object', length=4528)

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,GOOG,GOOG,GOOG,GOOG,GOOG,GOOG
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
2017-12-22,174.679993,175.419998,174.5,175.009995,175.009995,16349400,1061.109985,1064.199951,1059.439941,1060.119995,1060.119995,755100.0
2017-12-26,170.800003,171.470001,169.679993,170.570007,170.570007,33185500,1058.069946,1060.119995,1050.199951,1056.73999,1056.73999,760600.0
2017-12-27,170.100006,170.779999,169.710007,170.600006,170.600006,21498200,1057.390015,1058.369995,1048.050049,1049.369995,1049.369995,1271900.0
2017-12-28,171.0,171.850006,170.479996,171.080002,171.080002,16480200,1051.599976,1054.75,1044.77002,1048.140015,1048.140015,837100.0
2017-12-29,170.520004,170.589996,169.220001,169.229996,169.229996,25999900,1046.719971,1049.699951,1044.900024,1046.400024,1046.400024,887500.0


In [90]:
all.loc['2017-12-22': '2017-12-27']

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,GOOG,GOOG,GOOG,GOOG,GOOG,GOOG
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-12-22,174.679993,175.419998,174.5,175.009995,175.009995,16349400,1061.109985,1064.199951,1059.439941,1060.119995,1060.119995,755100.0
2017-12-26,170.800003,171.470001,169.679993,170.570007,170.570007,33185500,1058.069946,1060.119995,1050.199951,1056.73999,1056.73999,760600.0
2017-12-27,170.100006,170.779999,169.710007,170.600006,170.600006,21498200,1057.390015,1058.369995,1048.050049,1049.369995,1049.369995,1271900.0


In [97]:
all.loc['2017-12-22': '2017-12-27', idx[:,['Close', 'Adj Close'] ] ]

Unnamed: 0_level_0,AAPL,AAPL,GOOG,GOOG
Unnamed: 0_level_1,Close,Adj Close,Close,Adj Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2017-12-22,175.009995,175.009995,1060.119995,1060.119995
2017-12-26,170.570007,170.570007,1056.73999,1056.73999
2017-12-27,170.600006,170.600006,1049.369995,1049.369995


In [140]:
a = all.loc['2017-12-22': '2017-12-27', idx['AAPL',['Close', 'Adj Close'] ] ]
b = all.loc['2017-12-22': '2017-12-27', idx['GOOG',['Close', 'Adj Close'] ] ]
a
b
c = pd.concat([a,b], axis=1)
c
c.columns

d = all.loc['2017-12-22': '2017-12-27', idx[:,['Close', 'Adj Close'] ] ]
d
d.columns

Unnamed: 0_level_0,AAPL,AAPL
Unnamed: 0_level_1,Close,Adj Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2017-12-22,175.009995,175.009995
2017-12-26,170.570007,170.570007
2017-12-27,170.600006,170.600006


Unnamed: 0_level_0,GOOG,GOOG
Unnamed: 0_level_1,Close,Adj Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2017-12-22,1060.119995,1060.119995
2017-12-26,1056.73999,1056.73999
2017-12-27,1049.369995,1049.369995


Unnamed: 0_level_0,AAPL,AAPL,GOOG,GOOG
Unnamed: 0_level_1,Close,Adj Close,Close,Adj Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2017-12-22,175.009995,175.009995,1060.119995,1060.119995
2017-12-26,170.570007,170.570007,1056.73999,1056.73999
2017-12-27,170.600006,170.600006,1049.369995,1049.369995


MultiIndex(levels=[['AAPL', 'GOOG'], ['Adj Close', 'Close']],
           labels=[[0, 0, 1, 1], [1, 0, 1, 0]])

Unnamed: 0_level_0,AAPL,AAPL,GOOG,GOOG
Unnamed: 0_level_1,Close,Adj Close,Close,Adj Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2017-12-22,175.009995,175.009995,1060.119995,1060.119995
2017-12-26,170.570007,170.570007,1056.73999,1056.73999
2017-12-27,170.600006,170.600006,1049.369995,1049.369995


MultiIndex(levels=[['AAPL', 'GOOG'], ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']],
           labels=[[0, 0, 1, 1], [3, 4, 3, 4]])

In [21]:
singleAttr = all.loc['2017-12-22': '2017-12-27', idx[['AAPL', 'GOOG'], ['Adj Close'] ] ]
singleAttr
type(singleAttr.columns)

singleAttr.columns = singleAttr.columns.droplevel(1)
singleAttr
type(singleAttr.columns)

Unnamed: 0_level_0,GOOG,AAPL
Unnamed: 0_level_1,Adj Close,Adj Close
Date,Unnamed: 1_level_2,Unnamed: 2_level_2
2017-12-22,1060.119995,175.009995
2017-12-26,1056.73999,170.570007
2017-12-27,1049.369995,170.600006


pandas.indexes.multi.MultiIndex

Unnamed: 0_level_0,GOOG,AAPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-12-22,1060.119995,175.009995
2017-12-26,1056.73999,170.570007
2017-12-27,1049.369995,170.600006


pandas.indexes.base.Index

In [98]:
all.tail(10).to_csv('all.csv')

In [99]:
pd.read_csv('all.csv')

Unnamed: 0.1,Unnamed: 0,AAPL,AAPL.1,AAPL.2,AAPL.3,AAPL.4,AAPL.5,GOOG,GOOG.1,GOOG.2,GOOG.3,GOOG.4,GOOG.5
0,,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
1,Date,,,,,,,,,,,,
2,2017-12-15,173.630005,174.169998,172.46000700000005,173.970001,173.970001,40169300,1054.609985,1067.619995,1049.5,1064.189941,1064.189941,3275900.0
3,2017-12-18,174.880005,177.199997,174.860001,176.419998,176.419998,29421100,1066.079956,1078.48999,1062.0,1077.140015,1077.140015,1554600.0
4,2017-12-19,175.029999,175.389999,174.089996,174.53999299999995,174.53999299999995,27436400,1075.199951,1076.839966,1063.550049,1070.680054,1070.680054,1338700.0
5,2017-12-20,174.869995,175.419998,173.25,174.350006,174.350006,23475600,1071.780029,1073.380005,1061.52002,1064.949951,1064.949951,1268600.0
6,2017-12-21,174.169998,176.020004,174.100006,175.009995,175.009995,20949900,1064.949951,1069.329956,1061.793945,1063.630005,1063.630005,995700.0
7,2017-12-22,174.679993,175.419998,174.5,175.009995,175.009995,16349400,1061.109985,1064.199951,1059.439941,1060.119995,1060.119995,755100.0
8,2017-12-26,170.800003,171.470001,169.679993,170.570007,170.570007,33185500,1058.069946,1060.119995,1050.199951,1056.73999,1056.73999,760600.0
9,2017-12-27,170.100006,170.779999,169.71000700000005,170.600006,170.600006,21498200,1057.390015,1058.369995,1048.050049,1049.369995,1049.369995,1271900.0


In [16]:
class SelectColumnsTransfomer(BaseEstimator, TransformerMixin):
    """ A DataFrame transformer that provides column selection

    Allows to select columns by name from pandas dataframes in scikit-learn
    pipelines.

    Parameters
    ----------
    columns : list of str, names of the dataframe columns to select
        Default: None, which selects no columns (same result as an empty list)

    """
    def __init__(self, columns=None):
        # None rather than a mutable ``[]`` default, so instances created
        # without an argument never share (and never mutate) one list object.
        self.columns = columns

    def transform(self, X, **transform_params):
        """ Selects columns of a DataFrame

        Parameters
        ----------
        X : pandas DataFrame

        Returns
        ----------

        trans : pandas DataFrame
            contains selected columns of X
        """
        # The parameter is stored untouched in __init__; the None default is
        # resolved here.
        selected = [] if self.columns is None else self.columns
        trans = X[selected].copy()
        return trans

    def fit(self, X, y=None, **fit_params):
        """ Do nothing function

        Parameters
        ----------
        X : pandas DataFrame
        y : default None


        Returns
        ----------
        self
        """
        return self
    

In [17]:
class DataFrameFunctionTransformer(BaseEstimator, TransformerMixin):
    """ A DataFrame transformer providing imputation or function application

    Parameters
    ----------
    impute : Boolean, default False
        If True, ``fit`` applies ``func`` to each column and stores the
        result, and ``transform`` fills missing values with it.
        If False, ``transform`` applies ``func`` to each column directly.

    func : function that acts on an array of the form [n_elements, 1]
        if impute is True, functions must return a float number, otherwise
        an array of the form [n_elements, 1]

    """

    def __init__(self, func, impute = False):
        self.func = func
        self.impute = impute
        # Fill values computed by fit(). Empty until then, so transform()
        # with impute=True before fit() fills nothing. The explicit dtype
        # avoids the ambiguous-dtype warning that a bare pd.Series() can emit.
        self.series = pd.Series(dtype="float64")

    def transform(self, X, **transformparams):
        """ Transforms a DataFrame

        Parameters
        ----------
        X : DataFrame

        Returns
        ----------
        trans : pandas DataFrame
            Transformation of X
        """

        if self.impute:
            # fillna with a Series fills each column from the matching label.
            trans = pd.DataFrame(X).fillna(self.series).copy()
        else:
            trans = pd.DataFrame(X).apply(self.func).copy()
        return trans

    def fit(self, X, y=None, **fitparams):
        """ Fixes the values to impute or does nothing

        Parameters
        ----------
        X : pandas DataFrame
        y : not used, API requirement

        Returns
        ----------
        self
        """

        if self.impute:
            # One value per column of X, as returned by func.
            self.series = pd.DataFrame(X).apply(self.func).copy()
        return self
    
    

In [18]:
class DataFrameFeatureUnion(BaseEstimator, TransformerMixin):
    """ Runs several DataFrame transformers side by side

    Every transformer in the list is cloned and fitted on the same input;
    at transform time their outputs are concatenated column-wise into one
    DataFrame.

    Parameters
    ----------
    list_of_transformers : list of DataFrameTransformers

    """
    def __init__(self, list_of_transformers):
        self.list_of_transformers = list_of_transformers

    def fit(self, X, y=None, **fitparams):
        """ Fits a fresh clone of each transformer

        Parameters
        ----------
        X : pandas DataFrame
        y : not used, API requirement

        Returns
        ----------
        self : object
        """
        self.fitted_transformers_ = []
        for template in self.list_of_transformers:
            # Clones are fitted so the transformers passed in stay untouched.
            fitted = clone(template).fit(X, y=None, **fitparams)
            self.fitted_transformers_.append(fitted)
        return self

    def transform(self, X, **transformparamn):
        """ Applies every fitted transformer and joins the results

        Parameters
        ----------
        X : pandas DataFrame

        Returns
        ----------
        concatted :  pandas DataFrame
            outputs of the fitted transformers, concatenated along axis 1

        """
        pieces = []
        for fitted in self.fitted_transformers_:
            pieces.append(fitted.transform(X))
        return pd.concat(pieces, axis=1).copy()

In [68]:
class GenSelectAttrTransfomer(BaseEstimator, TransformerMixin):
    """ A DataFrame transformer that selects attributes (columns) by name

    Works on frames with flat columns and on frames with MultiIndex
    columns. For MultiIndex columns the names are matched against the
    second column level and every entry of the first level is kept.

    Parameters
    ----------
    columns : list of str, names of the dataframe columns to select
        Required; there is no default.

    """
    def __init__(self, columns):
        self.columns = columns

    def transform(self, X, **transform_params):
        """ Selects columns of a DataFrame

        Parameters
        ----------
        X : pandas DataFrame

        Returns
        ----------

        trans : pandas DataFrame
            contains selected columns of X. For MultiIndex columns with a
            single selected name, the second column level is dropped.
        """
        if isinstance(X.columns, pd.MultiIndex):
            trans = X.loc[:, idx[:, self.columns]].copy()
            if len(self.columns) == 1:
                # Only one attribute left: the second level carries no
                # information any more, so flatten it away.
                trans.columns = trans.columns.droplevel(1)
        else:
            # Flat columns: plain label-based selection.
            trans = X.loc[:, self.columns].copy()

        return trans

    def fit(self, X, y=None, **fit_params):
        """ Do nothing function

        Parameters
        ----------
        X : pandas DataFrame
        y : default None, accepted so that fit(X, y) calls work


        Returns
        ----------
        self
        """
        return self

In [82]:
class GenPctChangeTransfomer(BaseEstimator, TransformerMixin):
    """ A DataFrame transformer that returns the percent change of its input

    Takes no parameters and learns nothing during fit.

    """
    def __init__(self):
        pass

    def fit(self, X, y=None, **fit_params):
        """ Do nothing function

        Parameters
        ----------
        X : pandas DataFrame
        y : default None

        Returns
        ----------
        self
        """
        return self

    def transform(self, X, **transform_params):
        """ Computes the percent change of a DataFrame

        Parameters
        ----------
        X : pandas DataFrame

        Returns
        ----------

        trans : pandas DataFrame
            copy of the result of ``X.pct_change()``
        """
        changed = X.pct_change()
        return changed.copy()

In [83]:
single_pipe = make_pipeline(GenSelectAttrTransfomer(['Adj Close'] ),
                            GenPctChangeTransfomer()
                           )
s = single_pipe.fit_transform(all)
s.tail()

Unnamed: 0_level_0,MMM,ABT,ABBV,ACN,ATVI,AYI,ADBE,AMD,AAP,AES,...,WYNN,XEL,XRX,XLNX,XL,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-12-22,-0.000213,0.0,0.003064,-0.00201,-0.008758,-0.001334,0.002521,-0.03214,0.004195,0.008475,...,-0.00451,-0.002893,-0.004376,-0.006582,0.003137,-0.002509,-0.001212,0.001417,-0.002526,-0.004012
2017-12-26,0.003067,0.00123,-0.004684,-0.005848,-0.016432,0.022651,-0.0032,-0.00759,0.014023,-0.006536,...,0.005186,-0.008705,-0.005747,-0.004711,0.00199,-0.001184,-0.002549,-0.001332,-0.009156,0.004862
2017-12-27,0.003185,0.008246,0.003478,0.002157,-0.001734,0.006531,0.005274,0.006692,-0.021479,0.00282,...,0.00943,0.006952,0.00238,0.004586,-0.001135,0.010815,0.002555,0.001501,-0.002949,0.001521
2017-12-28,-0.002032,-0.000174,-0.003058,0.001631,0.000947,0.003442,0.001083,0.001899,-0.000601,0.008435,...,-0.010164,0.005858,0.007527,0.008688,0.004829,0.000293,0.003277,0.01543,0.012424,-0.000828
2017-12-29,-0.001485,-0.006787,-0.011044,-0.003126,-0.001262,-0.010346,-0.001766,-0.025592,-0.000201,0.006506,...,0.000653,0.000624,-0.010187,-0.015766,-0.005937,-0.000733,-0.012822,-0.008871,-0.009934,-0.004835


In [72]:
s.loc['2017-12-22':'2017-12-27'].pct_change().rank(axis=1)

Unnamed: 0_level_0,MMM,ABT,ABBV,ACN,ATVI,AYI,ADBE,AMD,AAP,AES,...,WYNN,XEL,XRX,XLNX,XL,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-12-22,,,,,,,,,,,...,,,,,,,,,,
2017-12-26,338.0,282.0,113.0,87.0,10.0,488.0,140.0,58.0,468.0,78.0,...,387.0,40.0,89.0,112.0,305.0,191.0,154.0,188.0,34.0,375.0
2017-12-27,337.0,465.0,351.0,296.0,150.0,433.0,408.0,436.0,10.0,325.0,...,479.0,444.0,302.0,385.0,170.0,488.0,312.0,271.0,122.0,273.0


In [66]:
s.loc['2017-12-22':'2017-12-27'].rank(axis=1)

Unnamed: 0_level_0,MMM,ABT,ABBV,ACN,ATVI,AYI,ADBE,AMD,AAP,AES,...,WYNN,XEL,XRX,XLNX,XL,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-12-22,474.0,164.0,315.0,417.0,193.0,439.0,442.0,2.0,327.0,3.0,...,432.0,123.0,48.0,217.0,70.0,215.0,269.0,371.0,137.0,231.0
2017-12-26,474.0,162.0,316.0,414.0,185.0,444.0,441.0,2.0,330.0,3.0,...,432.0,119.0,47.0,214.0,71.0,213.0,269.0,372.0,134.0,233.0
2017-12-27,474.0,165.0,316.0,414.0,186.0,446.0,441.0,2.0,324.0,3.0,...,433.0,122.0,48.0,215.0,71.0,217.0,269.0,371.0,133.0,234.0


In [120]:
singleAttr.head()

Unnamed: 0_level_0,AAPL,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-12-22,175.009995,1060.119995
2017-12-26,170.570007,1056.73999
2017-12-27,170.600006,1049.369995


In [124]:
singleAttr.apply(np.log, axis=0)

Unnamed: 0_level_0,AAPL,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-12-22,5.164843,6.966137
2017-12-26,5.139146,6.962944
2017-12-27,5.139322,6.955945


In [126]:
ticker_pipeline = make_pipeline(  
        SelectColumnsTransfomer(['GOOG'])
)

In [134]:
ticker_pipeline.fit_transform(singleAttr).pct_change()

Unnamed: 0_level_0,GOOG
Date,Unnamed: 1_level_1
2017-12-22,
2017-12-26,-0.003188
2017-12-27,-0.006974


In [133]:
p_1 = make_pipeline( DataFrameFunctionTransformer(func = lambda x: x.pct_change()) )
p_1.fit_transform(singleAttr)

Unnamed: 0_level_0,AAPL,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-12-22,,
2017-12-26,-0.02537,-0.003188
2017-12-27,0.000176,-0.006974


In [136]:
p_2 = make_pipeline( DataFrameFunctionTransformer( func = lambda df: df.pct_change()))
p_2.fit_transform(all['2017-12-22': '2017-12-27'])

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,GOOG,GOOG,GOOG,GOOG,GOOG,GOOG
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-12-22,,,,,,,,,,,,
2017-12-26,-0.022212,-0.022517,-0.027622,-0.02537,-0.02537,1.029769,-0.002865,-0.003834,-0.008722,-0.003188,-0.003188,0.007284
2017-12-27,-0.004098,-0.004024,0.000177,0.000176,0.000176,-0.352181,-0.000643,-0.001651,-0.002047,-0.006974,-0.006974,0.672232
