In [3]:
# default_exp feature_eng

In [None]:
# export
from mlfinlab.microstructural_features import (
    get_roll_measure,
    get_roll_impact,
    get_bar_based_kyle_lambda,
    get_bar_based_amihud_lambda,
)
from mlfinlab.microstructural_features.entropy import (
    get_shannon_entropy,
    get_lempel_ziv_entropy,
    get_plug_in_entropy,
    get_konto_entropy
)

from copy import deepcopy
import pandas as pd
import numpy as np
import logging
import string
from mlbt.load_data import load_feat, save_feat
from mlbt.frac_diff import frac_diff_ffd
from mlbt.load_data import get_data, SYMBOLS_CSV, safe_feat_name, process_bars
SYMBOLS_CSV = SYMBOLS_CSV.copy()
SYMBOLS_CSV.columns = SYMBOLS_CSV.columns.str.lower()
SYMBOLS_DICT = SYMBOLS_CSV.T.to_dict()


def roll_measure(df, window, price="Close"):
    """The Roll measure attempts to estimate the bid-ask spread (i.e. liquidity) of an instrument"""
    return get_roll_measure(df[price], window)


def roll_impact(df, window, price="Close", dollar_volume="Dollar Volume"):
    """The Roll measure divided by dollar volume"""
    return roll_measure(df, window, price) / df[dollar_volume] * 1e9


def kyle(df, window, price="Close", volume="Volume"):
    """A measure of market impact cost (i.e. liquidity) from Kyle (1985)"""
    return get_bar_based_kyle_lambda(df[price], df[volume], window) * 1e9


def amihud(df, window, price="Close", dollar_volume="Dollar Volume"):
    """A measure of market impact cost (i.e. liquidity) from Amihud (2002)"""
    return get_bar_based_amihud_lambda(df[price], df[dollar_volume], window) * 1e9


def autocorr(df, window, lag, column="Close"):
    """The raw price series' serial correlation"""
    return df[column].rolling(window).apply(lambda x: x.autocorr(lag=lag), raw=False)


def stdev(df, window, column="Close"):
    """The raw price series' standard deviation"""
    return df[column].rolling(window).std()


def int_ret(df, periods=1, column="Close"):
    """First difference of log-transformed prices"""
    return df[column].pct_change(periods=periods)


def log_ret(df, periods=1, column="Close"):
    """First difference of log-transformed prices"""
    return np.log(df[column]).diff(periods=periods)


def ffd(df, d, column="Close"):
    """Fractionally differentiated prices"""
    return frac_diff_ffd(np.log(df[column].to_frame('Close')), d)['Close']


def volratio(df, com, volume="Volume", buy_volume="Buy Volume"):
    """
    EWM of bar-by-bar buy volume divided by total volume
    (i.e. a value >0.50 would indicate buyers driving the market)
    """
    return (df[buy_volume] / df[volume]).ewm(com=com).mean()


def binary_encoding(returns):
    return np.sign(returns).map({-1: 'a', 1: 'b'})

def quantile_encoding(returns, q=10):
    return pd.qcut(returns, q=q, labels=list(string.printable[:q]), duplicates='drop')

def sigma_encoding(returns):
    stdev = returns.std()

    nbins = int((returns.max() - returns.min()) / stdev)
    min_ = returns.min()

    bins = pd.IntervalIndex.from_tuples([
        (
            min_+ (stdev * i), min_+ (stdev * (i + 1))
        ) for i in range(nbins + 1)
    ])
    x = pd.cut(returns, bins)

    mapper = dict(zip(bins, list(string.printable)[:nbins]))
    sigma_q = x.values.map(mapper)
    return pd.Series(sigma_q, index=returns.index)    

ENCODERS = {
    'binary': binary_encoding,
    'quantile': quantile_encoding,
    'sigma': sigma_encoding,
}


ENTROPY_FUNS = {
    'shannon': get_shannon_entropy,
    'plugin': get_plug_in_entropy,
    'konto': get_konto_entropy,
    'lz': get_lempel_ziv_entropy,
}

def entropy(df, method, encoding, window, column="Close", konto_len=None):
    encoder = ENCODERS[encoding]
    entropy_fun = ENTROPY_FUNS[method]
    kwargs = {"window": konto_len} if konto_len is not None else {}
#     apply_entropy = lambda x, fun:
    debug = lambda x: x
    encoded = encoder(df[column]).dropna()
    rolling = encoded.rolling(window)


#     df = pd.DataFrame({'col1':list('some_string')})
#     window = 5
    ss = pd.Series([
        ''.join(encoded.values[max(i-window+1, 0): i+1])
        for i in range(len(encoded.values))
    ])
    
    entropies = ss.apply(lambda x: entropy_fun(x, **kwargs))
    entropies.index = encoded.index

    entropies[:window-1]=np.nan
    assert entropies.index.is_unique
    return entropies
    

def stdev_from_ma(df, window, column="Close"):
    rolly = df[column].rolling(window)
    return (df[column] - rolly.mean()) / rolly.std()


def close(df, column="Close"):
    return df[column]


def lag(df, lag, column="Close"):
    return df[column].shift(lag)


def lag_change(df, lag, column="Close"):
    return df[column].pct_change(lag)


def lag_diff(df, lag, column="Close"):
    col = df[column]
    return col - col.shift(lag)


def ema(df, com, column="Close"):
    return df[column].ewm(com=com).mean()

    
# Dates
def month(df, column="Time"):
    return df[column].dt.month


def week(df, column="Time"):
    return df[column].dt.week

    
def day(df, column="Time"):
    return df[column].dt.day

    
def weekday(df, column="Time"):
    return df[column].dt.weekday

    
def hour(df, column="Time"):
    return df[column].dt.hour


def tick_bars(df, size, column="Close"):
    return process_bars(df, size, "tick")[column]

    
FEATURES = {
    "auto": autocorr,
    "stdev": stdev,
    "roll": roll_measure,
    "rollimp": roll_impact,
    "kyle": kyle,
    "amihud": amihud,
    "volratio": volratio,
    "entropy": entropy,
    "int_ret": log_ret,
    "log_ret": log_ret,
    "ffd": ffd,
    "close": close,
    "lag": lag,
    "lag_change": lag_change,
    "ema": ema,
    "stdev_from_ma": stdev_from_ma,

    "time_bars": tick_bars,

    "weekday": weekday,
    "hour": hour,
}

def run_feature_engineering(config, deck):
    """Load already-engineered features or engineer if we can't"""
    for symbol, symbol_deck in deck.items():
        logging.debug(f"{symbol}: Feature engineering for {len(config['features'])} features")
        bars = symbol_deck['bars']
        feats = []
        for feat_config in config["features"]:
            # We pass a copy in so the feat_eng code can modify that to its hearts content,
            # while for us the information remains non-redundant
            name = safe_feat_name(feat_config, safe_for_fs=False)
            feat = engineer_feature(deck, symbol, config, feat_config)["Close"]
            logging.debug(f'Got {feat.shape} shape for feature: {name}')
            feat.name = name
            bars_index = deck[symbol]['bars'].index
            if feat.index.shape != bars_index.shape:
                # We're only interested in values we have prices for
                # Do this now so concat below is fast (and has the same set of indices across)
                feat = feat.reindex(index=bars_index, method='ffill')
            
            feats.append(feat)
        feats2 = pd.concat(feats, axis=1)
        logging.debug(f"Joined {len(feats)} features into {feats2.shape} shape")
        # Reindex in case of outside feats
        deck[symbol]['feats'] = feats2
    return deck

def get_bars(deck, symbol, config):
    if symbol in deck:
        # TODO: Remove deep copy
        bars = deck[symbol]['bars'].copy(deep=True)
    else:
        # We're loading a feature external to the price data of our trading universe
        bars = get_data(symbol, "minutely", config["start_date"], config["end_date"])
        
    return bars

def fill_out_symbol(feat_conf, for_symbol):
    symbol = feat_conf['symbol'] = feat_conf.get('symbol', for_symbol)
    if isinstance(symbol, dict):
        feat_conf['symbol'] = fill_out_symbol(symbol, for_symbol)
    return feat_conf
    

def engineer_feature(deck, for_symbol, config, feat_conf):
    """Parse and compute a feature"""
    feat_conf = deepcopy(feat_conf)
    fill_out_symbol(feat_conf, for_symbol)

    symbol = feat_conf['symbol']

    feat = load_feat(config, feat_conf)
    if feat is not None:
        return feat

    
    if isinstance(symbol, dict):
        # We're computing a feature on a feature
        df = engineer_feature(deck, for_symbol, config, symbol)
    else:
        df = get_bars(deck, symbol, config)
    
    feat = compute_feature(deck, for_symbol, config, feat_conf, symbol, df)

    if config["save_to_disk"]:
        save_feat(config, feat_conf, feat)
    return feat

def compute_feature(deck, for_symbol, config, feat_conf, symbol, df):
    logging.debug(f"Computing {feat_conf['name']} for {for_symbol}: {feat_conf}")
    drop = ['name', 'symbol']
    params = {k:v for k, v in feat_conf.items() if not k in drop}

    feat_name = feat_conf['name']
    if feat_name in ['sector', 'exchange']:
        categories = list(sorted(set(SYMBOLS_CSV[feat_name])))
        category = SYMBOLS_DICT[symbol][feat_name]
        feat = pd.Series(categories.index(category), index=df.index)
    else:
        feat = FEATURES[feat_name](df, **params)
            
    # Every feature's column is called Close to enable easy recursion
    feat = feat.to_frame("Close")
        
    return feat


def define_feature_configs():
    """Stake out the list of features that is the basis for our features matrix"""
    ffd_f = {"name": "ffd", "d": 0.3}
    int_ret = {"name": "int_ret"}
    log_ret = {"name": "log_ret"}
    vix_1h = {"name": "time_bars", "size": 60, "symbol": 'VIX.XO'}
    int_ret_vix = {"name": "int_ret", "symbol": vix_1h}
    log_ret_vix = {"name": "log_ret", "symbol": vix_1h}

    features = [
        int_ret,
        int_ret_vix,
        log_ret,
        log_ret_vix,
        ffd_f,
    ]
    windows = [25, 50, 250]#, 500]

    for window in windows:
        features.append({"name": "int_ret", "periods": window})
        features.append({"name": "log_ret", "periods": window})
        
        features.append({"name": "roll", "window": window})
        features.append({"name": "rollimp", "window": window})
        features.append({"name": "amihud", "window": window})
        features.append({"name": "kyle", "window": window})

        volratio = {"name": "volratio", "com": window}
        stdev_volratio = {"name": "stdev", "window": window, "symbol": volratio}

        features.append(volratio)
        features.append({"name": "lag", "lag": window, "symbol": volratio})

        # Volatilty
        stdev_int_ret_vix = {"name": "stdev", "window": window, "symbol": int_ret_vix}
        features.append(stdev_int_ret_vix)
        
        stdev_log_ret_vix = {"name": "stdev", "window": window, "symbol": log_ret_vix}
        features.append(stdev_log_ret_vix)

        stdev_int_ret = {"name": "stdev", "window": window, "symbol": int_ret}
        features.append(stdev_int_ret)
        features.append({"name": "stdev", "window": window, "symbol": stdev_int_ret})

        stdev_log_ret = {"name": "stdev", "window": window, "symbol": log_ret}
        features.append(stdev_log_ret)
        features.append({"name": "stdev", "window": window, "symbol": stdev_log_ret})

        stdev_ffd = {"name": "stdev", "window": window, "symbol": ffd_f} 
        features.append(stdev_ffd)

        for lag in windows:
            if lag < window:
                features.append({"name": "auto", "window": window, "lag": lag, "symbol": int_ret_vix})
                features.append({"name": "auto", "window": window, "lag": lag, "symbol": log_ret_vix})
                features.append({"name": "auto", "window": window, "lag": lag, "symbol": volratio})
                features.append({"name": "auto", "window": window, "lag": lag, "symbol": stdev_volratio})
                
        for encoding in ENCODERS:
            for ret in [int_ret, log_ret]:
                features.append({"name": "entropy", "method": "shannon", "encoding": encoding, "window": window, "symbol": ret})
                features.append({"name": "entropy", "method": "plugin", "encoding": encoding, "window": window, "symbol": ret})
                features.append({"name": "entropy", "method": "lz", "encoding": encoding, "window": window, "symbol": ret})
#                 for konto_div in [5, 10]:
#                     konto_len = (window // konto_div) + 1
#                     features.append({"name": "entropy", "method": "konto", "encoding": encoding, "window": window, "symbol": ret, "konto_len": konto_len})
        
        
    return features


In [None]:
feats = define_feature_configs()

In [None]:
feats

In [None]:
len(feats)

In [286]:
from mlbt.load_data import load_bars
symbol = '@NQ#C'
config = {
    'bar_type': 'dollar', 
    'load_from_disk': True, 
    'save_to_disk': True, 
    'start_date': None, 
    'end_date': None, 
    'features': feats[:],
}
nq = load_bars(symbol, {**config, **{'load_from_disk': True}})

In [287]:
deck = {symbol: {'bars': nq}}
for_symbol = symbol

In [288]:
FORMAT = "%(asctime)-15s %(message)s"
logging.basicConfig(format=FORMAT, level=logging.DEBUG)

x = run_feature_engineering(config, deck)

2020-02-12 15:49:09,491 @NQ#C: Feature engineering for 87 features
2020-02-12 15:49:09,496 Got (31156,) shape for feature: {"name": "int_ret"}
2020-02-12 15:49:09,501 Got (27209,) shape for feature: {"name": "int_ret", "symbol": "{\"name\": \"time_bars\", \"size\": 60, \"symbol\": \"VIX.XO\"}"}
2020-02-12 15:49:09,507 Got (31156,) shape for feature: {"name": "log_ret"}
2020-02-12 15:49:09,512 Got (27209,) shape for feature: {"name": "log_ret", "symbol": "{\"name\": \"time_bars\", \"size\": 60, \"symbol\": \"VIX.XO\"}"}
2020-02-12 15:49:09,517 Got (31156,) shape for feature: {"d": 0.3, "name": "ffd"}
2020-02-12 15:49:09,522 Got (31156,) shape for feature: {"name": "int_ret", "periods": 25}
2020-02-12 15:49:09,527 Got (31156,) shape for feature: {"name": "log_ret", "periods": 25}
2020-02-12 15:49:09,531 Got (31156,) shape for feature: {"name": "roll", "window": 25}
2020-02-12 15:49:09,536 Got (31156,) shape for feature: {"name": "rollimp", "window": 25}
2020-02-12 15:49:09,541 Got (31156

2020-02-12 15:49:09,810 Got (31156,) shape for feature: {"name": "stdev", "symbol": "{\"name\": \"log_ret\"}", "window": 50}
2020-02-12 15:49:09,815 Got (31156,) shape for feature: {"name": "stdev", "symbol": "{\"name\": \"stdev\", \"symbol\": \"{\\\"name\\\": \\\"log_ret\\\"}\", \"window\": 50}", "window": 50}
2020-02-12 15:49:09,820 Got (31156,) shape for feature: {"name": "stdev", "symbol": "{\"d\": 0.3, \"name\": \"ffd\"}", "window": 50}
2020-02-12 15:49:09,825 Got (27209,) shape for feature: {"lag": 25, "name": "auto", "symbol": "{\"name\": \"int_ret\", \"symbol\": \"{\\\"name\\\": \\\"time_bars\\\", \\\"size\\\": 60, \\\"symbol\\\": \\\"VIX.XO\\\"}\"}", "window": 50}
2020-02-12 15:49:09,831 Got (27209,) shape for feature: {"lag": 25, "name": "auto", "symbol": "{\"name\": \"log_ret\", \"symbol\": \"{\\\"name\\\": \\\"time_bars\\\", \\\"size\\\": 60, \\\"symbol\\\": \\\"VIX.XO\\\"}\"}", "window": 50}
2020-02-12 15:49:09,839 Got (31156,) shape for feature: {"lag": 25, "name": "auto"

In [274]:
%debug

> [1;32m<ipython-input-265-7eb9a52732c0>[0m(103)[0;36msigma_encoding[1;34m()[0m
[1;32m    101 [1;33m[1;33m[0m[0m
[0m[1;32m    102 [1;33m    [0msigma_q[0m [1;33m=[0m [0mx[0m[1;33m.[0m[0mvalues[0m[1;33m.[0m[0mmap[0m[1;33m([0m[0mmapper[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m--> 103 [1;33m    [1;36m1[0m[1;33m/[0m[1;36m0[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m    104 [1;33m    [1;32mreturn[0m [0msigma_q[0m[1;33m.[0m[0mto_series[0m[1;33m([0m[0mindex[0m[1;33m=[0m[0mreturns[0m[1;33m.[0m[0mindex[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m    105 [1;33m[1;33m[0m[0m
[0m
ipdb> sigma_q
Index([nan, 'a', 'a', 'b', '8', '9', 'b', 'b', 'c', 'b',
       ...
       'a', 'b', 'a', 'b', 'a', 'a', 'b', 'b', 'a', 'b'],
      dtype='object', length=31156)
ipdb> sigma_q.to_series()
NaN    NaN
a        a
a        a
b        b
8        8
9        9
b        b
b        b
c        c
b        b
b        b
a        a
b        b
b 

In [289]:
feats = x[symbol]['feats']

In [290]:
feats.columns

Index(['{"name": "int_ret"}',
       '{"name": "int_ret", "symbol": "{\"name\": \"time_bars\", \"size\": 60, \"symbol\": \"VIX.XO\"}"}',
       '{"name": "log_ret"}',
       '{"name": "log_ret", "symbol": "{\"name\": \"time_bars\", \"size\": 60, \"symbol\": \"VIX.XO\"}"}',
       '{"d": 0.3, "name": "ffd"}', '{"name": "int_ret", "periods": 25}',
       '{"name": "log_ret", "periods": 25}', '{"name": "roll", "window": 25}',
       '{"name": "rollimp", "window": 25}', '{"name": "amihud", "window": 25}',
       '{"name": "kyle", "window": 25}', '{"com": 25, "name": "volratio"}',
       '{"lag": 25, "name": "lag", "symbol": "{\"com\": 25, \"name\": \"volratio\"}"}',
       '{"name": "stdev", "symbol": "{\"name\": \"int_ret\", \"symbol\": \"{\\\"name\\\": \\\"time_bars\\\", \\\"size\\\": 60, \\\"symbol\\\": \\\"VIX.XO\\\"}\"}", "window": 25}',
       '{"name": "stdev", "symbol": "{\"name\": \"log_ret\", \"symbol\": \"{\\\"name\\\": \\\"time_bars\\\", \\\"size\\\": 60, \\\"symbol\\\": \\\"VI

In [291]:
feats

Unnamed: 0_level_0,"{""name"": ""int_ret""}","{""name"": ""int_ret"", ""symbol"": ""{\""name\"": \""time_bars\"", \""size\"": 60, \""symbol\"": \""VIX.XO\""}""}","{""name"": ""log_ret""}","{""name"": ""log_ret"", ""symbol"": ""{\""name\"": \""time_bars\"", \""size\"": 60, \""symbol\"": \""VIX.XO\""}""}","{""d"": 0.3, ""name"": ""ffd""}","{""name"": ""int_ret"", ""periods"": 25}","{""name"": ""log_ret"", ""periods"": 25}","{""name"": ""roll"", ""window"": 25}","{""name"": ""rollimp"", ""window"": 25}","{""name"": ""amihud"", ""window"": 25}",...,"{""encoding"": ""quantile"", ""method"": ""lz"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""log_ret\""}"", ""window"": 50}","{""encoding"": ""quantile"", ""konto_len"": 50, ""method"": ""konto"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""log_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""method"": ""shannon"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""int_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""method"": ""plugin"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""int_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""method"": ""lz"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""int_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""konto_len"": 50, ""method"": ""konto"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""int_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""method"": ""shannon"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""log_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""method"": ""plugin"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""log_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""method"": ""lz"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""log_ret\""}"", ""window"": 50}","{""encoding"": ""sigma"", ""konto_len"": 50, ""method"": ""konto"", ""name"": ""entropy"", ""symbol"": ""{\""name\"": \""log_ret\""}"", ""window"": 50}"
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-04-13 11:08:00,,,,,,,,,,,...,,,,,,,,,,
2006-04-13 13:19:00,-0.001633,,-0.001633,,,,,,,,...,,,,,,,,,,
2006-04-17 09:32:00,-0.002266,,-0.002266,,,,,,,,...,,,,,,,,,,
2006-04-17 11:04:00,0.000882,,0.000882,,,,,,,,...,,,,,,,,,,
2006-04-17 13:21:00,-0.008598,,-0.008598,,,,,,,,...,,,,,,,,,,
2006-04-17 14:15:00,-0.004072,,-0.004072,,,,,,,,...,,,,,,,,,,
2006-04-17 16:01:00,0.003818,,0.003818,,,,,,,,...,,,,,,,,,,
2006-04-18 10:11:00,0.003424,,0.003424,,,,,,,,...,,,,,,,,,,
2006-04-18 11:53:00,0.004673,,0.004673,,,,,,,,...,,,,,,,,,,
2006-04-18 12:54:00,0.003522,,0.003522,,,,,,,,...,,,,,,,,,,


In [14]:
import pandas as pd
series = pd.Series([1,2,34,4,5, 8, 9, 100, 50])
df = pd.DataFrame({'Close': series})
nplog = np.log(series).diff()






NameError: name 'np' is not defined

In [15]:
column = "Close"
window = 4
rolly = df[column].rolling(window)
(df[column] - rolly.mean()) / rolly.std()



0         NaN
1         NaN
2         NaN
3   -0.393518
4   -0.410702
5   -0.332907
6    1.050210
7    1.498992
8    0.189722
Name: Close, dtype: float64

In [114]:
dd = series.astype(str)

In [140]:
majority = lambda x:np.sum(x)
df = pd.DataFrame({'a' : [1,1,1,1,1,2,1,2,2,2,2]})
# df['a'] = df['a'].rolling(3).apply(majority)
df['b'] = list('aaaababbbba')

cat = pd.Categorical(df['b'])
df['b'] = pd.Series(cat.codes).rolling(3).apply(majority, raw=True)
df['b'] = df['b'].map(pd.Series(cat.categories))
print(df)


    a    b
0   1  NaN
1   1  NaN
2   1    a
3   1    a
4   1    b
5   2    b
6   1  NaN
7   2  NaN
8   2  NaN
9   2  NaN
10  2  NaN


In [126]:
pd.Series(cat.codes)

0     0
1     0
2     0
3     0
4     1
5     0
6     1
7     1
8     1
9     1
10    0
dtype: int8

In [145]:
ss

0       NaN
1       NaN
2       NaN
3       NaN
4     some_
5     ome_s
6     me_st
7     e_str
8     _stri
9     strin
10    tring
dtype: object

In [142]:
series.rolling(3).mean()

0          NaN
1          NaN
2    12.333333
3    13.333333
4    14.333333
5     5.666667
6     7.333333
7    39.000000
8    53.000000
dtype: float64