In [None]:
# ThoroughBet Simulation


## Load necessary modules

In [1]:
import numpy as np
from datetime import datetime

from thbmodel.utils import timestamp, YEAR, settings
from thbmodel.utils.arrayview import ArrayView, TimeseriesView

from thbmodel.prediction.models.fit_model import TSModel
from thbmodel.prediction.models.model_parameters import ModelParameters
from thbmodel.prediction.models.factor_management import FactorList
from thbmodel.prediction.models.preprocessing import load_slices

from thbmodel.prediction.models.prediction import factornames_trimmed
from thbmodel.prediction.models.parameters import factor_build_end

### Load data

In [2]:
# Load the racing ArrayView from the bcolz file located via settings.paths.
av = ArrayView.from_file(settings.paths.join('racing_data_azd.av.bcolz'))

In [3]:
# Display the resolved path of the ArrayView file loaded above.
settings.paths.join('racing_data_azd.av.bcolz')

'/home/oleg/thbmodel/racing_data/racing_data_azd.av.bcolz'

In [4]:
# Load the time-slice data; join() with no arguments presumably yields the
# base data directory -- TODO confirm.
tsav = load_slices(settings.paths.join())

### Preprocessing

In [5]:
# Model parameters with the out-of-sample period starting on 2017-04-01.
# depth, lmbd and verbose are forwarded to ModelParameters unchanged.
pars = ModelParameters(av, oos_start=float(timestamp('2017-04-01')), depth=1, lmbd=10, verbose=True)

In [6]:
# Show the boundaries of the data splits as readable timestamps.
# Note that the 'is1_start' line reports pars.build_end.
# Single-argument print(...) emits the same text as the print statement.
print('is1_start   %s' % (timestamp(pars.build_end),))
print('is2_start   %s' % (timestamp(av.start_time[pars.is2][0]),))
print('oos_start   %s' % (timestamp(pars.oos_start),))
print('av_end   %s' % (timestamp(pars._av_end),))

is1_start   2016-04-01 00:00:00.000000
is2_start   2016-04-01 12:40:00.000000
oos_start   2017-04-01 00:00:00.000000
av_end   2017-08-22 02:30:00.000000


In [7]:
# Build the factor list from the ArrayView using the trimmed set of factor names.
fl = FactorList.from_av(av, factornames_trimmed)

In [8]:
# Preprocess the factors with the model parameters (the log output reports
# rescaling and filling of missing values), then export them as a matrix.
# Later cells treat axis 0 of this matrix as the factor axis.
fl.preprocess(pars)
factors = fl.asmatrix()

INFO:models:Getting factors from av and rescaling...


. . . . .

INFO:models:Filling in missing values...
INFO:models:Computing each factor as linear combination of all the others...


 . . . . .

INFO:models:Number of missing patterns: 4024


 . . . .


In [9]:
def new_factors_array(X, mask=None):
    """Scatter the rows of ``X`` back into a zero-filled, full-length factor matrix.

    Parameters
    ----------
    X : 2-D array
        One row per selected position and one column per factor, so
        ``X.shape[0]`` must match the number of positions selected by ``mask``.
    mask : 1-D boolean array, optional
        Selects the columns of the result that receive ``X.T``. When omitted,
        ``pars.model_mask`` is looked up at call time.

    Returns
    -------
    2-D float array of shape ``(X.shape[1], mask.shape[0])``; columns not
    selected by ``mask`` stay zero.
    """
    # Resolve the default lazily: a default written as ``pars.model_mask`` in
    # the signature would be frozen when the cell defining this function runs
    # and would go stale if ``pars`` is rebuilt afterwards.
    if mask is None:
        mask = pars.model_mask

    factors_new = np.zeros((X.shape[1], mask.shape[0]))
    factors_new[:, mask] = X.T

    return factors_new

In [10]:
def DF(mask, factors, av, factors_names, other_names):
    """Create a DataFrame from the factor matrix and selected ``av`` columns.

    The columns of ``factors`` selected by ``mask`` are transposed, so each
    factor becomes one DataFrame column named after ``factors_names``. Every
    name in ``other_names`` is then read from ``av``, filtered with the same
    mask, and appended as an extra column in the given order.
    """
    import pandas as pd

    selected = factors[:, mask].T
    frame = pd.DataFrame(data=selected, columns=factors_names)

    for name in other_names:
        frame[name] = av[name][mask]

    return frame

In [11]:
import pandas as pd
pd.set_option('display.max_columns', 90)

# One generated column name per row of the factor matrix: f1 ... fn.
n, m = factors.shape
col_names = ['f%d' % i for i in range(1, n + 1)]

# Factor values plus identifying columns for every row selected by the model mask.
extra_columns = ['event_id', 'runner_id', 'result', 'start_time',
                 'jockey', 'trainer', 'prize']
df = DF(pars.model_mask, factors, av, col_names, extra_columns)

# Attach the split flags, restricted to the same rows.
for flag in ('is1', 'is2', 'oos'):
    df[flag] = getattr(pars, flag)[pars.model_mask]

df.head()

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,event_id,runner_id,result,start_time,jockey,trainer,prize,is1,is2,oos
0,0.367198,-2.822814,1.042788,-1.449,-0.399689,1.51956,1.120796,-0.903438,2.149808,0.454737,0.397418,2.01894,-0.04112,-1.864951,0.90344,-0.595091,-2.3511,0.791641,-0.371619,-0.134965,0.383271,-1.827844,-0.662651,1.423997,0.518412,0.633018,-0.217998,-0.046744,1.832861,0.362147,1.789398,0.713323,0.847637,2.19771,0.333612,0.992382,2.005222,1.745112,-2.169849,0.024419,0.516602,0.838355,-0.231825,0.510499,-0.818574,1.69085,1.153778,0.0,-0.178424,1.606775,1.276101,0.903439,-0.903432,1.539463,307976,166586,4,1459514000.0,4454,797,3899.0,True,True,False
1,0.367198,-2.822814,1.146508,-0.950907,0.134657,1.279091,0.709838,0.51118,1.163663,0.55467,0.589283,2.676868,-0.85392,-1.76216,-0.51118,0.35515,0.32579,-0.024739,0.462871,0.297558,1.50307,-1.92574,-0.642401,2.296713,-0.518494,1.678214,-0.325527,0.081803,-0.721549,3.691803,1.126171,1.576948,0.329637,0.13826,1.86692,1.105528,1.685527,1.745112,-1.926777,0.024419,-0.399449,0.838355,1.043211,0.510499,-0.881552,0.215314,0.760771,0.0,-0.103089,1.421764,0.38206,-0.51118,0.511188,-0.341251,307976,383786,1,1459514000.0,4764,3941,3899.0,True,True,False
2,0.367198,-2.269686,1.109573,1.041469,-1.246235,0.959507,0.533713,-0.834292,2.054531,0.589805,0.781147,-0.425354,-0.30843,-1.495752,0.834292,-0.561211,0.570014,-1.385371,0.230456,0.295614,0.2217,-1.316387,-1.368199,0.486579,1.95442,0.373406,-0.280811,0.21035,0.824291,0.09982,0.924145,0.69513,0.329637,-2.094361,0.463911,1.114437,1.611524,1.745112,-1.271665,-0.100011,-0.118486,-1.065489,-0.231825,0.510499,2.544475,-0.108367,-0.777575,0.0,-0.034824,0.36674,0.442908,0.834292,-0.834286,0.687687,307976,277325,2,1459514000.0,498,1304,3899.0,True,True,False
3,0.367198,-2.082636,-0.546706,1.041469,-0.177928,0.807879,1.825297,0.377624,-0.891473,2.35905,0.690858,-1.025506,2.586447,-1.542717,-0.377623,0.483454,0.439499,-1.929624,-0.011781,0.218581,1.02822,-0.193052,-0.39014,-0.525911,0.543346,0.061765,-0.134161,0.21035,-1.481083,-1.201241,-0.343101,-0.052652,-0.188364,0.02454,-0.598031,-0.847758,1.648525,1.745112,-0.854125,-0.057719,-0.967518,-1.950998,-0.231825,0.510499,-0.705213,-0.063721,0.946241,0.0,0.028652,0.979326,3.985849,-0.377623,0.377629,1.750346,307976,384043,5,1459514000.0,8604,10803,3899.0,True,True,False
4,0.367198,-1.620791,0.904502,1.041469,-0.645139,0.037615,-0.229496,-0.471971,1.330716,1.71716,0.167181,-0.232777,-0.30843,-0.513871,0.471972,-0.296116,0.195127,-1.385371,0.358433,0.311468,1.782793,-1.105682,-0.162607,-0.525911,1.667236,1.301977,-0.517036,0.21035,0.340718,4.356093,0.522335,1.51712,-0.188364,-1.785856,1.20081,0.854792,0.270584,1.745112,-0.640691,0.049978,-0.749274,-0.453683,-0.231825,0.510499,2.587987,-0.043631,1.434019,0.0,-0.145211,1.003482,0.699005,0.471972,-0.471966,0.36518,307976,282625,8,1459514000.0,8898,4834,3899.0,True,True,False


In [12]:
# Number of rows flagged for each split (is1, is2, oos).
df.is1.sum(), df.is2.sum(), df.oos.sum()

(131599, 131599, 59986)

In [13]:
# Sanity check: full shape, shape of the factor columns after dropping rows
# with missing values, and the number of distinct events.
df.shape, df[col_names].dropna().shape , df['event_id'].unique().shape

((191585, 64), (191585, 54), (18563,))

In [14]:
from sklearn.decomposition import PCA

seed = 7
n_components = 50
name_pca = ['pca_f{}'.format(i) for i in range(1, n_components + 1)]

# Project the factor columns onto the leading principal components.
# NOTE(review): the PCA is fit on every row of df, which includes rows flagged
# as oos -- confirm that fitting on out-of-sample data is intended.
pca = PCA(n_components=n_components, random_state=seed)
pca_scores = pca.fit_transform(df.loc[:, col_names])
df_pca = pd.DataFrame(data=pca_scores, columns=name_pca)

df_pca.head()

Unnamed: 0,pca_f1,pca_f2,pca_f3,pca_f4,pca_f5,pca_f6,pca_f7,pca_f8,pca_f9,pca_f10,pca_f11,pca_f12,pca_f13,pca_f14,pca_f15,pca_f16,pca_f17,pca_f18,pca_f19,pca_f20,pca_f21,pca_f22,pca_f23,pca_f24,pca_f25,pca_f26,pca_f27,pca_f28,pca_f29,pca_f30,pca_f31,pca_f32,pca_f33,pca_f34,pca_f35,pca_f36,pca_f37,pca_f38,pca_f39,pca_f40,pca_f41,pca_f42,pca_f43,pca_f44,pca_f45,pca_f46,pca_f47,pca_f48,pca_f49,pca_f50
0,5.85706,3.96011,1.197776,1.509141,1.548961,0.229756,-2.092466,-0.768923,1.843824,0.526431,0.141392,-1.895666,-0.99783,-1.259986,-1.666113,1.161771,0.423352,-0.655191,0.086684,0.069927,1.29178,-0.24833,0.09711,-0.633195,0.923213,-0.092589,-0.100181,-0.039764,0.09153,-0.006046,-0.572252,1.090489,-0.281813,-0.043857,-0.110201,-0.235521,1.048968,-0.379104,0.083641,-0.652423,0.341189,0.372509,0.131216,-0.188307,-0.180405,0.017501,-0.121157,0.645199,0.051863,0.091988
1,7.704167,-0.451885,-0.975158,-0.04487,-0.601668,1.359202,-0.470939,-1.366731,1.606,-0.411714,-0.646616,-0.183469,-0.611795,-0.996634,-1.696452,-0.2175,-0.700865,0.47998,-0.578627,0.113507,0.248621,0.650738,-1.092617,-0.859617,0.596831,-0.542108,-0.188002,-0.807099,-0.342783,-0.021958,-0.45112,0.618254,-0.538517,-0.073784,0.038227,0.927769,-0.370118,0.06853,-0.163859,-0.052606,-0.397528,0.087951,-0.312525,-0.406633,-0.405972,0.235588,0.380937,-0.469784,0.021403,0.080342
2,3.837558,1.400571,0.357207,1.51308,1.36417,-1.127538,0.577371,2.138063,-1.049425,1.157921,2.134747,1.155988,-0.967698,1.008603,-1.733491,0.660901,-1.574509,0.294261,-0.613549,0.443753,-0.761914,1.604995,-0.057513,-1.161466,-0.051578,0.223283,-1.698734,0.006007,-0.490987,-0.881975,-0.370787,-0.214363,-0.406561,0.256742,0.320433,0.369017,1.695597,0.032526,-0.009471,0.090363,0.223611,0.030168,0.307682,0.252193,-0.11935,0.365714,-0.065368,0.107039,0.100619,0.104802
3,2.060212,-0.426516,2.314128,0.565045,0.520191,-0.959138,-2.140019,3.3769,-0.935472,2.816953,1.963168,0.109417,0.421464,-2.046681,-1.651683,-1.256548,0.425252,0.931787,-0.287671,-0.06366,-0.275034,-1.734383,-1.496649,2.299499,-0.999995,-0.696007,-0.117361,-0.36949,-1.265718,-0.73671,0.570268,1.275966,0.311797,0.217195,0.411096,1.510822,-0.74525,-0.535749,-0.494792,0.792972,-0.313527,0.289259,-0.087767,0.13406,-0.464869,0.482796,-0.404116,-0.077241,0.0696,-0.0056
4,5.197154,0.231656,-3.373365,-1.237842,1.38928,-1.186405,0.662654,1.852243,-0.696708,0.636053,1.147252,0.86374,0.870212,0.867187,-1.779064,-0.650132,-0.629182,1.284434,-0.649957,0.98661,-1.023873,1.173657,0.011262,-0.551504,-0.35521,-0.104547,-0.216697,0.266577,-0.474842,0.085959,0.53823,0.288889,0.343228,-0.153336,0.357224,0.003242,1.197687,-0.092582,-0.312634,-0.312359,-0.093204,-0.141603,-0.083421,0.246759,0.26857,0.730237,-0.009957,0.467583,-0.042181,0.263234


In [15]:
# Identity matrix: one unit basis vector (as a row) per selected PCA column.
nor_vectors = np.eye(df_pca[name_pca].shape[1])

In [16]:
def matrix_cosine(matrix, vector):
    """Return the cosine distance (1 - cosine similarity) of each row to ``vector``.

    ``matrix`` is a 2-D array whose rows are compared against the 1-D
    ``vector``; the result has one entry per row.
    """
    dot_products = matrix.dot(vector)
    norm_products = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)
    return 1 - dot_products / norm_products

In [17]:
%%time 
name_cos = []
matrix_norms = np.linalg.norm(df_pca[name_pca].values, axis=1)
vector_norm = np.linalg.norm(nor_vectors[0])

for i, v  in enumerate(nor_vectors):
    
    df_pca['cos_' + str(i+1)] = 1- np.divide(df_pca[name_pca].values.dot(v),np.multiply(matrix_norms, vector_norm))
    name_cos.append('cos_' + str(i+1))

CPU times: user 10.4 s, sys: 1.27 s, total: 11.6 s
Wall time: 2.97 s


In [18]:
# Raise the column display limit so the added cos_* columns are shown, then
# preview the last rows.
pd.set_option('display.max_columns', 110)
df_pca.tail()

Unnamed: 0,pca_f1,pca_f2,pca_f3,pca_f4,pca_f5,pca_f6,pca_f7,pca_f8,pca_f9,pca_f10,pca_f11,pca_f12,pca_f13,pca_f14,pca_f15,pca_f16,pca_f17,pca_f18,pca_f19,pca_f20,pca_f21,pca_f22,pca_f23,pca_f24,pca_f25,pca_f26,pca_f27,pca_f28,pca_f29,pca_f30,pca_f31,pca_f32,pca_f33,pca_f34,pca_f35,pca_f36,pca_f37,pca_f38,pca_f39,pca_f40,pca_f41,pca_f42,pca_f43,pca_f44,pca_f45,pca_f46,pca_f47,pca_f48,pca_f49,pca_f50,cos_1,cos_2,cos_3,cos_4,cos_5,cos_6,cos_7,cos_8,cos_9,cos_10,cos_11,cos_12,cos_13,cos_14,cos_15,cos_16,cos_17,cos_18,cos_19,cos_20,cos_21,cos_22,cos_23,cos_24,cos_25,cos_26,cos_27,cos_28,cos_29,cos_30,cos_31,cos_32,cos_33,cos_34,cos_35,cos_36,cos_37,cos_38,cos_39,cos_40,cos_41,cos_42,cos_43,cos_44,cos_45,cos_46,cos_47,cos_48,cos_49,cos_50
191580,-1.444425,5.087505,1.126967,2.278623,0.631691,-0.415173,0.835607,1.157551,0.557861,-0.774073,0.210253,0.323072,-1.100669,0.064099,-0.004531,-0.858423,0.954717,-0.131756,0.453657,-0.395604,0.646956,-0.965451,-0.055025,-0.310645,-0.212289,-0.145304,1.097934,0.677582,-0.436957,-0.268853,-0.655941,0.4411,0.773616,-0.579346,0.092973,-0.108444,-0.478808,-0.250509,1.024751,-0.249166,-0.509796,-0.091405,0.105899,0.206207,0.267074,-0.273436,0.269857,0.399345,0.041386,-0.266538,1.207313,0.26981,0.838251,0.672958,0.909336,1.059588,0.880069,0.833861,0.919932,1.1111,0.969823,0.953631,1.157975,0.9908,1.00065,1.123206,0.862973,1.01891,0.934888,1.05678,0.907145,1.138567,1.007897,1.044586,1.030469,1.020855,0.842418,0.902749,1.062715,1.038587,1.094145,0.936691,0.888966,1.083151,0.986656,1.015564,1.068722,1.035955,0.852922,1.035762,1.073169,1.013119,0.984801,0.970404,0.961668,1.039245,0.961268,0.942684,0.99406,1.038255
191581,1.500506,-1.228456,-2.140082,0.296515,1.081001,-0.062465,-1.207632,-0.377907,-0.39866,0.132273,0.224688,-0.681076,0.801244,-0.216909,0.011898,0.544167,0.413314,-0.04575,-0.099041,0.149407,0.460171,-0.308564,0.500216,0.001907,-0.294252,-0.528242,-0.142159,-0.61515,0.163948,-0.011479,-0.725583,0.307007,0.170469,0.19979,-0.140965,-0.087716,-0.124717,0.256195,-0.122912,0.448516,0.489347,-0.25333,0.164526,-0.142789,0.08948,0.026271,-0.333319,0.313471,0.09437,0.109045,0.625299,1.306765,1.534413,0.925955,0.730057,1.015599,1.301565,1.094369,1.099552,0.966969,0.943892,1.170076,0.799917,1.054166,0.997029,0.864113,0.896789,1.011424,1.024732,0.962691,0.885088,1.077053,0.875088,0.999524,1.073479,1.131911,1.035499,1.153613,0.95906,1.002866,1.18119,0.923335,0.957431,0.950109,1.035201,1.021904,1.031144,0.936024,1.030693,0.887998,0.877802,1.063261,0.958915,1.035657,0.977655,0.99344,1.083235,0.921721,0.976434,0.97277
191582,-1.281335,-3.209143,-1.639217,0.235486,-0.287726,-0.79809,-0.156064,-0.122086,-1.035604,-0.436927,-0.037051,-0.023712,1.186104,0.403967,-0.01019,0.198394,0.607112,0.028868,0.098874,-0.51232,-0.329549,0.668133,-1.378366,-1.215965,0.925508,-0.137654,-0.726689,0.375228,0.675293,0.679233,0.60279,0.982015,0.139185,-0.32613,0.948633,-0.350836,-0.119009,0.021066,-0.435203,-0.165546,0.589349,-0.038039,-0.034311,-0.093459,-0.03205,0.34931,-0.372578,-0.414684,0.028167,-0.094305,1.238499,1.597329,1.305113,0.956168,1.053555,1.148551,1.029049,1.022724,1.19276,1.081327,1.006896,1.004414,0.779226,0.924808,1.001897,0.963072,0.886996,0.994627,0.981596,1.09536,1.06134,0.875638,1.25656,1.226332,0.827732,1.025622,1.135261,0.930158,0.874305,0.873572,0.887801,0.817214,0.974093,1.060704,0.823428,1.065302,1.022152,0.996079,1.081006,1.030814,0.890302,1.00708,1.006386,1.017396,1.005966,0.934982,1.069349,1.077187,0.994757,1.017553
191583,-0.201997,-0.518117,0.741988,-1.176537,-0.718039,-0.3258,0.015987,-0.673388,-0.227488,1.071468,-0.523873,0.291634,1.270639,-0.702644,0.025852,0.564747,-0.300643,-0.511445,-0.100966,-0.217833,-0.043746,-0.787384,0.638208,0.066403,-0.111742,0.415559,-0.302603,-0.466739,0.59669,-0.158156,-0.833965,-0.298304,0.255059,0.907149,-0.342815,-0.230956,0.065937,0.117942,-0.568669,0.511732,0.196751,0.681716,-0.17248,-0.114062,-0.120057,0.432003,0.532794,-0.141644,-0.014061,0.254815,1.055622,1.142669,0.795686,1.323971,1.197719,1.089712,0.995598,1.185424,1.062641,0.704961,1.144254,0.919696,0.650117,1.19348,0.992881,0.844491,1.082785,1.140832,1.027802,1.059983,1.012046,1.216814,0.824263,0.981715,1.030769,0.885572,1.083325,1.128521,0.835696,1.04355,1.229641,1.082141,0.929767,0.750208,1.094397,1.063596,0.981844,0.967523,1.156589,0.85909,0.945823,0.812283,1.047494,1.031408,1.033059,0.881044,0.85329,1.039003,1.003872,0.929834
191584,-1.537675,-1.649772,1.126688,4.406929,-1.234997,4.369664,1.86271,0.502706,-1.300297,-1.861286,0.328113,1.458786,1.554188,-0.588189,0.034221,1.278483,0.304085,1.28255,-0.358059,0.546025,1.047111,-0.119416,-1.758624,0.054555,0.299882,-0.301529,-0.120289,0.091938,0.428563,0.397748,-0.974673,-0.957974,-0.529166,1.114031,-1.500713,2.498339,-0.031876,0.592651,1.020801,0.249505,1.522516,-3.810703,0.432612,0.360878,-0.394921,0.240143,0.144262,-0.08983,1.833597,-0.210979,1.151536,1.162583,0.888966,0.565703,1.121707,0.569376,0.816433,0.950459,1.128142,1.183427,0.967665,0.856239,0.846837,1.057965,0.996628,0.874007,0.970033,0.873606,1.035286,0.94619,0.896809,1.011768,1.17331,0.994624,0.970447,1.029715,1.011854,0.99094,0.957766,0.960802,1.096053,1.094407,1.052149,0.890214,1.147893,0.753792,1.003141,0.941595,0.899402,0.975412,0.849958,1.375539,0.957367,0.964436,1.038919,0.976334,0.985783,1.008853,0.819302,1.020792


In [19]:
# Keep independent copies of the split masks; later cells combine these copies
# with per-cluster masks and assign new masks onto model parameter objects.
is1 = pars.is1.copy()
is2 = pars.is2.copy()
oos = pars.oos.copy()

#### Old Model

In [20]:
# Baseline ("old") model over the full factor matrix, the time slices and pars.
tsmod = TSModel(factors, tsav, pars)

In [21]:
# Turn off verbose output, fit the baseline model on the time slices, and
# print its stats1.ll array.
# NOTE(review): if tsmod.params is the same object as pars, this also switches
# verbose off for pars -- confirm.
tsmod.params.verbose =False
tsmod.fit_slices()
print tsmod.stats1.ll

  stats, self.step2probs[sl] = self.concat_and_fit(strata, result, nonrunner, [fback, flay, np.log(probs)], ts_idx, valid2, verbose=False, step=2)


[[-1831.78747073 -1831.78747073 -2024.21796183]
 [-1837.01722248 -1837.01722248 -2018.80877538]
 [-1841.70057523 -1841.70057523 -2015.15322262]
 [-1844.67347262 -1844.67347262 -2008.33238078]
 [-1860.10344473 -1860.10344473 -1988.12672263]
 [-1872.51767442 -1872.51767442 -1980.13462004]
 [-1905.16804423 -1905.16804423 -1966.06381442]
 [-1918.05565341 -1918.05565341 -1954.72039928]
 [-1921.50022849 -1921.50022849 -1956.27764913]
 [-1925.42082956 -1925.42082956 -1950.42820892]
 [           nan            nan            nan]]


In [22]:
# Number of distinct events in the is1, is2 and oos splits.
n_events = [len(np.unique(av.event_id[split])) for split in (is1, is2, oos)]
print('train {}  val {}  test  {}'.format(*n_events))

train 12724  val 12724  test  5839


In [59]:
from thbmodel.prediction.tools.clustering import write_dic_to_simdata

#write_dic_to_simdata('new_factors_direction_new_code.p', new_model.step1probs, new_model.eff_coefs, oos, av =av)
# Hand the baseline model's step1probs and eff_coefs, together with the oos
# mask, to write_dic_to_simdata under the name 'old_model.p' (presumably
# written to disk for the simulator -- confirm against the helper).
write_dic_to_simdata('old_model.p', tsmod.step1probs, tsmod.eff_coefs, oos, av=av)

In [24]:
df_pca['event_id'] = df['event_id']

# Per-event spread of every cos_* feature: (max - min) divided by the number
# of non-null rows in the event.
cos_by_event = df_pca.loc[:, name_cos].groupby(df_pca['event_id'])
df_vol = (cos_by_event.max() - cos_by_event.min()) / cos_by_event.count()
df_vol.head()

Unnamed: 0_level_0,cos_1,cos_2,cos_3,cos_4,cos_5,cos_6,cos_7,cos_8,cos_9,cos_10,cos_11,cos_12,cos_13,cos_14,cos_15,cos_16,cos_17,cos_18,cos_19,cos_20,cos_21,cos_22,cos_23,cos_24,cos_25,cos_26,cos_27,cos_28,cos_29,cos_30,cos_31,cos_32,cos_33,cos_34,cos_35,cos_36,cos_37,cos_38,cos_39,cos_40,cos_41,cos_42,cos_43,cos_44,cos_45,cos_46,cos_47,cos_48,cos_49,cos_50
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
307976,0.144777,0.054453,0.062913,0.04566,0.029995,0.033158,0.046675,0.060975,0.043525,0.06651,0.046428,0.056831,0.031593,0.040292,0.024414,0.02931,0.030102,0.025903,0.060427,0.045117,0.030902,0.038106,0.053697,0.038824,0.030931,0.037188,0.04406,0.017152,0.026434,0.026105,0.012632,0.035163,0.027509,0.032664,0.022877,0.0454,0.036207,0.024557,0.010264,0.015531,0.009274,0.009347,0.007976,0.007595,0.023868,0.015505,0.00822,0.011212,0.003518,0.004805
307977,0.231648,0.075873,0.163627,0.067752,0.096934,0.07657,0.094836,0.089227,0.100437,0.079232,0.065245,0.102205,0.070534,0.126267,0.005348,0.061052,0.039587,0.056867,0.045325,0.072013,0.038181,0.039908,0.069739,0.047088,0.030456,0.083071,0.056348,0.015023,0.020812,0.042435,0.043904,0.044676,0.037597,0.019589,0.018149,0.062858,0.023539,0.020366,0.030555,0.026619,0.015596,0.033433,0.033726,0.013585,0.023353,0.021174,0.018434,0.013148,0.033058,0.007537
307978,0.096777,0.120978,0.084204,0.076561,0.066817,0.038958,0.052988,0.047698,0.037219,0.038699,0.033762,0.067809,0.03273,0.028725,0.001139,0.034558,0.030364,0.021245,0.021501,0.029264,0.029841,0.016305,0.047513,0.019258,0.025253,0.019719,0.034976,0.040255,0.040525,0.025351,0.02522,0.01605,0.024247,0.02785,0.017841,0.03197,0.019027,0.027695,0.023332,0.020887,0.024082,0.011643,0.02363,0.017175,0.014331,0.019343,0.012134,0.019747,0.003429,0.010727
307979,0.243865,0.141587,0.081184,0.120555,0.073365,0.070694,0.175401,0.080589,0.057557,0.150191,0.113239,0.122618,0.117566,0.106094,0.004738,0.105067,0.081036,0.120918,0.034787,0.076271,0.089245,0.04292,0.039525,0.053066,0.055314,0.01746,0.033989,0.020679,0.039538,0.017922,0.032443,0.031761,0.028857,0.06893,0.031108,0.064555,0.038712,0.035631,0.050732,0.041367,0.020321,0.032119,0.007229,0.011273,0.025259,0.030398,0.027554,0.018264,0.008251,0.012427
307980,0.217316,0.081649,0.096528,0.056985,0.07457,0.083053,0.143934,0.058488,0.079646,0.056446,0.117879,0.078131,0.067005,0.046966,0.002688,0.07078,0.060261,0.061558,0.179494,0.115593,0.036163,0.061185,0.026935,0.057415,0.025322,0.023265,0.030453,0.069548,0.051887,0.07427,0.020322,0.04988,0.048118,0.058501,0.036599,0.034582,0.063666,0.049633,0.01988,0.052039,0.019956,0.012562,0.039171,0.019817,0.020939,0.023685,0.01791,0.025665,0.004418,0.019906


In [25]:
from sklearn.mixture import BayesianGaussianMixture

# Two-component Bayesian Gaussian mixture on the per-event spread features.
# The feature matrix is taken once, from the cos_* columns only.
vol_features = df_vol.loc[:, name_cos].values

BGM = BayesianGaussianMixture(n_components=2, random_state=seed)
BGM.fit(vol_features)
df_vol['cluster'] = BGM.predict(vol_features)

# Model score on the data it was fitted on.
score = BGM.score(vol_features)
print(score)

150.197580786


In [26]:
# Number of events assigned to each mixture component.
df_vol['cluster'].value_counts()

0    11416
1     7147
Name: cluster, dtype: int64

In [27]:
# Mean of every cos_* spread feature within each cluster.
df_vol.loc[:,name_cos].groupby(df_vol.cluster).mean()

Unnamed: 0_level_0,cos_1,cos_2,cos_3,cos_4,cos_5,cos_6,cos_7,cos_8,cos_9,cos_10,cos_11,cos_12,cos_13,cos_14,cos_15,cos_16,cos_17,cos_18,cos_19,cos_20,cos_21,cos_22,cos_23,cos_24,cos_25,cos_26,cos_27,cos_28,cos_29,cos_30,cos_31,cos_32,cos_33,cos_34,cos_35,cos_36,cos_37,cos_38,cos_39,cos_40,cos_41,cos_42,cos_43,cos_44,cos_45,cos_46,cos_47,cos_48,cos_49,cos_50
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0,0.097939,0.074175,0.07721,0.068026,0.057328,0.047752,0.054525,0.050217,0.044873,0.043701,0.04259,0.043218,0.041622,0.039548,0.008063,0.039244,0.039371,0.037194,0.036135,0.036261,0.030889,0.031455,0.034481,0.031867,0.031955,0.03027,0.030094,0.029789,0.029644,0.028319,0.027647,0.027202,0.025984,0.023367,0.023381,0.024058,0.022306,0.021846,0.020363,0.020878,0.019202,0.016231,0.017108,0.014888,0.015992,0.01568,0.013736,0.011293,0.008984,0.008627
1,0.165674,0.104754,0.124222,0.093704,0.089604,0.07173,0.085924,0.079853,0.073683,0.067781,0.064487,0.066417,0.068579,0.063326,0.006912,0.061632,0.060706,0.055124,0.05632,0.057753,0.050335,0.050997,0.053655,0.049037,0.046867,0.047256,0.047144,0.044111,0.042791,0.043789,0.04228,0.040422,0.039668,0.035449,0.035868,0.036203,0.034403,0.034199,0.031887,0.032304,0.026755,0.024453,0.029246,0.022937,0.023416,0.021811,0.022646,0.017152,0.013106,0.013148


In [28]:
# Relabel component 1 as -1. Assign the result back explicitly: calling
# replace(..., inplace=True) on a column selection is a chained in-place
# mutation and is not guaranteed to modify df_vol itself.
df_vol['cluster'] = df_vol['cluster'].replace(1, -1)

# Attach each event's cluster label to its rows in df_pca.
df_pca = df_pca.join(df_vol.cluster, on='event_id')

In [30]:
# Per-event mean of every cos_* feature, restricted to rows in cluster 0.
in_cluster0 = df_pca['cluster'] == 0
df_cos = df_pca.loc[in_cluster0, name_cos].groupby(df_pca['event_id']).mean()
df_cos.head()

Unnamed: 0_level_0,cos_1,cos_2,cos_3,cos_4,cos_5,cos_6,cos_7,cos_8,cos_9,cos_10,cos_11,cos_12,cos_13,cos_14,cos_15,cos_16,cos_17,cos_18,cos_19,cos_20,cos_21,cos_22,cos_23,cos_24,cos_25,cos_26,cos_27,cos_28,cos_29,cos_30,cos_31,cos_32,cos_33,cos_34,cos_35,cos_36,cos_37,cos_38,cos_39,cos_40,cos_41,cos_42,cos_43,cos_44,cos_45,cos_46,cos_47,cos_48,cos_49,cos_50
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
307976,0.988414,0.988645,0.99414,0.989843,0.996406,1.011892,1.006646,0.993953,0.99535,0.999831,1.000067,0.980039,1.007508,0.994537,1.2227,0.993721,1.008357,0.99781,1.012979,1.008961,1.005076,0.986891,0.995002,1.003647,1.005229,1.000653,1.019289,0.994982,1.002059,1.002861,1.001186,0.994627,1.001351,1.005526,0.991811,0.983165,0.996244,1.011836,0.998055,1.001565,1.002835,0.996633,0.997748,1.000032,1.005532,0.999333,1.001435,1.00151,1.00001,0.993001
307978,0.977653,1.011403,0.976987,0.984892,1.020286,1.001485,0.997583,0.985292,0.986278,0.974404,0.997478,0.999815,0.986559,0.995284,0.998771,1.010327,1.013149,0.99291,1.004303,1.003529,1.00963,0.9908,1.038725,1.010206,1.003678,0.984607,0.995426,1.002218,0.986308,0.996876,0.992073,0.994901,0.997412,1.020598,1.012322,1.001046,0.998069,1.000138,0.994408,0.999054,0.999987,1.0032,0.998207,0.996711,1.000459,1.007832,1.007171,1.010721,0.998541,0.982221
307985,0.997979,1.071316,0.979639,0.991343,1.037544,0.987281,0.987447,0.977451,0.98777,0.998586,0.997626,0.991482,0.992418,1.001754,0.998602,1.008517,1.016668,0.999281,0.998823,1.007843,0.999695,1.014818,1.007865,0.995074,1.00536,1.004746,1.008125,1.002478,0.994192,0.996701,0.991535,1.001664,1.003043,0.988964,0.990962,0.994497,1.003175,0.999149,1.000627,0.993417,1.006556,0.996431,0.999321,1.000615,1.00303,1.003377,0.992434,0.986227,1.002066,1.021674
307987,0.950537,1.008601,1.018129,0.962967,0.995113,1.003133,0.99766,0.991108,0.992396,1.010108,0.985988,1.016781,1.002299,0.992642,0.998992,1.013552,0.972181,1.016611,0.99863,0.996833,0.996606,0.993583,1.011387,1.008874,1.007286,1.010775,1.020373,0.987831,1.003259,1.007772,0.992522,1.009825,1.000076,1.020847,0.993539,0.996589,0.980304,0.984045,0.990719,0.995562,1.008418,0.99094,1.002432,1.002223,1.00338,1.002325,0.997383,1.005588,0.996545,0.996133
307990,1.009459,1.003438,1.029966,1.02438,1.003228,0.966697,1.004191,0.987465,0.997305,1.005892,0.974138,1.006622,0.997652,0.996718,0.998598,1.023699,0.997235,0.990413,1.004069,0.999189,1.013975,1.001974,1.00122,0.992875,0.969723,0.990975,1.005739,0.999914,1.011274,0.9948,1.004379,1.000889,1.008235,0.987859,0.999706,0.99867,0.999433,1.005472,0.993656,1.000837,0.988286,1.001691,0.999249,1.01067,0.999971,0.997538,1.001656,1.011763,0.999863,0.984459


In [46]:
from sklearn.mixture import BayesianGaussianMixture

# Second-stage two-component mixture on the per-event mean cos_* features.
# The feature matrix is taken once, from the cos_* columns only.
cos_features = df_cos.loc[:, name_cos].values

BGM = BayesianGaussianMixture(n_components=2, random_state=seed)
BGM.fit(cos_features)
df_cos['cluster'] = BGM.predict(cos_features)

# Model score on the data it was fitted on.
score = BGM.score(cos_features)
print(score)

160.365357076


In [47]:
# Mean of every cos_* feature within each second-stage cluster.
df_cos.loc[:,name_cos].groupby(df_cos['cluster']).mean()

Unnamed: 0_level_0,cos_1,cos_2,cos_3,cos_4,cos_5,cos_6,cos_7,cos_8,cos_9,cos_10,cos_11,cos_12,cos_13,cos_14,cos_15,cos_16,cos_17,cos_18,cos_19,cos_20,cos_21,cos_22,cos_23,cos_24,cos_25,cos_26,cos_27,cos_28,cos_29,cos_30,cos_31,cos_32,cos_33,cos_34,cos_35,cos_36,cos_37,cos_38,cos_39,cos_40,cos_41,cos_42,cos_43,cos_44,cos_45,cos_46,cos_47,cos_48,cos_49,cos_50
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
0,1.004269,1.018877,0.997403,0.993808,1.001682,1.003462,0.999434,0.991492,0.991434,0.99704,1.003329,1.004479,1.000161,0.997978,0.999981,1.005802,1.001181,1.000903,1.000865,1.004017,1.001215,0.999193,1.001317,1.000748,1.000415,0.999339,1.00331,1.000947,1.00043,0.999522,0.998943,0.99927,1.002183,1.000815,0.997612,0.999787,0.999066,0.99954,1.001159,1.000978,1.000252,0.998533,0.999536,1.000305,0.999903,0.99999,0.999782,0.997532,1.000092,1.000813
1,1.008096,1.006341,0.997527,1.006281,1.008041,1.020645,1.007494,0.997855,0.99669,1.001991,0.996349,0.998685,0.998006,0.991821,1.000169,1.003755,0.996806,1.00625,1.006554,0.995384,0.99995,0.998968,1.000927,1.007253,0.997407,1.001522,1.002429,1.002453,1.001728,0.999592,1.000007,1.000112,1.002358,1.001174,1.001584,0.99945,1.003202,0.9999,0.999798,1.001196,0.99861,1.000964,0.99955,0.998436,1.000637,0.999543,1.000575,1.001211,0.999721,0.998691


In [48]:
# Number of events in each second-stage cluster.
df_cos['cluster'].value_counts()

0    6306
1    5110
Name: cluster, dtype: int64

In [62]:
# Final label per event: the second-stage labels (0 / 1) followed by the events
# labelled -1 in the first stage. pd.concat is used because Series.append is
# deprecated and no longer exists in pandas 2.0.
clusters = pd.concat([df_cos.cluster, df_vol.loc[df_vol['cluster'] == -1, 'cluster']])
clusters.value_counts()

-1    7147
 0    6306
 1    5110
Name: cluster, dtype: int64

#### Build the cluster models

In [50]:
# Fit one TSModel per cluster, restricting each split mask to that cluster's events.
clusters_pars = {}
clusters_models = {}
train_val_test = {}
data = clusters
df_cl = data.value_counts()

for cl in df_cl.index:

    print(cl)

    # Rows of av whose event carries this cluster label, intersected with each split.
    mask_cluster = np.in1d(av.event_id, data.index[data == cl])
    cluster_is1 = is1 & mask_cluster
    cluster_is2 = is2 & mask_cluster
    cluster_oos = oos & mask_cluster

    train_events = np.unique(av.event_id[cluster_is1])
    val_events = np.unique(av.event_id[cluster_is2])
    test_events = np.unique(av.event_id[cluster_oos])

    # A cluster that has no events in one of the splits is not modelled.
    if len(train_events) == 0 or len(val_events) == 0 or len(test_events) == 0:
        continue

    # NOTE(review): the model is built from the shared `pars`; if TSModel keeps
    # a reference instead of a copy, the assignments below also overwrite
    # pars.is1 / pars.is2 / pars.oos for every other model -- confirm.
    cluster_model = TSModel(factors, tsav, pars)
    clusters_models[cl] = cluster_model
    cluster_model.params.is1 = cluster_is1
    cluster_model.params.is2 = cluster_is2
    cluster_model.params.oos = cluster_oos
    train_val_test[cl] = train_events, val_events, test_events

    print('cluster {}  number  {}'.format(cl, df_cl[cl]))
    print('LL  {}          {}            {}'.format(len(train_events), len(val_events), len(test_events)))
    cluster_model.fit_slices()
    print(cluster_model.stats1.ll)
    

-1
cluster -1  number  7147
LL  4823          4823            2324
[[-1504.68320006 -1504.68320006 -1673.87782651]
 [-1510.18562377 -1510.18562377 -1670.81168609]
 [-1513.81703299 -1513.81703299 -1669.72991606]
 [-1518.09146469 -1518.09146469 -1665.47625132]
 [-1531.60888307 -1531.60888307 -1659.24675276]
 [-1546.76587403 -1546.76587403 -1651.60863063]
 [-1581.58817807 -1581.58817807 -1646.31600427]
 [-1596.711076   -1596.711076   -1638.97201399]
 [-1598.69993535 -1598.69993535 -1640.48116254]
 [-1604.2201285  -1604.2201285  -1639.80829347]
 [           nan            nan            nan]]
0
cluster 0  number  6306
LL  4166          4166            2140
[[-2111.17599683 -2111.17599683 -2309.88529998]
 [-2116.51039473 -2116.51039473 -2301.20660919]
 [-2121.35522429 -2121.35522429 -2299.31253663]
 [-2127.17330444 -2127.17330444 -2292.52670492]
 [-2147.79932909 -2147.79932909 -2273.20512556]
 [-2157.27113958 -2157.27113958 -2268.83171058]
 [-2197.32479936 -2197.32479936 -2247.21081897]
 [-

In [41]:
from thbmodel.prediction.tools.clustering import ll_diff

In [51]:
# Distinct values found in `clusters`; the bare last line displays them as the cell output.
clusters_list = clusters.unique()
clusters_list

array([ 0,  1, -1])

In [52]:
# Materialise the model keys as a real list: `cl_keys[0]` is indexed below, which a
# bare `dict.keys()` only supports on Python 2 (Python 3 returns a non-indexable view).
cl_keys = list(clusters_models.keys())
# Display the step2probs shape of the baseline model next to that of the first
# per-cluster model so the two can be compared by eye.
tsmod.step2probs.shape, clusters_models[cl_keys[0]].step2probs.shape

((11, 1805375), (11, 1805375))

##### compare with old model

In [53]:
# Compare each per-cluster model with the baseline model `tsmod`, print the
# per-cluster likelihood differences, and collect the clusters that pass the
# acceptance thresholds below into `good_clusters`.

# Float array from the start: the original list only became an array through
# numpy's reflected add, and stayed a list (-> TypeError on division) when
# `train_val_test` was empty.
total_ll = np.zeros(3)
total_number = 0
good_clusters = []
threshold = 1.                # lower bound for mean_diff_ll[1]
threshold_size_cluster = 800  # lower bound for the number of training events
threshorld_dif = 1.2          # multiplier applied to mean_ll[0] before comparing with mean_ll[2]

# Loop-invariant: count the cluster sizes once instead of three times per iteration.
cluster_sizes = clusters.value_counts()

for cluster in train_val_test.keys():

    train, val, test = train_val_test[cluster]  # events for each cluster
    ints = clusters_models[cluster].params.ts_idx
    cluster_size = cluster_sizes[cluster]

    # Rows of `av` that belong to this cluster's training events (used twice below).
    train_mask = np.in1d(av.event_id, train)
    # Mask of all rows belonging to this cluster (train + val + test events).
    cluster_mask = np.in1d(av.event_id, np.hstack((train, val, test)))[ints]

    # Use the cluster model's probabilities for events inside the cluster and
    # the old model's probabilities everywhere else.
    prob_mix = np.where(cluster_mask, clusters_models[cluster].step2probs, tsmod.step2probs)
    prob_train = np.where(train_mask, clusters_models[cluster].step2probs, tsmod.step2probs)
    # NOTE(review): `cluster_mask` is re-indexed with `ints` but `train_mask` is
    # not - confirm whether the two masks are meant to be in the same row order.
    print(prob_mix[:, train_mask].sum())
    print(prob_train.sum())
    print('cluster {}  number  {}'.format(cluster, cluster_size))

    print('diff_likelihood')
    # Column-wise nan-mean of ll_diff's result; presumably one value per
    # (train, val, test) split - TODO confirm against ll_diff.
    mean_diff_ll = np.nanmean(ll_diff(prob_mix, tsmod.step2probs, train, val, test, ints, av=av, tsav=tsav), axis=0)
    print(mean_diff_ll)
    mean_ll = np.nanmean(clusters_models[cluster].stats2.ll, axis=0)
    _flag = ((mean_diff_ll[1] > threshold)
             & (len(train) > threshold_size_cluster)
             & (mean_ll[0] * threshorld_dif < mean_ll[2]))
    if _flag:
        good_clusters.append(cluster)

    # Size-weighted accumulation; the division after the loop turns it into a weighted mean.
    total_ll = total_ll + mean_diff_ll * cluster_size
    total_number += cluster_size

# Guard the empty case explicitly instead of dividing by zero.
if total_number:
    mean_total_ll = total_ll / total_number
else:
    mean_total_ll = np.full(3, np.nan)

# Single-argument print(...) produces the same text on Python 2 and also parses on Python 3.
print('......................')
print('total likelihood  {}'.format(mean_total_ll))
print('......................')
print('good clusters   {}'.format(good_clusters))

40030.0
173284.0
cluster 0  number  6306
diff_likelihood
[  4.49418077   4.49418077 -18.76223263]
29617.0
173284.0
cluster 1  number  5110
diff_likelihood
[ 5.7914625   5.7914625   7.22083245]
45991.0
173284.0
cluster -1  number  7147
diff_likelihood
[ 2.76451907  2.76451907  6.22465915]
......................
total likelihood  [ 4.18535232  4.18535232 -1.98936305]
......................
good clusters   [0, 1, -1]


##### inspect the fitted cluster models before writing results

In [54]:
# Keys of the fitted per-cluster models as an indexable list (a bare
# `dict.keys()` is only indexable on Python 2); the last line displays them.
cl_keys = list(clusters_models.keys())
cl_keys

[0, 1, -1]

In [55]:
# List the instance attributes stored on the first per-cluster model.
vars(clusters_models[cl_keys[0]]).keys()

['step2probs',
 'step1probs',
 'factors',
 'eff_coefs',
 'stats1',
 'tsav',
 'stats2',
 'params']

In [56]:
def class_to_dict(dict_models, atr):
    """Collect one attribute from every model in a mapping.

    Parameters
    ----------
    dict_models : dict
        Mapping of key -> model object.
    atr : str
        Name of the attribute to read from each model.

    Returns
    -------
    dict
        New dict with the same keys as ``dict_models``, each mapped to that
        model's ``atr`` attribute.

    Raises
    ------
    AttributeError
        If any model has no attribute called ``atr``.
    """
    # getattr (rather than obj.__dict__[atr]) also resolves properties,
    # class-level attributes and __slots__, not just instance-dict entries.
    return {key: getattr(model, atr) for key, model in dict_models.items()}

In [57]:
# Quick check of the helper: pull `step1probs` out of every cluster model and
# display the keys of the resulting dict.
_dic = class_to_dict(clusters_models, 'step1probs')
_dic.keys()

[0, 1, -1]

##### write result to file

In [60]:
# Hand the baseline model outputs plus the per-cluster step-1 probabilities and
# coefficients to write_dic_to_simdata (helper defined elsewhere; presumably it
# writes the named file). Only the clusters in `good_clusters` are named.
cluster_step1probs_by_key = class_to_dict(clusters_models, 'step1probs')
cluster_coefs_by_key = class_to_dict(clusters_models, 'eff_coefs')
write_dic_to_simdata(
    '3_clustering_directions.p', tsmod.step1probs, tsmod.eff_coefs, oos,
    av=av,
    data=df_cos['cluster'],
    cluster_step1probs=cluster_step1probs_by_key,
    cluster_coefs=cluster_coefs_by_key,
    cluster_names=good_clusters)

In [61]:
# Same call as for '3_clustering_directions.p', but with a different output
# name and only cluster -1 named in `cluster_names`.
# NOTE(review): the cluster labels still come from df_cos['cluster'] although
# the file name says "density" - confirm this is the intended labelling.
cluster_step1probs_by_key = class_to_dict(clusters_models, 'step1probs')
cluster_coefs_by_key = class_to_dict(clusters_models, 'eff_coefs')
write_dic_to_simdata(
    '2_clustering_density.p', tsmod.step1probs, tsmod.eff_coefs, oos,
    av=av,
    data=df_cos['cluster'],
    cluster_step1probs=cluster_step1probs_by_key,
    cluster_coefs=cluster_coefs_by_key,
    cluster_names=[-1])

In [81]:
def _mean_student_by_factor(model):
    """Pair each name in `col_names` with the axis-0 mean of `model.stats1.student_t[:, 2:]`."""
    # The first two columns of student_t are dropped before averaging;
    # presumably they are not factor columns - TODO confirm.
    return list(zip(col_names, model.stats1.student_t[:, 2:].mean(axis=0)))


# (name, mean student-t) pairs for every cluster model, plus the baseline model under 'old'.
factors_student = {}
for cl in cl_keys:
    print('')  # one blank line per cluster, as the original bare print did
    factors_student[cl] = _mean_student_by_factor(clusters_models[cl])

factors_student['old'] = _mean_student_by_factor(tsmod)







In [83]:
# For every model, rank its (name, value) pairs by value, largest first, and
# show the top 20. The sort is in place, so `factors_student` stays ranked.
for cl, ranked_pairs in factors_student.items():
    print(cl)
    ranked_pairs.sort(key=lambda pair: pair[1], reverse=True)
    print(np.array(ranked_pairs)[:20])
    print('')

0
[['f9' '3.72079806707']
 ['f22' '3.68703199159']
 ['f50' '3.43751084589']
 ['f2' '2.70965211257']
 ['f47' '2.56356215554']
 ['f20' '2.46598847107']
 ['f4' '2.4166868126']
 ['f45' '2.41529681265']
 ['f49' '2.38357658478']
 ['f51' '2.16698043235']
 ['f28' '2.13457862555']
 ['f13' '2.07943489332']
 ['f18' '1.66227185474']
 ['f1' '1.6494103903']
 ['f16' '1.64757232946']
 ['f41' '1.61395790866']
 ['f27' '1.54883946648']
 ['f24' '1.53125867896']
 ['f6' '1.50640225993']
 ['f43' '1.33076332461']]

1
[['f2' '4.51633210595']
 ['f50' '3.02214629642']
 ['f7' '2.83055253236']
 ['f29' '2.46190829071']
 ['f49' '2.2654911589']
 ['f39' '2.09504113315']
 ['f45' '2.06224401477']
 ['f32' '1.904563837']
 ['f24' '1.86191252322']
 ['f36' '1.8607347435']
 ['f44' '1.83294115708']
 ['f9' '1.70072129166']
 ['f26' '1.62678073672']
 ['f34' '1.56119713309']
 ['f46' '1.48665289094']
 ['f28' '1.47833267057']
 ['f41' '1.47176143653']
 ['f18' '1.4715814259']
 ['f25' '1.37161130254']
 ['f19' '1.37061769511']]

old
[['