## Librerías

In [105]:
import pandas as pd
import numpy as np
import os
from functools import reduce
from datetime import date,datetime
from dateutil.relativedelta import relativedelta as rd # *

from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import Perceptron,PassiveAggressiveClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import roc_auc_score,accuracy_score # 1/2
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV # *

import matplotlib.pyplot as plt
import seaborn as sns
import pygal # *

pd.set_option('display.max_columns',500)

## Lectura de datos

In [2]:
ruta = '/home/jose/Documentos/bd/ecobici/'

In [3]:
l = os.listdir(ruta)
l.sort()

In [4]:
l

['2017-11.csv',
 '2017-12.csv',
 '2018-01.csv',
 '2018-02.csv',
 '2018-03.csv',
 '2018-04.csv',
 '2018-05.csv',
 '2018-06.csv',
 '2018-07.csv',
 '2018-08.csv',
 '2018-09.csv',
 '2018-10.csv',
 '2018-11.csv',
 '2018-12.csv',
 '2019-01.csv',
 '2019-02.csv',
 '2019-03.csv',
 '2019-04.csv',
 '2019-05.csv',
 '2019-06.csv',
 '2019-07.csv',
 '2019-08.csv',
 '2019-09.csv',
 '2019-10.csv',
 'estaciones-de-ecobici.csv']

### Muestra

In [35]:
df = pd.concat(map(lambda arch:pd.read_csv(os.path.join(ruta,arch),usecols=['Ciclo_Estacion_Retiro']).drop_duplicates(),
                   l[:-1]),
               ignore_index=True).drop_duplicates().reset_index(drop=True)

In [36]:
cat = df.sample(n=100).reset_index(drop=True)

In [37]:
df = pd.concat(map(lambda arch:pd.read_csv(os.path.join(ruta,arch)).merge(cat,on='Ciclo_Estacion_Retiro',how='inner'),
                   l[:-1]),ignore_index=True)

  
  


In [39]:
df.shape

(3565585, 10)

## Catálogo de ventanas de tiempo

In [40]:
df['semana'] = pd.to_datetime(df['Fecha_Retiro']).map(lambda x:x.year*100+int(x.strftime('%U')))

In [41]:
df.tail()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Unnamed: 9,semana
3565580,M,59,f2009,1001,30/10/2019,13:58:10,1001,30/10/2019,14:02:46,,201943
3565581,M,59,f2009,1001,30/10/2019,14:32:01,1001,30/10/2019,15:59:52,,201943
3565582,M,59,lab003,1001,30/10/2019,16:57:27,1001,30/10/2019,17:00:48,,201943
3565583,M,59,f2002,1001,29/10/2019,15:05:58,1001,30/10/2019,11:38:59,,201943
3565584,M,59,lab003,1001,30/10/2019,17:00:55,1001,31/10/2019,09:23:54,,201943


In [42]:
catfh = df[['semana']].drop_duplicates().sort_values('semana',ascending=True).reset_index(drop=True)

In [43]:
catfh['id_semana'] = catfh.index+1

In [44]:
df = df.merge(catfh,on='semana',how='inner')

In [45]:
df.drop('semana',axis=1,inplace=True)
df.drop('Unnamed: 9',axis=1,inplace=True)

In [46]:
df.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,id_semana
0,M,27,10899,84,01/11/2017,0:02:54,150,01/11/2017,0:06:30,2
1,F,26,2191,84,01/11/2017,5:51:33,54,01/11/2017,5:57:50,2
2,M,43,10158,84,01/11/2017,6:07:50,146,01/11/2017,6:11:00,2
3,M,44,10248,84,01/11/2017,6:29:15,146,01/11/2017,6:49:08,2
4,M,22,9742,84,01/11/2017,6:31:32,32,01/11/2017,6:47:15,2


## Ingeniería de datos

In [59]:
semi,semf = df[['id_semana']].describe().T[['min','max']].values[0].tolist()
semi,semf

(1.0, 127.0)

In [65]:
vobs = 12
vdes = 1
anclai = int(semi)+vobs-1
anclaf = int(semf)-vdes
anclai,anclaf

(12, 126)

In [129]:
um = ['Ciclo_Estacion_Retiro','ancla']

In [149]:
ancla = 12
step = 3

In [112]:
varc = ['Edad_Usuario','n']
vard = ['Genero_Usuario','hora']

In [162]:
def trans(df,ancla,k):
    print(ancla,k)
    aux = df.loc[(df['id_semana']>=(ancla-k+1))&(df['id_semana']<=ancla)].reset_index(drop=True)
    aux = aux[['id_semana','Genero_Usuario','Edad_Usuario','Hora_Retiro','Ciclo_Estacion_Retiro']]
    aux['hora'] =aux['Hora_Retiro'].map(lambda x:int(x.split(':')[0])//6).astype(str)
    aux.drop('Hora_Retiro',axis=1,inplace=True)
    aux['n'] = 1
    
    t = aux.copy()
    
    for v in vard:
        t[v] = 'total_%s'%v
    aux = pd.concat([aux,t],ignore_index=True)
    
    def piv(aux,v,ancla):
        aux = aux.pivot_table(index='Ciclo_Estacion_Retiro',
                        columns=v,
                        values=varc,
                        aggfunc=['min','max','mean','sum','std'])
        aux.columns = ["v_"+"_".join(x)+"_%d"%k for x in aux.columns]
        return aux.reset_index().assign(ancla=ancla)
    
    aux = reduce(lambda x,y:pd.merge(x,y,on=um,how='outer'),map(lambda v:piv(aux,v,ancla),vard))
    return aux

In [163]:
X = pd.concat(map(lambda ancla:reduce(lambda x,y:pd.merge(x,y,on=um,how='outer'),
       map(lambda k:trans(df,ancla,k),range(step,vobs+step,step))),range(anclai,anclaf+1)),ignore_index=True)

12 3
12 6
12 9
12 12
13 3
13 6
13 9
13 12
14 3
14 6
14 9
14 12
15 3
15 6
15 9
15 12
16 3
16 6
16 9
16 12
17 3
17 6
17 9
17 12
18 3
18 6
18 9
18 12
19 3
19 6
19 9
19 12
20 3
20 6
20 9
20 12
21 3
21 6
21 9
21 12
22 3
22 6
22 9
22 12
23 3
23 6
23 9
23 12
24 3
24 6
24 9
24 12
25 3
25 6
25 9
25 12
26 3
26 6
26 9
26 12
27 3
27 6
27 9
27 12
28 3
28 6
28 9
28 12
29 3
29 6
29 9
29 12
30 3
30 6
30 9
30 12
31 3
31 6
31 9
31 12
32 3
32 6
32 9
32 12
33 3
33 6
33 9
33 12
34 3
34 6
34 9
34 12
35 3
35 6
35 9
35 12
36 3
36 6
36 9
36 12
37 3
37 6
37 9
37 12
38 3
38 6
38 9
38 12
39 3
39 6
39 9
39 12
40 3
40 6
40 9
40 12
41 3
41 6
41 9
41 12
42 3
42 6
42 9
42 12
43 3
43 6
43 9
43 12
44 3
44 6
44 9
44 12
45 3
45 6
45 9
45 12
46 3
46 6
46 9
46 12
47 3
47 6
47 9
47 12
48 3
48 6
48 9
48 12
49 3
49 6
49 9
49 12
50 3
50 6
50 9
50 12
51 3
51 6
51 9
51 12
52 3
52 6
52 9
52 12
53 3
53 6
53 9
53 12
54 3
54 6
54 9
54 12
55 3
55 6
55 9
55 12
56 3
56 6
56 9
56 12
57 3
57 6
57 9
57 12
58 3
58 6
58 9
58 12
59 3
59 6
59 

In [165]:
X.head()

Unnamed: 0,Ciclo_Estacion_Retiro,v_min_Edad_Usuario_F_3,v_min_Edad_Usuario_M_3,v_min_Edad_Usuario_total_Genero_Usuario_3,v_min_n_F_3,v_min_n_M_3,v_min_n_total_Genero_Usuario_3,v_max_Edad_Usuario_F_3,v_max_Edad_Usuario_M_3,v_max_Edad_Usuario_total_Genero_Usuario_3,v_max_n_F_3,v_max_n_M_3,v_max_n_total_Genero_Usuario_3,v_mean_Edad_Usuario_F_3,v_mean_Edad_Usuario_M_3,v_mean_Edad_Usuario_total_Genero_Usuario_3,v_mean_n_F_3,v_mean_n_M_3,v_mean_n_total_Genero_Usuario_3,v_sum_Edad_Usuario_F_3,v_sum_Edad_Usuario_M_3,v_sum_Edad_Usuario_total_Genero_Usuario_3,v_sum_n_F_3,v_sum_n_M_3,v_sum_n_total_Genero_Usuario_3,v_std_Edad_Usuario_F_3,v_std_Edad_Usuario_M_3,v_std_Edad_Usuario_total_Genero_Usuario_3,v_std_n_F_3,v_std_n_M_3,v_std_n_total_Genero_Usuario_3,ancla,v_min_Edad_Usuario_0_3,v_min_Edad_Usuario_1_3,v_min_Edad_Usuario_2_3,v_min_Edad_Usuario_3_3,v_min_Edad_Usuario_total_hora_3,v_min_n_0_3,v_min_n_1_3,v_min_n_2_3,v_min_n_3_3,v_min_n_total_hora_3,v_max_Edad_Usuario_0_3,v_max_Edad_Usuario_1_3,v_max_Edad_Usuario_2_3,v_max_Edad_Usuario_3_3,v_max_Edad_Usuario_total_hora_3,v_max_n_0_3,v_max_n_1_3,v_max_n_2_3,v_max_n_3_3,v_max_n_total_hora_3,v_mean_Edad_Usuario_0_3,v_mean_Edad_Usuario_1_3,v_mean_Edad_Usuario_2_3,v_mean_Edad_Usuario_3_3,v_mean_Edad_Usuario_total_hora_3,v_mean_n_0_3,v_mean_n_1_3,v_mean_n_2_3,v_mean_n_3_3,v_mean_n_total_hora_3,v_sum_Edad_Usuario_0_3,v_sum_Edad_Usuario_1_3,v_sum_Edad_Usuario_2_3,v_sum_Edad_Usuario_3_3,v_sum_Edad_Usuario_total_hora_3,v_sum_n_0_3,v_sum_n_1_3,v_sum_n_2_3,v_sum_n_3_3,v_sum_n_total_hora_3,v_std_Edad_Usuario_0_3,v_std_Edad_Usuario_1_3,v_std_Edad_Usuario_2_3,v_std_Edad_Usuario_3_3,v_std_Edad_Usuario_total_hora_3,v_std_n_0_3,v_std_n_1_3,v_std_n_2_3,v_std_n_3_3,v_std_n_total_hora_3,v_min_Edad_Usuario_F_6,v_min_Edad_Usuario_M_6,v_min_Edad_Usuario_total_Genero_Usuario_6,v_min_n_F_6,v_min_n_M_6,v_min_n_total_Genero_Usuario_6,v_max_Edad_Usuario_F_6,v_max_Edad_Usuario_M_6,v_max_Edad_Usuario_total_Genero_Usuario_6,v_max_n_F_6,v_max_n_M_6,v_max_n_total_Genero_Usuario_6,v_mean_Edad_Usuario_F_6,v_mean_Edad_Usuario_M_6,v_mean_Edad_Usuario_total_Genero_Usuario_6,v_mean_n_F_6,v_mean_n_M_6,v_mean_n_total_Genero_Usuario_6,v_sum_Edad_Usuario_F_6,v_sum_Edad_Usuario_M_6,v_sum_Edad_Usuario_total_Genero_Usuario_6,v_sum_n_F_6,v_sum_n_M_6,v_sum_n_total_Genero_Usuario_6,v_std_Edad_Usuario_F_6,v_std_Edad_Usuario_M_6,v_std_Edad_Usuario_total_Genero_Usuario_6,v_std_n_F_6,v_std_n_M_6,v_std_n_total_Genero_Usuario_6,v_min_Edad_Usuario_0_6,v_min_Edad_Usuario_1_6,v_min_Edad_Usuario_2_6,v_min_Edad_Usuario_3_6,v_min_Edad_Usuario_total_hora_6,v_min_n_0_6,v_min_n_1_6,v_min_n_2_6,v_min_n_3_6,v_min_n_total_hora_6,v_max_Edad_Usuario_0_6,v_max_Edad_Usuario_1_6,v_max_Edad_Usuario_2_6,v_max_Edad_Usuario_3_6,v_max_Edad_Usuario_total_hora_6,v_max_n_0_6,v_max_n_1_6,v_max_n_2_6,v_max_n_3_6,v_max_n_total_hora_6,v_mean_Edad_Usuario_0_6,v_mean_Edad_Usuario_1_6,v_mean_Edad_Usuario_2_6,v_mean_Edad_Usuario_3_6,v_mean_Edad_Usuario_total_hora_6,v_mean_n_0_6,v_mean_n_1_6,v_mean_n_2_6,v_mean_n_3_6,v_mean_n_total_hora_6,v_sum_Edad_Usuario_0_6,v_sum_Edad_Usuario_1_6,v_sum_Edad_Usuario_2_6,v_sum_Edad_Usuario_3_6,v_sum_Edad_Usuario_total_hora_6,v_sum_n_0_6,v_sum_n_1_6,v_sum_n_2_6,v_sum_n_3_6,v_sum_n_total_hora_6,v_std_Edad_Usuario_0_6,v_std_Edad_Usuario_1_6,v_std_Edad_Usuario_2_6,v_std_Edad_Usuario_3_6,v_std_Edad_Usuario_total_hora_6,v_std_n_0_6,v_std_n_1_6,v_std_n_2_6,v_std_n_3_6,v_std_n_total_hora_6,v_min_Edad_Usuario_F_9,v_min_Edad_Usuario_M_9,v_min_Edad_Usuario_total_Genero_Usuario_9,v_min_n_F_9,v_min_n_M_9,v_min_n_total_Genero_Usuario_9,v_max_Edad_Usuario_F_9,v_max_Edad_Usuario_M_9,v_max_Edad_Usuario_total_Genero_Usuario_9,v_max_n_F_9,v_max_n_M_9,v_max_n_total_Genero_Usuario_9,v_mean_Edad_Usuario_F_9,v_mean_Edad_Usuario_M_9,v_mean_Edad_Usuario_total_Genero_Usuario_9,v_mean_n_F_9,v_mean_n_M_9,v_mean_n_total_Genero_Usuario_9,v_sum_Edad_Usuario_F_9,v_sum_Edad_Usuario_M_9,v_sum_Edad_Usuario_total_Genero_Usuario_9,v_sum_n_F_9,v_sum_n_M_9,v_sum_n_total_Genero_Usuario_9,v_std_Edad_Usuario_F_9,v_std_Edad_Usuario_M_9,v_std_Edad_Usuario_total_Genero_Usuario_9,v_std_n_F_9,v_std_n_M_9,v_std_n_total_Genero_Usuario_9,v_min_Edad_Usuario_0_9,v_min_Edad_Usuario_1_9,v_min_Edad_Usuario_2_9,v_min_Edad_Usuario_3_9,v_min_Edad_Usuario_total_hora_9,v_min_n_0_9,v_min_n_1_9,v_min_n_2_9,v_min_n_3_9,v_min_n_total_hora_9,v_max_Edad_Usuario_0_9,v_max_Edad_Usuario_1_9,v_max_Edad_Usuario_2_9,v_max_Edad_Usuario_3_9,v_max_Edad_Usuario_total_hora_9,v_max_n_0_9,v_max_n_1_9,v_max_n_2_9,v_max_n_3_9,v_max_n_total_hora_9,v_mean_Edad_Usuario_0_9,v_mean_Edad_Usuario_1_9,v_mean_Edad_Usuario_2_9,v_mean_Edad_Usuario_3_9,v_mean_Edad_Usuario_total_hora_9,v_mean_n_0_9,v_mean_n_1_9,v_mean_n_2_9,v_mean_n_3_9,v_mean_n_total_hora_9,v_sum_Edad_Usuario_0_9,v_sum_Edad_Usuario_1_9,v_sum_Edad_Usuario_2_9,v_sum_Edad_Usuario_3_9,v_sum_Edad_Usuario_total_hora_9,v_sum_n_0_9,v_sum_n_1_9,v_sum_n_2_9,v_sum_n_3_9,v_sum_n_total_hora_9,v_std_Edad_Usuario_0_9,v_std_Edad_Usuario_1_9,v_std_Edad_Usuario_2_9,v_std_Edad_Usuario_3_9,v_std_Edad_Usuario_total_hora_9,v_std_n_0_9,v_std_n_1_9,v_std_n_2_9,v_std_n_3_9,v_std_n_total_hora_9,v_min_Edad_Usuario_F_12,v_min_Edad_Usuario_M_12,v_min_Edad_Usuario_total_Genero_Usuario_12,v_min_n_F_12,v_min_n_M_12,v_min_n_total_Genero_Usuario_12,v_max_Edad_Usuario_F_12,v_max_Edad_Usuario_M_12,v_max_Edad_Usuario_total_Genero_Usuario_12,v_max_n_F_12,v_max_n_M_12,v_max_n_total_Genero_Usuario_12,v_mean_Edad_Usuario_F_12,v_mean_Edad_Usuario_M_12,v_mean_Edad_Usuario_total_Genero_Usuario_12,v_mean_n_F_12,v_mean_n_M_12,v_mean_n_total_Genero_Usuario_12,v_sum_Edad_Usuario_F_12,v_sum_Edad_Usuario_M_12,v_sum_Edad_Usuario_total_Genero_Usuario_12,v_sum_n_F_12,v_sum_n_M_12,v_sum_n_total_Genero_Usuario_12,v_std_Edad_Usuario_F_12,v_std_Edad_Usuario_M_12,v_std_Edad_Usuario_total_Genero_Usuario_12,v_std_n_F_12,v_std_n_M_12,v_std_n_total_Genero_Usuario_12,v_min_Edad_Usuario_0_12,v_min_Edad_Usuario_1_12,v_min_Edad_Usuario_2_12,v_min_Edad_Usuario_3_12,v_min_Edad_Usuario_total_hora_12,v_min_n_0_12,v_min_n_1_12,v_min_n_2_12,v_min_n_3_12,v_min_n_total_hora_12,v_max_Edad_Usuario_0_12,v_max_Edad_Usuario_1_12,v_max_Edad_Usuario_2_12,v_max_Edad_Usuario_3_12,v_max_Edad_Usuario_total_hora_12,v_max_n_0_12,v_max_n_1_12,v_max_n_2_12,v_max_n_3_12,v_max_n_total_hora_12,v_mean_Edad_Usuario_0_12,v_mean_Edad_Usuario_1_12,v_mean_Edad_Usuario_2_12,v_mean_Edad_Usuario_3_12,v_mean_Edad_Usuario_total_hora_12,v_mean_n_0_12,v_mean_n_1_12,v_mean_n_2_12,v_mean_n_3_12,v_mean_n_total_hora_12,v_sum_Edad_Usuario_0_12,v_sum_Edad_Usuario_1_12,v_sum_Edad_Usuario_2_12,v_sum_Edad_Usuario_3_12,v_sum_Edad_Usuario_total_hora_12,v_sum_n_0_12,v_sum_n_1_12,v_sum_n_2_12,v_sum_n_3_12,v_sum_n_total_hora_12,v_std_Edad_Usuario_0_12,v_std_Edad_Usuario_1_12,v_std_Edad_Usuario_2_12,v_std_Edad_Usuario_3_12,v_std_Edad_Usuario_total_hora_12,v_std_n_0_12,v_std_n_1_12,v_std_n_2_12,v_std_n_3_12,v_std_n_total_hora_12
0,1,19.0,19.0,19.0,1.0,1.0,1.0,71.0,75.0,75.0,1.0,1.0,1.0,32.975717,36.487678,35.690226,1.0,1.0,1.0,14938.0,56264.0,71202.0,453.0,1542.0,1995.0,8.759411,10.129123,9.942275,0.0,0.0,0.0,12,28.0,19.0,19.0,19.0,19.0,1.0,1.0,1.0,1.0,1.0,61.0,72.0,75.0,71.0,75.0,1.0,1.0,1.0,1.0,1.0,46.428571,35.719892,36.168869,34.945423,35.690226,1.0,1.0,1.0,1.0,1.0,325.0,26397.0,24631.0,19849.0,71202.0,7.0,739.0,681.0,568.0,1995.0,14.081396,10.018074,10.26642,9.286476,9.942275,0.0,0.0,0.0,0.0,0.0,18.0,19.0,18.0,1.0,1.0,1.0,71.0,75.0,75.0,1.0,1.0,1.0,33.057423,36.485488,35.666332,1.0,1.0,1.0,23603.0,82968.0,106571.0,714.0,2274.0,2988.0,8.739542,10.169657,9.953395,0.0,0.0,0.0,28.0,19.0,18.0,19.0,18.0,1.0,1.0,1.0,1.0,1.0,61.0,72.0,75.0,71.0,75.0,1.0,1.0,1.0,1.0,1.0,47.333333,35.794824,35.929511,35.037215,35.666332,1.0,1.0,1.0,1.0,1.0,426.0,38730.0,38229.0,29186.0,106571.0,9.0,1082.0,1064.0,833.0,2988.0,13.397761,10.041378,10.241933,9.324405,9.953395,0.0,0.0,0.0,0.0,0.0,18.0,17.0,17.0,1.0,1.0,1.0,71.0,76.0,76.0,1.0,1.0,1.0,33.071253,36.509992,35.690574,1.0,1.0,1.0,26920.0,94999.0,121919.0,814.0,2602.0,3416.0,8.578691,10.202793,9.947445,0.0,0.0,0.0,28.0,19.0,17.0,19.0,17.0,1.0,1.0,1.0,1.0,1.0,61.0,76.0,75.0,71.0,76.0,1.0,1.0,1.0,1.0,1.0,46.909091,35.893515,35.909316,35.009544,35.690574,1.0,1.0,1.0,1.0,1.0,516.0,44831.0,43558.0,33014.0,121919.0,11.0,1249.0,1213.0,943.0,3416.0,12.152815,10.090251,10.201357,9.291168,9.947445,0.0,0.0,0.0,0.0,0.0,18.0,17.0,17.0,1.0,1.0,1.0,71.0,76.0,76.0,1.0,1.0,1.0,32.889219,36.477336,35.624589,1.0,1.0,1.0,33251.0,118296.0,151547.0,1011.0,3243.0,4254.0,8.637564,10.164639,9.940328,0.0,0.0,0.0,28.0,19.0,17.0,19.0,17.0,1.0,1.0,1.0,1.0,1.0,61.0,76.0,75.0,71.0,76.0,1.0,1.0,1.0,1.0,1.0,44.411765,35.758221,35.878176,34.976542,35.624589,1.0,1.0,1.0,1.0,1.0,755.0,55461.0,55073.0,40258.0,151547.0,17.0,1551.0,1535.0,1151.0,4254.0,12.150405,9.97527,10.297283,9.281321,9.940328,0.0,0.0,0.0,0.0,0.0
1,2,18.0,19.0,18.0,1.0,1.0,1.0,66.0,70.0,70.0,1.0,1.0,1.0,33.762821,33.825462,33.810264,1.0,1.0,1.0,5267.0,16473.0,21740.0,156.0,487.0,643.0,10.547657,9.863488,10.025423,0.0,0.0,0.0,12,25.0,18.0,19.0,22.0,18.0,1.0,1.0,1.0,1.0,1.0,32.0,66.0,70.0,66.0,70.0,1.0,1.0,1.0,1.0,1.0,29.0,32.912844,36.238095,32.396226,33.810264,1.0,1.0,1.0,1.0,1.0,87.0,7175.0,7610.0,6868.0,21740.0,3.0,218.0,210.0,212.0,643.0,3.605551,10.576329,10.797859,8.131828,10.025423,0.0,0.0,0.0,0.0,0.0,18.0,19.0,18.0,1.0,1.0,1.0,66.0,71.0,71.0,1.0,1.0,1.0,33.5125,33.884154,33.794147,1.0,1.0,1.0,8043.0,25447.0,33490.0,240.0,751.0,991.0,10.293184,9.835915,9.944594,0.0,0.0,0.0,25.0,18.0,18.0,19.0,18.0,1.0,1.0,1.0,1.0,1.0,32.0,71.0,70.0,66.0,71.0,1.0,1.0,1.0,1.0,1.0,29.0,33.234756,35.556886,32.595092,33.794147,1.0,1.0,1.0,1.0,1.0,87.0,10901.0,11876.0,10626.0,33490.0,3.0,328.0,334.0,326.0,991.0,3.605551,10.727786,10.546112,8.139656,9.944594,0.0,0.0,0.0,0.0,0.0,18.0,19.0,18.0,1.0,1.0,1.0,66.0,71.0,71.0,1.0,1.0,1.0,33.713768,33.818287,33.792982,1.0,1.0,1.0,9305.0,29219.0,38524.0,276.0,864.0,1140.0,10.388173,9.769799,9.918507,0.0,0.0,0.0,25.0,18.0,18.0,19.0,18.0,1.0,1.0,1.0,1.0,1.0,56.0,71.0,70.0,66.0,71.0,1.0,1.0,1.0,1.0,1.0,35.75,33.351421,35.244275,32.648876,33.792982,1.0,1.0,1.0,1.0,1.0,143.0,12907.0,13851.0,11623.0,38524.0,4.0,387.0,393.0,356.0,1140.0,13.81726,10.899233,10.274616,8.024103,9.918507,0.0,0.0,0.0,0.0,0.0,18.0,19.0,18.0,1.0,1.0,1.0,66.0,71.0,71.0,1.0,1.0,1.0,34.177143,34.018009,34.057651,1.0,1.0,1.0,11962.0,35889.0,47851.0,350.0,1055.0,1405.0,10.612343,9.972099,10.131765,0.0,0.0,0.0,25.0,18.0,18.0,18.0,18.0,1.0,1.0,1.0,1.0,1.0,56.0,71.0,70.0,69.0,71.0,1.0,1.0,1.0,1.0,1.0,35.375,33.698947,35.432377,32.880184,34.057651,1.0,1.0,1.0,1.0,1.0,283.0,16007.0,17291.0,14270.0,47851.0,8.0,475.0,488.0,434.0,1405.0,9.575825,11.121685,10.41971,8.376224,10.131765,0.0,0.0,0.0,0.0,0.0
2,4,17.0,18.0,17.0,1.0,1.0,1.0,69.0,75.0,75.0,1.0,1.0,1.0,33.226415,35.209877,34.72093,1.0,1.0,1.0,5283.0,17112.0,22395.0,159.0,486.0,645.0,9.363898,9.864504,9.773822,0.0,0.0,0.0,12,27.0,19.0,17.0,19.0,17.0,1.0,1.0,1.0,1.0,1.0,34.0,63.0,75.0,69.0,75.0,1.0,1.0,1.0,1.0,1.0,31.769231,33.625,36.254464,34.258929,34.72093,1.0,1.0,1.0,1.0,1.0,413.0,6187.0,8121.0,7674.0,22395.0,13.0,184.0,224.0,224.0,645.0,2.862221,10.411512,9.982607,9.096637,9.773822,0.0,0.0,0.0,0.0,0.0,17.0,18.0,17.0,1.0,1.0,1.0,69.0,78.0,78.0,1.0,1.0,1.0,33.069672,35.41094,34.814002,1.0,1.0,1.0,8069.0,25248.0,33317.0,244.0,713.0,957.0,9.270471,10.187332,10.009038,0.0,0.0,0.0,24.0,19.0,17.0,19.0,17.0,1.0,1.0,1.0,1.0,1.0,46.0,72.0,78.0,69.0,78.0,1.0,1.0,1.0,1.0,1.0,31.882353,34.765568,36.030211,33.803571,34.814002,1.0,1.0,1.0,1.0,1.0,542.0,9491.0,11926.0,11358.0,33317.0,17.0,273.0,331.0,336.0,957.0,4.897478,11.192007,10.175088,8.839031,10.009038,0.0,0.0,0.0,0.0,0.0,17.0,18.0,17.0,1.0,1.0,1.0,69.0,78.0,78.0,1.0,1.0,1.0,32.927536,35.294621,34.697441,1.0,1.0,1.0,9088.0,28871.0,37959.0,276.0,818.0,1094.0,9.106352,10.175089,9.965487,0.0,0.0,0.0,20.0,19.0,17.0,19.0,17.0,1.0,1.0,1.0,1.0,1.0,67.0,72.0,78.0,69.0,78.0,1.0,1.0,1.0,1.0,1.0,33.181818,34.653722,35.73057,33.763926,34.697441,1.0,1.0,1.0,1.0,1.0,730.0,10708.0,13792.0,12729.0,37959.0,22.0,309.0,386.0,377.0,1094.0,9.079733,11.055637,10.105673,8.791314,9.965487,0.0,0.0,0.0,0.0,0.0,17.0,18.0,17.0,1.0,1.0,1.0,69.0,78.0,78.0,1.0,1.0,1.0,33.252199,35.167498,34.681548,1.0,1.0,1.0,11339.0,35273.0,46612.0,341.0,1003.0,1344.0,9.522776,10.015052,9.924049,0.0,0.0,0.0,20.0,19.0,17.0,19.0,17.0,1.0,1.0,1.0,1.0,1.0,67.0,72.0,78.0,69.0,78.0,1.0,1.0,1.0,1.0,1.0,33.153846,34.432642,35.757384,33.864629,34.681548,1.0,1.0,1.0,1.0,1.0,862.0,13291.0,16949.0,15510.0,46612.0,26.0,386.0,474.0,458.0,1344.0,8.33159,10.780062,10.232715,8.791981,9.924049,0.0,0.0,0.0,0.0,0.0
3,7,21.0,18.0,18.0,1.0,1.0,1.0,65.0,78.0,78.0,1.0,1.0,1.0,33.365854,34.602564,34.36836,1.0,1.0,1.0,5472.0,24291.0,29763.0,164.0,702.0,866.0,7.529105,10.105726,9.67886,0.0,0.0,0.0,12,26.0,18.0,19.0,19.0,18.0,1.0,1.0,1.0,1.0,1.0,50.0,67.0,78.0,59.0,78.0,1.0,1.0,1.0,1.0,1.0,36.4,33.654008,35.611276,33.463415,34.36836,1.0,1.0,1.0,1.0,1.0,182.0,7976.0,12001.0,9604.0,29763.0,5.0,237.0,337.0,287.0,866.0,8.961027,9.370805,10.904357,8.180517,9.67886,0.0,0.0,0.0,0.0,0.0,21.0,18.0,18.0,1.0,1.0,1.0,65.0,78.0,78.0,1.0,1.0,1.0,33.481172,34.730624,34.500386,1.0,1.0,1.0,8002.0,36745.0,44747.0,239.0,1058.0,1297.0,8.209358,10.310608,9.965692,0.0,0.0,0.0,26.0,18.0,19.0,19.0,18.0,1.0,1.0,1.0,1.0,1.0,50.0,67.0,78.0,59.0,78.0,1.0,1.0,1.0,1.0,1.0,36.666667,34.074499,35.749515,33.311475,34.500386,1.0,1.0,1.0,1.0,1.0,220.0,11892.0,18411.0,14224.0,44747.0,6.0,349.0,515.0,427.0,1297.0,8.041559,9.821988,11.1924,8.24074,9.965692,0.0,0.0,0.0,0.0,0.0,21.0,18.0,18.0,1.0,1.0,1.0,65.0,78.0,78.0,1.0,1.0,1.0,33.308824,34.789958,34.515337,1.0,1.0,1.0,9060.0,41574.0,50634.0,272.0,1195.0,1467.0,8.282391,10.380301,10.03848,0.0,0.0,0.0,26.0,18.0,19.0,19.0,18.0,1.0,1.0,1.0,1.0,1.0,50.0,76.0,78.0,63.0,78.0,1.0,1.0,1.0,1.0,1.0,36.666667,34.5525,35.362729,33.347826,34.515337,1.0,1.0,1.0,1.0,1.0,220.0,13821.0,21253.0,15340.0,50634.0,6.0,400.0,601.0,460.0,1467.0,8.041559,10.168123,11.074665,8.281321,10.03848,0.0,0.0,0.0,0.0,0.0,18.0,18.0,18.0,1.0,1.0,1.0,65.0,78.0,78.0,1.0,1.0,1.0,33.123529,34.90737,34.573942,1.0,1.0,1.0,11262.0,51628.0,62890.0,340.0,1479.0,1819.0,8.309076,10.345333,10.018377,0.0,0.0,0.0,26.0,18.0,19.0,18.0,18.0,1.0,1.0,1.0,1.0,1.0,50.0,76.0,78.0,63.0,78.0,1.0,1.0,1.0,1.0,1.0,36.0,34.675944,35.378378,33.415493,34.573942,1.0,1.0,1.0,1.0,1.0,288.0,17442.0,26180.0,18980.0,62890.0,8.0,503.0,740.0,568.0,1819.0,7.091242,10.215426,10.92613,8.424993,10.018377,0.0,0.0,0.0,0.0,0.0
4,18,19.0,19.0,19.0,1.0,1.0,1.0,61.0,72.0,72.0,1.0,1.0,1.0,32.423497,36.561544,35.690627,1.0,1.0,1.0,11867.0,50199.0,62066.0,366.0,1373.0,1739.0,8.200795,10.367871,10.090943,0.0,0.0,0.0,12,20.0,19.0,19.0,19.0,19.0,1.0,1.0,1.0,1.0,1.0,55.0,72.0,70.0,65.0,72.0,1.0,1.0,1.0,1.0,1.0,36.266667,36.655263,36.345013,34.260797,35.690627,1.0,1.0,1.0,1.0,1.0,544.0,13929.0,26968.0,20625.0,62066.0,15.0,380.0,742.0,602.0,1739.0,8.631062,10.7708,10.401889,9.110347,10.090943,0.0,0.0,0.0,0.0,0.0,17.0,18.0,17.0,1.0,1.0,1.0,64.0,78.0,78.0,1.0,1.0,1.0,32.508885,36.298353,35.443513,1.0,1.0,1.0,20123.0,77134.0,97257.0,619.0,2125.0,2744.0,8.432744,10.504838,10.19706,0.0,0.0,0.0,20.0,19.0,18.0,17.0,17.0,1.0,1.0,1.0,1.0,1.0,55.0,72.0,78.0,70.0,78.0,1.0,1.0,1.0,1.0,1.0,36.090909,36.815498,36.230431,33.85906,35.443513,1.0,1.0,1.0,1.0,1.0,794.0,19954.0,41194.0,35315.0,97257.0,22.0,542.0,1137.0,1043.0,2744.0,8.495988,10.764798,10.481944,9.388413,10.19706,0.0,0.0,0.0,0.0,0.0,17.0,18.0,17.0,1.0,1.0,1.0,64.0,78.0,78.0,1.0,1.0,1.0,32.469565,36.240564,35.401483,1.0,1.0,1.0,22404.0,87376.0,109780.0,690.0,2411.0,3101.0,8.364159,10.47939,10.167804,0.0,0.0,0.0,20.0,19.0,18.0,17.0,17.0,1.0,1.0,1.0,1.0,1.0,55.0,72.0,78.0,70.0,78.0,1.0,1.0,1.0,1.0,1.0,35.44,36.962723,36.027502,33.850435,35.401483,1.0,1.0,1.0,1.0,1.0,886.0,22806.0,47160.0,38928.0,109780.0,25.0,617.0,1309.0,1150.0,3101.0,8.441366,10.881008,10.376145,9.340306,10.167804,0.0,0.0,0.0,0.0,0.0,17.0,18.0,17.0,1.0,1.0,1.0,64.0,78.0,78.0,1.0,1.0,1.0,32.644498,36.158202,35.381449,1.0,1.0,1.0,26997.0,105365.0,132362.0,827.0,2914.0,3741.0,8.546659,10.43868,10.1553,0.0,0.0,0.0,20.0,19.0,18.0,17.0,17.0,1.0,1.0,1.0,1.0,1.0,55.0,72.0,78.0,70.0,78.0,1.0,1.0,1.0,1.0,1.0,36.133333,36.998684,35.877564,33.853949,35.381449,1.0,1.0,1.0,1.0,1.0,1084.0,28119.0,57727.0,45432.0,132362.0,30.0,760.0,1609.0,1342.0,3741.0,7.846516,10.795683,10.383351,9.323316,10.1553,0.0,0.0,0.0,0.0,0.0


In [166]:
X.shape

(11378, 322)