In [1]:
# Import libraries and set desired options
import numpy as np
import pandas as pd
from scipy.sparse import hstack
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import TimeSeriesSplit, cross_val_score, GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression

In [2]:


# A helper function for writing predictions to a file
def write_to_submission_file(predicted_labels, out_file,
                             target='target', index_label="session_id"):
    predicted_df = pd.DataFrame(predicted_labels,
                                index = np.arange(1, predicted_labels.shape[0] + 1),
                                columns=[target])
    predicted_df.to_csv(out_file, index_label=index_label)





Read training and test sets, sort train set by session start time.


In [3]:
train_df = pd.read_csv('train_sessions.csv',
                       index_col='session_id', parse_dates=['time1'])
test_df = pd.read_csv('test_sessions.csv',
                      index_col='session_id', parse_dates=['time1'])

# Sort the data by time
train_df = train_df.sort_values(by='time1')

# Look at the first rows of the training set
train_df.head()

Unnamed: 0_level_0,site1,time1,site2,time2,site3,time3,site4,time4,site5,time5,...,time6,site7,time7,site8,time8,site9,time9,site10,time10,target
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
21669,56,2013-01-12 08:05:57,55.0,2013-01-12 08:05:57,,,,,,,...,,,,,,,,,,0
54843,56,2013-01-12 08:37:23,55.0,2013-01-12 08:37:23,56.0,2013-01-12 09:07:07,55.0,2013-01-12 09:07:09,,,...,,,,,,,,,,0
77292,946,2013-01-12 08:50:13,946.0,2013-01-12 08:50:14,951.0,2013-01-12 08:50:15,946.0,2013-01-12 08:50:15,946.0,2013-01-12 08:50:16,...,2013-01-12 08:50:16,948.0,2013-01-12 08:50:16,784.0,2013-01-12 08:50:16,949.0,2013-01-12 08:50:17,946.0,2013-01-12 08:50:17,0
114021,945,2013-01-12 08:50:17,948.0,2013-01-12 08:50:17,949.0,2013-01-12 08:50:18,948.0,2013-01-12 08:50:18,945.0,2013-01-12 08:50:18,...,2013-01-12 08:50:18,947.0,2013-01-12 08:50:19,945.0,2013-01-12 08:50:19,946.0,2013-01-12 08:50:19,946.0,2013-01-12 08:50:20,0
146670,947,2013-01-12 08:50:20,950.0,2013-01-12 08:50:20,948.0,2013-01-12 08:50:20,947.0,2013-01-12 08:50:21,950.0,2013-01-12 08:50:21,...,2013-01-12 08:50:21,946.0,2013-01-12 08:50:21,951.0,2013-01-12 08:50:22,946.0,2013-01-12 08:50:22,947.0,2013-01-12 08:50:22,0




Transform data into format which can be fed into CountVectorizer


In [4]:
sites = ['site%s' % i for i in range(1, 11)]
train_df[sites].fillna(0).astype('int').to_csv('train_sessions_text.txt', 
                                               sep=' ', 
                       index=None, header=None)
test_df[sites].fillna(0).astype('int').to_csv('test_sessions_text.txt', 
                                              sep=' ', 
                       index=None, header=None)



Fit CountVectorizer and transfrom data with it.


In [5]:
%%time
cv = CountVectorizer(ngram_range=(1, 3), max_features=50000)
with open('train_sessions_text.txt') as inp_train_file:
    X_train = cv.fit_transform(inp_train_file)
with open('test_sessions_text.txt') as inp_test_file:
    X_test = cv.transform(inp_test_file)
X_train.shape, X_test.shape

CPU times: total: 8.94 s
Wall time: 8.96 s


((253561, 50000), (82797, 50000))



Save train targets into a separate vector.



In [6]:

y_train = train_df['target'].astype('int').values



We'll be performing time series cross-validation, see sklearn TimeSeriesSplit and this dicussion on StackOverflow.


In [7]:
time_split = TimeSeriesSplit(n_splits=10)

In [8]:
[(el[0].shape, el[1].shape) for el in time_split.split(X_train)]

[((23051,), (23051,)),
 ((46102,), (23051,)),
 ((69153,), (23051,)),
 ((92204,), (23051,)),
 ((115255,), (23051,)),
 ((138306,), (23051,)),
 ((161357,), (23051,)),
 ((184408,), (23051,)),
 ((207459,), (23051,)),
 ((230510,), (23051,))]



Perform time series cross-validation with logistic regression.


In [9]:
logit = LogisticRegression(C=1, random_state=17, solver='liblinear')

In [10]:


%%time

cv_scores = cross_val_score(logit, X_train, y_train, cv=time_split, 
                            scoring='roc_auc', n_jobs=1) # hangs with n_jobs > 1, and locally this runs much faster



CPU times: total: 27.5 s
Wall time: 31.4 s


In [13]:
cv_scores, cv_scores.mean()

(array([0.83141992, 0.64670079, 0.87991997, 0.9631551 , 0.84221478,
        0.87840646, 0.94475732, 0.85322131, 0.92988126, 0.90752918]),
 0.8677206084310283)

Train logistic regression with all training data, make predictions for test set and form a submission file.

In [14]:
logit.fit(X_train, y_train)

In [18]:
logit_test_pred = logit.predict_proba(X_test)[:, 1]

In [19]:
write_to_submission_file(logit_test_pred, 'sub1.csv')

In [15]:
def add_time_features(df, X_sparse):
    hour = df['time1'].apply(lambda ts: ts.hour)
    morning = ((hour >= 7) & (hour <= 11)).astype('int')
    day = ((hour >= 12) & (hour <= 18)).astype('int')
    evening = ((hour >= 19) & (hour <= 23)).astype('int')
    night = ((hour >= 0) & (hour <= 6)).astype('int')
    X = hstack([X_sparse, morning.values.reshape(-1, 1), 
                day.values.reshape(-1, 1), evening.values.reshape(-1, 1), 
                night.values.reshape(-1, 1)])
    return X

In [16]:


%%time
X_train_new = add_time_features(train_df.fillna(0), X_train)
X_test_new = add_time_features(test_df.fillna(0), X_test)



CPU times: total: 1.11 s
Wall time: 1.12 s


In [17]:


X_train_new.shape, X_test_new.shape



((253561, 50004), (82797, 50004))



Performing time series cross-validation, we see an improvement in ROC AUC.


In [23]:


%%time
cv_scores = cross_val_score(logit, X_train_new, y_train, cv=time_split, 
                            scoring='roc_auc', n_jobs=1) # hangs with n_jobs > 1, and locally this runs much faster



CPU times: total: 27.3 s
Wall time: 31.1 s


In [24]:


cv_scores, cv_scores.mean()



(array([0.87652264, 0.75122963, 0.93061982, 0.97864183, 0.90399606,
        0.93831429, 0.96249083, 0.92731256, 0.94886187, 0.94043803]),
 0.9158427557045007)



Making a new submission, we notice a leaderboard score improvement as well (0.91288 -> 0.93843). Correlated CV and LB improvements is a good justifications for added features being useful and CV scheme being correct.



In [25]:
logit.fit(X_train_new, y_train)

In [26]:


logit_test_pred2 = logit.predict_proba(X_test_new)[:, 1]
write_to_submission_file(logit_test_pred2, 'subm2.csv') # 0.93843





Now we tune regularization parameter C.


In [25]:
c_values = np.logspace(-2, 2, 10)

logit_grid_searcher = GridSearchCV(estimator=logit, param_grid={'C': c_values},
                                  scoring='roc_auc', n_jobs=1, cv=time_split, verbose=1)

In [26]:
%%time
logit_grid_searcher.fit(X_train_new, y_train) # WTF? Locally, it's 3min 30s

Fitting 10 folds for each of 10 candidates, totalling 100 fits




CPU times: total: 6min 32s
Wall time: 7min 28s


In [27]:

logit_grid_searcher.best_score_, logit_grid_searcher.best_params_
logit_test_pred3 = logit_grid_searcher.predict_proba(X_test_new)[:, 1]
write_to_submission_file(logit_test_pred3, 'subm3.csv') # 0.94242

may we will use a RandomForestClassifier

In [29]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)


rf = RandomForestClassifier(
    n_estimators=100, n_jobs=-1, random_state=42
)

parameters = {
    "max_features": [1, 2, 4],
    "min_samples_leaf": [3, 5, 7, 9],
    "max_depth": [5, 10, 15],
}

rf = GridSearchCV(rf, param_grid=parameters, scoring="roc_auc", cv=skf.split(X_train_new, y_train))
rf.fit(X_train_new, y_train)
print(rf.best_params_, rf.best_score_)

{'max_depth': 15, 'max_features': 4, 'min_samples_leaf': 3} 0.8514356361178388


In [35]:
parameters = {
    "n_estimators": [1, 2, 5, 10, 25, 30, 40, 50, 75, 90, 95, 100]
}
rf2 = RandomForestClassifier(
    n_jobs=-1, random_state=42
)
rf2 = GridSearchCV(rf2, param_grid=parameters, scoring="roc_auc", cv=skf.split(X_train_new, y_train))
rf2.fit(X_train_new, y_train)
print(rf2.best_params_, rf2.best_score_)

{'n_estimators': 100} 0.9454340246116899


{'n_estimators': 100} 0.9454340246116899

In [37]:
rf3 = RandomForestClassifier(
    n_estimators=100, n_jobs=-1, random_state=42
)

parameters = {
    "max_features": [1, 2, 3, 4, 5, 6, 7, 8, 10],
    "min_samples_leaf": [3, 5, 7, 9, 10],
    "max_depth": [5, 10, 15, 21, 22, 25],
}

rf3 = GridSearchCV(rf3, param_grid=parameters, scoring="roc_auc", cv=skf.split(X_train_new, y_train))
rf3.fit(X_train_new, y_train)
print(rf3.best_params_, rf3.best_score_)

{'max_depth': 25, 'max_features': 10, 'min_samples_leaf': 3} 0.9287787700728481


In [40]:
rf4 = RandomForestClassifier(n_estimators=100, max_depth=25, max_features=10, min_samples_leaf= 3,n_jobs=-1, random_state=42).fit(X_train_new, y_train)
rf_proba = rf4.predict_proba(X_test_new)[:, 1]

In [42]:
write_to_submission_file(rf_proba, 'subm5.csv') # 0.9079
write_to_submission_file(rf_proba, 'subm4.csv') #0.91541 without max_feature, and min-sample_list

In [48]:
%%time

from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import RandomizedSearchCV, cross_val_score

rf2 = RandomForestClassifier(
    n_estimators=100, n_jobs=-1, random_state=42
)

model1 = BaggingClassifier(rf2, 
                n_estimators=50, 
                bootstrap = True, random_state = 42)
model1.fit(X_train_new, y_train)
print(model1.best_score_)

KeyboardInterrupt: 

In [52]:
%%time
model = BaggingClassifier(LogisticRegression(class_weight='balanced'), 
                n_estimators=100, 
                bootstrap = True, random_state = 42)
# grid_r = RandomizedSearchCV(model,parameters,n_iter=20, cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=5), scoring='roc_auc', random_state=1)
c_values = np.logspace(-2, 2, 10)

logit_grid_searcher = GridSearchCV(estimator=logit, param_grid={'C': c_values},
                                  scoring='roc_auc', n_jobs=1, cv=time_split, verbose=1)

logit_grid_searcher.fit(X_train_new, y_train)
print(logit_grid_searcher.best_score_)

Fitting 10 folds for each of 10 candidates, totalling 100 fits




0.9173768889331676
CPU times: total: 6min 21s
Wall time: 6min 42s


In [64]:
from sklearn.ensemble import (BaggingClassifier, BaggingRegressor,
                            RandomForestClassifier, RandomForestRegressor)
from sklearn.model_selection import  train_test_split
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
# bdt = BaggingRegressor(DecisionTreeClassifier(random_state=42)).fit(X_train, y_train)


param = {'max_depth':[1, 5, 10, 15, 20]}

model = DecisionTreeClassifier(random_state=42)
grid_r = RandomizedSearchCV(model,param,n_iter=20, cv=10, scoring='roc_auc', random_state=1)
grid_r.fit(X_train_new, y_train)



In [66]:
dt_proba = grid_r.predict_proba(X_test_new)[:, 1]


In [68]:
write_to_submission_file(dt_proba, 'subm6.csv') # 0.9

In [70]:
model = DecisionTreeClassifier(random_state=42, class_weight='balanced')
parametrs = {'criterion': ['entropy', 'gini'],
                'max_depth': [1, 5, 10, 15, 16, 18, 19, 20],
                'min_samples_leaf':list(range(1, 6)),
                'max_features': list(range(4, 10))}

mod = GridSearchCV(estimator=model, param_grid=parametrs,
                                  scoring='roc_auc', n_jobs=1, cv=10, verbose=1)
mod.fit(X_train_new, y_train)           

Fitting 10 folds for each of 480 candidates, totalling 4800 fits


In [71]:
print(mod.best_params_)

{'criterion': 'gini', 'max_depth': 19, 'max_features': 8, 'min_samples_leaf': 2}


In [19]:
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
model = DecisionTreeClassifier(criterion= 'gini', max_depth= 19, max_features=8, min_samples_leaf=2, random_state=42, class_weight='balanced')


In [20]:
dt_proba = model.predict_proba(X_test_new)[:, 1]

NotFittedError: This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [74]:
write_to_submission_file(dt_proba, 'subm7.csv') # 0.9

In [22]:
from sklearn.ensemble import BaggingClassifier
clf1 = BaggingClassifier(model, n_estimators=10).fit(X_train_new, y_train)

In [24]:
write_to_submission_file(clf1.predict_proba(X_test_new)[:, 1], 'subm8.csv') # 0.9

In [32]:
X_train_new

<253561x50004 sparse matrix of type '<class 'numpy.int64'>'
	with 3633114 stored elements in COOrdinate format>

In [37]:
train_df.head()

Unnamed: 0_level_0,site1,time1,site2,time2,site3,time3,site4,time4,site5,time5,...,time6,site7,time7,site8,time8,site9,time9,site10,time10,target
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
21669,56,2013-01-12 08:05:57,55.0,2013-01-12 08:05:57,,,,,,,...,,,,,,,,,,0
54843,56,2013-01-12 08:37:23,55.0,2013-01-12 08:37:23,56.0,2013-01-12 09:07:07,55.0,2013-01-12 09:07:09,,,...,,,,,,,,,,0
77292,946,2013-01-12 08:50:13,946.0,2013-01-12 08:50:14,951.0,2013-01-12 08:50:15,946.0,2013-01-12 08:50:15,946.0,2013-01-12 08:50:16,...,2013-01-12 08:50:16,948.0,2013-01-12 08:50:16,784.0,2013-01-12 08:50:16,949.0,2013-01-12 08:50:17,946.0,2013-01-12 08:50:17,0
114021,945,2013-01-12 08:50:17,948.0,2013-01-12 08:50:17,949.0,2013-01-12 08:50:18,948.0,2013-01-12 08:50:18,945.0,2013-01-12 08:50:18,...,2013-01-12 08:50:18,947.0,2013-01-12 08:50:19,945.0,2013-01-12 08:50:19,946.0,2013-01-12 08:50:19,946.0,2013-01-12 08:50:20,0
146670,947,2013-01-12 08:50:20,950.0,2013-01-12 08:50:20,948.0,2013-01-12 08:50:20,947.0,2013-01-12 08:50:21,950.0,2013-01-12 08:50:21,...,2013-01-12 08:50:21,946.0,2013-01-12 08:50:21,951.0,2013-01-12 08:50:22,946.0,2013-01-12 08:50:22,947.0,2013-01-12 08:50:22,0


In [84]:
def add_range_features(df, X_sparse):
    hour = df['time1'].apply(lambda ts: ts.hour)
    df['time3'] = pd.to_datetime(df['time3'], format='mixed')
    hour2 = df['time3'].apply(lambda ts: ts.hour)
    # print(df['time1'].info)
    # print(df['time3'].info)
    diff = hour2 - hour
    morning = ((hour >= 7) & (hour <= 11)).astype('int')
    day = ((hour >= 12) & (hour <= 18)).astype('int')
    evening = ((hour >= 19) & (hour <= 23)).astype('int')
    night = ((hour >= 0) & (hour <= 6)).astype('int')
    X = hstack([X_sparse, diff.values.reshape(-1, 1),morning.values.reshape(-1, 1), 
                day.values.reshape(-1, 1), evening.values.reshape(-1, 1), 
                night.values.reshape(-1, 1)])
    return X
# add_range_features(test_df.fillna(0), X_test)

In [74]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 253561 entries, 21669 to 204762
Data columns (total 21 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   site1   253561 non-null  int64         
 1   time1   253561 non-null  datetime64[ns]
 2   site2   250098 non-null  float64       
 3   time2   250098 non-null  object        
 4   site3   246919 non-null  float64       
 5   time3   246919 non-null  object        
 6   site4   244321 non-null  float64       
 7   time4   244321 non-null  object        
 8   site5   241829 non-null  float64       
 9   time5   241829 non-null  object        
 10  site6   239495 non-null  float64       
 11  time6   239495 non-null  object        
 12  site7   237297 non-null  float64       
 13  time7   237297 non-null  object        
 14  site8   235224 non-null  float64       
 15  time8   235224 non-null  object        
 16  site9   233084 non-null  float64       
 17  time9   233084 non-null  objec

In [85]:
X_train_new2 = add_range_features(train_df.fillna(0), X_train)
# X_test_new2 = add_range_features(test_df.fillna(0), X_test)

In [86]:
X_test_new2 = add_range_features(test_df.fillna(0), X_test)

In [87]:
logit_grid_searcher.fit(X_train_new2, y_train) # WTF? Locally, it's 3min 30s

Fitting 10 folds for each of 10 candidates, totalling 100 fits




In [89]:
logit_grid_searcher.best_score_, logit_grid_searcher.best_params_
logit_test_pred3 = logit_grid_searcher.predict_proba(X_test_new2)[:, 1]
write_to_submission_file(logit_test_pred3, 'subm9.csv') # 0.94248

In [133]:
def add_detect_features(df, X_sparse):
    hour = df['time1'].apply(lambda ts: ts.hour)
    df['time3'] = pd.to_datetime(df['time3'], format='mixed')
    hour2 = df['time3'].apply(lambda ts: ts.hour)
    df['time10'] = pd.to_datetime(df['time10'], format='mixed')
    hour_site2 = df['time10'].apply(lambda ts: ts.hour)
    # print(df['time1'].info)
    # print(df['time3'].info)
    diff = hour2 - hour
    diff2 = hour_site2 - hour
    # print(df['site2'].astype('int'))
    det = (hour_site2 == hour.astype('int'))
    # print(det)
    morning = ((hour >= 7) & (hour <= 11)).astype('int')
    day = ((hour >= 12) & (hour <= 18)).astype('int')
    evening = ((hour >= 19) & (hour <= 23)).astype('int')
    night = ((hour >= 0) & (hour <= 6)).astype('int')
    X = hstack([X_sparse, morning.values.reshape(-1, 1), 
                day.values.reshape(-1, 1), evening.values.reshape(-1, 1), 
                night.values.reshape(-1, 1), det.values.reshape(-1, 1)])
    return X
X_train_new3 = add_detect_features(train_df.fillna(0), X_train)
X_test_new3 = add_detect_features(test_df.fillna(0), X_test)

In [134]:
logit_grid_searcher.fit(X_train_new3, y_train) # WTF? Locally, it's 3min 30s

Fitting 10 folds for each of 10 candidates, totalling 100 fits




In [146]:
logit_grid_searcher.best_score_, logit_grid_searcher.best_params_
X_test_new3 = add_detect_features(test_df.fillna(0), X_test)
logit_test_pred4 = logit_grid_searcher.predict_proba(X_test_new3)[:, 1]
write_to_submission_file(logit_test_pred4, 'subm11.csv') # 0.94247

In [141]:
train_df.tail(50)

Unnamed: 0_level_0,site1,time1,site2,time2,site3,time3,site4,time4,site5,time5,...,time6,site7,time7,site8,time8,site9,time9,site10,time10,target
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
190180,37259,2014-04-30 16:45:46,37259.0,2014-04-30 16:45:47,37259.0,2014-04-30 16:45:48,37259.0,2014-04-30 16:45:53,37259.0,2014-04-30 16:45:54,...,2014-04-30 16:45:55,37259.0,2014-04-30 16:45:58,37.0,2014-04-30 16:45:58,37259.0,2014-04-30 16:45:59,37259.0,2014-04-30 16:46:02,0
204881,37259,2014-04-30 16:46:03,37259.0,2014-04-30 16:46:04,37259.0,2014-04-30 16:46:05,37259.0,2014-04-30 16:46:06,37259.0,2014-04-30 16:46:09,...,2014-04-30 16:46:10,37259.0,2014-04-30 16:46:11,37259.0,2014-04-30 16:46:12,37259.0,2014-04-30 16:46:13,37259.0,2014-04-30 16:46:14,0
93837,37259,2014-04-30 16:46:15,37259.0,2014-04-30 16:46:16,37259.0,2014-04-30 16:46:19,37259.0,2014-04-30 16:46:20,37259.0,2014-04-30 16:46:21,...,2014-04-30 16:46:22,37259.0,2014-04-30 16:46:23,37259.0,2014-04-30 16:46:24,37259.0,2014-04-30 16:46:25,37259.0,2014-04-30 16:46:26,0
192561,37259,2014-04-30 16:46:27,37259.0,2014-04-30 16:46:28,37259.0,2014-04-30 16:46:29,37259.0,2014-04-30 16:46:30,37259.0,2014-04-30 16:46:31,...,2014-04-30 16:46:32,37259.0,2014-04-30 16:46:33,37259.0,2014-04-30 16:46:34,37259.0,2014-04-30 16:46:35,37259.0,2014-04-30 16:46:37,0
245979,37259,2014-04-30 16:46:38,37259.0,2014-04-30 16:46:39,37259.0,2014-04-30 16:46:40,37259.0,2014-04-30 16:46:41,37259.0,2014-04-30 16:46:42,...,2014-04-30 16:46:43,22.0,2014-04-30 16:47:14,812.0,2014-04-30 16:47:18,812.0,2014-04-30 16:49:03,270.0,2014-04-30 16:49:03,0
133096,820,2014-04-30 16:47:52,820.0,2014-04-30 16:47:54,820.0,2014-04-30 16:48:37,820.0,2014-04-30 16:48:39,980.0,2014-04-30 16:49:46,...,2014-04-30 16:52:41,,,,,,,,,0
208054,177,2014-04-30 16:49:06,167.0,2014-04-30 16:49:06,37260.0,2014-04-30 16:49:08,167.0,2014-04-30 16:49:08,511.0,2014-04-30 16:49:09,...,2014-04-30 16:49:09,37260.0,2014-04-30 16:49:10,511.0,2014-04-30 16:49:10,2883.0,2014-04-30 16:49:10,37260.0,2014-04-30 16:49:13,0
43621,37260,2014-04-30 16:49:14,37260.0,2014-04-30 16:49:17,37260.0,2014-04-30 16:49:19,37260.0,2014-04-30 16:49:25,511.0,2014-04-30 16:49:26,...,2014-04-30 16:49:26,2883.0,2014-04-30 16:49:27,37260.0,2014-04-30 16:49:27,37260.0,2014-04-30 16:49:33,2883.0,2014-04-30 16:49:34,0
214523,37260,2014-04-30 16:49:34,511.0,2014-04-30 16:49:34,37260.0,2014-04-30 16:49:39,37260.0,2014-04-30 16:49:40,37260.0,2014-04-30 16:49:41,...,2014-04-30 16:51:48,80.0,2014-04-30 16:51:48,913.0,2014-04-30 16:51:50,912.0,2014-04-30 16:51:50,3342.0,2014-04-30 16:51:50,0
213961,167,2014-04-30 16:51:50,362.0,2014-04-30 16:51:51,167.0,2014-04-30 16:51:51,363.0,2014-04-30 16:51:51,363.0,2014-04-30 16:51:52,...,2014-04-30 16:51:52,360.0,2014-04-30 16:51:52,167.0,2014-04-30 16:51:52,270.0,2014-04-30 16:51:52,364.0,2014-04-30 16:51:52,0
