In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
#from IPython.core.interactiveshell import InteractiveShell
#InteractiveShell.ast_node_interactivity = "all"

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 30)

#sns.set_style("whitegrid")
#plt.style.use('bmh')
plt.style.use('seaborn-whitegrid')

# this allows plots to appear directly in the notebook
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
train_df = pd.read_csv('data/train.csv')
train_df['data_set'] = 'train'
test_df = pd.read_csv('data/test.csv')
test_df['data_set'] = 'test'
# combine train and test data into one df
test_df['registered'] = 0
test_df['casual'] = 0
test_df['count'] = 0

all_df = pd.concat([train_df, test_df])
# parse datetime colum & add new time related columns
dt = pd.DatetimeIndex(all_df['datetime'])
all_df.set_index(dt, inplace=True)

# logarithmic transformation of dependent cols
# (adding 1 first so that 0 values don't become -inf)
for col in ['casual', 'registered', 'count']:
    all_df[f'{col}_log'] = np.log(all_df[col] + 1)

all_df['date'] = dt.date # yyyymmdd
all_df['day'] = dt.day # dd
all_df['month'] = dt.month # mm
all_df['year'] = dt.year # yyyy
all_df['hour'] = dt.hour # hh
all_df['dow'] = dt.dayofweek #曜日 Mon:0 Tue:1 Wed:2 Thu:3 Fri:4 Sat:5 Sun:6
all_df['woy'] = dt.isocalendar().week #その日の週が年間で見ると何番目の週かを表す数字 [dt.weekofyear]は deprecated

# add a count_season column using join
by_season = all_df[all_df['data_set'] == 'train'].copy().groupby(['season'])[['count']].agg(sum)
by_season.columns = ['count_season']
all_df = all_df.join(by_season, on='season')


# feature engineer a new column whether its a peak hour or not
all_df['peak'] = all_df[['hour', 'workingday']]\
    .apply(lambda df: 1 if ((df['workingday'] == 1 and (df['hour'] == 8 or 17 <= df['hour'] <= 18)) \
                            or (df['workingday'] == 0 and 10 <= df['workingday'] <= 19)) else 0, axis = 1)
#ここの修正の仕方は、間違っているので要修正！
# sandy
#all_df['holiday'] = all_df[['month', 'day', 'holiday', 'year']]\
#    .apply(lambda df: 1 if (df['year'] == 2012 and df['month'] == 10 and df['day'] == 30) else 0, axis = 1)
# 修正後↓
all_df['holiday'] = all_df[['month', 'day', 'holiday', 'year']]\
    .apply(lambda df: 1 if (df['year'] == 2012 and df['month'] == 10 and df['day'] == 30) else df['holiday'], axis = 1)


# christmas and others
all_df['holiday'] = all_df[['month', 'day', 'holiday']]\
    .apply(lambda df: 1 if (df['month'] == 12 and df['day'] in [24, 26, 31]) else df['holiday'], axis = 1)
all_df['workingday'] = all_df[['month', 'day', 'workingday']]\
    .apply(lambda df: 0 if df['month'] == 12 and df['day'] in [24, 31] else df['workingday'], axis = 1)
# これは流石に気づかない気がする。。。気づけない気がする。。。
def get_day(day_start):
    day_end = day_start + pd.offsets.DateOffset(hours=23)
    return pd.date_range(day_start, day_end, freq="H")

# tax day
all_df.loc[get_day(datetime(2011, 4, 15)), "workingday"] = 1
all_df.loc[get_day(datetime(2012, 4, 16)), "workingday"] = 1

# thanksgiving friday
all_df.loc[get_day(datetime(2011, 11, 25)), "workingday"] = 0
all_df.loc[get_day(datetime(2012, 11, 23)), "workingday"] = 0

# tax day
all_df.loc[get_day(datetime(2011, 4, 15)), "holiday"] = 0
all_df.loc[get_day(datetime(2012, 4, 16)), "holiday"] = 0

# thanksgiving friday
all_df.loc[get_day(datetime(2011, 11, 25)), "holiday"] = 1
all_df.loc[get_day(datetime(2012, 11, 23)), "holiday"] = 1

#storms
all_df.loc[get_day(datetime(2012, 5, 21)), "holiday"] = 1

#tornado
all_df.loc[get_day(datetime(2012, 6, 1)), "holiday"] = 1
# from histogram
all_df['ideal'] = all_df[['temp', 'windspeed']]\
    .apply(lambda df: 1 if (df['temp'] > 27 and df['windspeed'] < 30) else 0, axis = 1)
    
all_df['sticky'] = all_df[['humidity', 'workingday']]\
    .apply(lambda df: 1 if (df['workingday'] == 1 and df['humidity'] >= 60) else 0, axis = 1)

# One-hot-Encoding for season
season_map = {1:'Spring', 2:'Summer', 3:'Fall', 4:'Winter'}
all_df['season'] = all_df['season'].map(lambda d : season_map[d])
temporary = pd.get_dummies(all_df['season'])
all_df['season_Fall'] = temporary['Fall']
all_df['season_Spring'] = temporary['Spring']
all_df['season_Summer'] = temporary['Summer']
all_df['season_Winter'] = temporary['Winter']

In [3]:
# instead of randomly splitting our training data 
# for cross validation, let's construct a framework that's more
# in line with how the data is divvied up for this competition
# (given first 19 days of each month, what is demand for remaining days)
# so, let's split our training data into 2 time contiguous datasets
# for fitting and validating our model (days 1-14 vs. days 15-19).

# also, since submissions are evaluated based on the
# root mean squared logarithmic error (RMSLE), let's replicate
# that computation as we test and tune our model.

train_df = all_df[all_df['data_set'] == 'train']
test_df = all_df[all_df['data_set'] == 'test']

def get_rmsle(y_pred, y_actual):
    diff = np.log(y_pred + 1) - np.log(y_actual + 1)
    mean_error = np.square(diff).mean()
    return np.sqrt(mean_error)

def custom_train_valid_split(data, cutoff_day=15):
    train = data[data['day'] <= cutoff_day]
    valid = data[data['day'] > cutoff_day]

    return train, valid

def prep_train_data(data, input_cols):
    X = data[input_cols].values
    y_r = data['registered_log'].values
    y_c = data['casual_log'].values

    return X, y_r, y_c

# predict on validation set & transform output back from log scale
def predict_on_validation_set(model, input_cols):
    
    train, valid = custom_train_valid_split(train_df)
    y_pred_comb_l = []
    y_actual_comb_l = []

    for year_val in [2011,2012]:
        for month_val in range(1,13):

            print(f'Now,{year_val} {month_val} training and validating...')
            # prepare training & validation set
            train_tmp = train.query('year <= @year_val and month <= @month_val')
            valid_tmp = valid.query('year == @year_val and month == @month_val')

            X_train, y_train_r, y_train_c = prep_train_data(train_tmp, input_cols)
            X_valid, y_valid_r, y_valid_c = prep_train_data(valid_tmp, input_cols)

            # training and validating
            model_r = model.fit(X_train, y_train_r)
            y_pred_r = np.exp(model_r.predict(X_valid)) - 1

            model_c = model.fit(X_train, y_train_c)
            y_pred_c = np.exp(model_c.predict(X_valid)) - 1

            y_pred_comb = np.round(y_pred_r + y_pred_c)
            y_pred_comb[y_pred_comb < 0] = 0
            y_pred_comb_l.extend(y_pred_comb)

            y_actual_comb = np.exp(y_valid_r) + np.exp(y_valid_c) - 2
            y_actual_comb_l.extend(y_actual_comb)

            #rmsle = get_rmsle(y_pred_comb, y_actual_comb)
            #rmsle_l.append(rmsle)
    
    rmsle = get_rmsle(np.array(y_pred_comb_l),np.array(y_actual_comb_l))
    
    return (np.array(y_pred_comb_l), np.array(y_actual_comb_l), rmsle)


# predict on test set & transform output back from log scale
def predict_on_test_set(model, input_cols):
    
    y_pred_comb_l = []
    for year_val in [2011,2012]:
        for month_val in range(1,13):
            
            # prepare training set
            print(f'Now,{year_val} {month_val} testing...')
            train_df_tmp = train_df.query('year <= @year_val and month <= @month_val')
            test_df_tmp = test_df.query('year == @year_val and month == @month_val')

            X_train, y_train_r, y_train_c = prep_train_data(train_df_tmp, input_cols)

            # prepare testing set
            X_test = test_df_tmp[input_cols].values
            
            model_c = model.fit(X_train, y_train_c)
            y_pred_c = np.exp(model_c.predict(X_test)) - 1

            model_r = model.fit(X_train, y_train_r)
            y_pred_r = np.exp(model_r.predict(X_test)) - 1
            
            # add casual & registered predictions together
            y_pred_comb = np.round(y_pred_r + y_pred_c)
            y_pred_comb[y_pred_comb < 0] = 0
            y_pred_comb_l.extend(y_pred_comb)

    
    return np.array(y_pred_comb_l)

In [4]:
params = {'n_estimators': 1000, 'max_depth': 15, 'random_state': 0, 'min_samples_split' : 5, 'n_jobs': -1}
rf_model = RandomForestRegressor(**params)
rf_cols = [
    'weather', 'temp', 'atemp', 'windspeed',
    #'workingday', 'season', 'holiday', 'sticky',
    'workingday', 'season_Fall', 'season_Spring', 'season_Summer', 'season_Winter', 'holiday', 'sticky',
    'hour', 'dow', 'woy', 'peak'
    ]

(rf_pred, rf_actual, rf_rmsle) = predict_on_validation_set(rf_model, rf_cols)

Now,2011 1 training and validating...
Now,2011 2 training and validating...
Now,2011 3 training and validating...
Now,2011 4 training and validating...
Now,2011 5 training and validating...
Now,2011 6 training and validating...
Now,2011 7 training and validating...
Now,2011 8 training and validating...
Now,2011 9 training and validating...
Now,2011 10 training and validating...
Now,2011 11 training and validating...
Now,2011 12 training and validating...
Now,2012 1 training and validating...
Now,2012 2 training and validating...
Now,2012 3 training and validating...
Now,2012 4 training and validating...
Now,2012 5 training and validating...
Now,2012 6 training and validating...
Now,2012 7 training and validating...
Now,2012 8 training and validating...
Now,2012 9 training and validating...
Now,2012 10 training and validating...
Now,2012 11 training and validating...
Now,2012 12 training and validating...


In [5]:
print(f'rf_pred.shape: {rf_pred.shape}   rf_actual.shape: {rf_actual.shape}   rf_rmsle: {rf_rmsle}')
all_df[rf_cols].corr()

rf_pred.shape: (2286,)   rf_actual.shape: (2286,)   rf_rmsle: 0.4259204238571178


Unnamed: 0,weather,temp,atemp,windspeed,workingday,season_Fall,season_Spring,season_Summer,season_Winter,holiday,sticky,hour,dow,woy,peak
weather,1.0,-0.10264,-0.105563,0.026226,0.042061,-0.087771,0.030999,0.016738,0.041561,0.026336,0.243523,-0.020203,-0.046424,0.009692,0.013185
temp,-0.10264,1.0,0.987672,-0.023125,0.069153,0.642516,-0.583859,0.144363,-0.217601,-0.070342,-0.007074,0.137603,-0.03622,0.198641,0.043486
atemp,-0.105563,0.987672,1.0,-0.062336,0.067594,0.61957,-0.58747,0.151903,-0.198218,-0.072605,0.004717,0.13375,-0.038918,0.205561,0.041081
windspeed,0.026226,-0.023125,-0.062336,1.0,-0.001937,-0.089358,0.116168,0.063447,-0.08941,-0.005298,-0.187671,0.137252,0.003274,-0.131613,0.052051
workingday,0.042061,0.069153,0.067594,-0.001937,1.0,0.023562,-0.039523,0.032831,-0.01777,-0.240847,0.5369,0.002185,-0.698028,-0.0257,0.205012
season_Fall,-0.087771,0.642516,0.61957,-0.089358,0.023562,1.0,-0.335693,-0.344433,-0.335169,-0.053882,0.018962,-0.003348,-0.005217,0.192278,0.003168
season_Spring,0.030999,-0.583859,-0.58747,0.116168,-0.039523,-0.335693,1.0,-0.331312,-0.322401,0.081297,-0.0959,0.008363,0.014214,-0.545152,-0.003548
season_Summer,0.016738,0.144363,0.151903,0.063447,0.032831,-0.344433,-0.331312,1.0,-0.330795,-0.05191,0.010923,-0.002919,-0.00895,-0.325239,0.002468
season_Winter,0.041561,-0.217601,-0.198218,-0.08941,-0.01777,-0.335169,-0.322401,-0.330795,1.0,0.026241,0.065555,-0.001994,0.00017,0.679132,-0.002183
holiday,0.026336,-0.070342,-0.072605,-0.005298,-0.240847,-0.053882,0.081297,-0.05191,0.026241,1.0,-0.10933,0.003451,-0.169788,0.081117,-0.037377


In [6]:
params = {'n_estimators': 150, 'max_depth': 5, 'random_state': 0, 'min_samples_leaf' : 10, 'learning_rate': 0.1, 'subsample': 0.7, 'loss': 'ls'}
gbm_model = GradientBoostingRegressor(**params)
gbm_cols = [
    'weather', 'temp', 'atemp', 'humidity', 'windspeed',
    #'holiday', 'workingday', 'season',
    'holiday', 'workingday', 'season_Fall', 'season_Spring', 'season_Summer', 'season_Winter',
    'hour', 'dow', 'year', 'ideal', #'count_season',
]

(gbm_pred, gbm_actual, gbm_rmsle) = predict_on_validation_set(gbm_model, gbm_cols)

Now,2011 1 training and validating...
Now,2011 2 training and validating...
Now,2011 3 training and validating...
Now,2011 4 training and validating...
Now,2011 5 training and validating...
Now,2011 6 training and validating...
Now,2011 7 training and validating...
Now,2011 8 training and validating...
Now,2011 9 training and validating...
Now,2011 10 training and validating...
Now,2011 11 training and validating...
Now,2011 12 training and validating...
Now,2012 1 training and validating...
Now,2012 2 training and validating...
Now,2012 3 training and validating...
Now,2012 4 training and validating...
Now,2012 5 training and validating...
Now,2012 6 training and validating...
Now,2012 7 training and validating...
Now,2012 8 training and validating...
Now,2012 9 training and validating...
Now,2012 10 training and validating...
Now,2012 11 training and validating...
Now,2012 12 training and validating...


In [7]:
print(f'gbm_pred.shape: {gbm_pred.shape}   gbm_actual.shape: {gbm_actual.shape}   gbm_rmsle: {gbm_rmsle}')

gbm_pred.shape: (2286,)   gbm_actual.shape: (2286,)   gbm_rmsle: 0.3489402537581922


In [8]:
print(f'gbm_pred.shape: {gbm_pred.shape}   gbm_actual.shape: {gbm_actual.shape}   gbm_rmsle: {gbm_rmsle}')
all_df[gbm_cols].corr()

gbm_pred.shape: (2286,)   gbm_actual.shape: (2286,)   gbm_rmsle: 0.3489402537581922


Unnamed: 0,weather,temp,atemp,humidity,windspeed,holiday,workingday,season_Fall,season_Spring,season_Summer,season_Winter,hour,dow,year,ideal
weather,1.0,-0.10264,-0.105563,0.41813,0.026226,0.026336,0.042061,-0.087771,0.030999,0.016738,0.041561,-0.020203,-0.046424,-0.019157,-0.145407
temp,-0.10264,1.0,0.987672,-0.069881,-0.023125,-0.070342,0.069153,0.642516,-0.583859,0.144363,-0.217601,0.137603,-0.03622,0.040913,0.727266
atemp,-0.105563,0.987672,1.0,-0.051918,-0.062336,-0.072605,0.067594,0.61957,-0.58747,0.151903,-0.198218,0.13375,-0.038918,0.039222,0.701874
humidity,0.41813,-0.069881,-0.051918,1.0,-0.290105,0.012676,0.014316,0.018184,-0.135138,-0.000625,0.117326,-0.276498,-0.035233,-0.083546,-0.141678
windspeed,0.026226,-0.023125,-0.062336,-0.290105,1.0,-0.005298,-0.001937,-0.089358,0.116168,0.063447,-0.08941,0.137252,0.003274,-0.00874,-0.051489
holiday,0.026336,-0.070342,-0.072605,0.012676,-0.005298,1.0,-0.240847,-0.053882,0.081297,-0.05191,0.026241,0.003451,-0.169788,0.029875,-0.022858
workingday,0.042061,0.069153,0.067594,0.014316,-0.001937,-0.240847,1.0,0.023562,-0.039523,0.032831,-0.01777,0.002185,-0.698028,-0.007959,0.023068
season_Fall,-0.087771,0.642516,0.61957,0.018184,-0.089358,-0.053882,0.023562,1.0,-0.335693,-0.344433,-0.335169,-0.003348,-0.005217,-0.000923,0.640205
season_Spring,0.030999,-0.583859,-0.58747,-0.135138,0.116168,0.081297,-0.039523,-0.335693,1.0,-0.331312,-0.322401,0.008363,0.014214,0.01129,-0.318055
season_Summer,0.016738,0.144363,0.151903,-0.000625,0.063447,-0.05191,0.032831,-0.344433,-0.331312,1.0,-0.330795,-0.002919,-0.00895,-0.002589,-0.050193


In [9]:
# the blend gives a better score on the leaderboard, even though it does not on the validation set
y_pred = np.round(.4*rf_pred + .6*gbm_pred)
print(get_rmsle(y_pred, rf_actual))

0.35356341706641553


In [10]:
rf_pred = predict_on_test_set(rf_model, rf_cols)
gbm_pred = predict_on_test_set(gbm_model, gbm_cols)

y_pred = np.round(.4*rf_pred + .6*gbm_pred)

Now,2011 1 testing...
Now,2011 2 testing...
Now,2011 3 testing...
Now,2011 4 testing...
Now,2011 5 testing...
Now,2011 6 testing...
Now,2011 7 testing...
Now,2011 8 testing...
Now,2011 9 testing...
Now,2011 10 testing...
Now,2011 11 testing...
Now,2011 12 testing...
Now,2012 1 testing...
Now,2012 2 testing...
Now,2012 3 testing...
Now,2012 4 testing...
Now,2012 5 testing...
Now,2012 6 testing...
Now,2012 7 testing...
Now,2012 8 testing...
Now,2012 9 testing...
Now,2012 10 testing...
Now,2012 11 testing...
Now,2012 12 testing...
Now,2011 1 testing...
Now,2011 2 testing...
Now,2011 3 testing...
Now,2011 4 testing...
Now,2011 5 testing...
Now,2011 6 testing...
Now,2011 7 testing...
Now,2011 8 testing...
Now,2011 9 testing...
Now,2011 10 testing...
Now,2011 11 testing...
Now,2011 12 testing...
Now,2012 1 testing...
Now,2012 2 testing...
Now,2012 3 testing...
Now,2012 4 testing...
Now,2012 5 testing...
Now,2012 6 testing...
Now,2012 7 testing...
Now,2012 8 testing...
Now,2012 9 testing...
N

In [11]:
# output predictions for submission
submit_manual_blend_df = test_df[['datetime', 'count']].copy()
submit_manual_blend_df['count'] = y_pred
submit_manual_blend_df.to_csv('output/submit_manual_blend_20211003_4.csv', index=False)

In [12]:
# Level 0 RandomForestRegressor
rf_params = {'n_estimators': 1000, 'max_depth': 15, 'random_state': 0, 'min_samples_split' : 5, 'n_jobs': -1}
rf_model = RandomForestRegressor(**rf_params)
rf_cols = [
    'weather', 'temp', 'atemp', 'windspeed',
    'workingday', 'season', 'holiday', 'sticky',
    'hour', 'dow', 'woy', 'peak'
    ]
# Level 0 GradientBoostingRegressor
gbm_params = {'n_estimators': 150, 'max_depth': 5, 'random_state': 0, 'min_samples_leaf' : 10, 'learning_rate': 0.1, 'subsample': 0.7, 'loss': 'ls'}
gbm_model = GradientBoostingRegressor(**gbm_params)
gbm_cols = [
    'weather', 'temp', 'atemp', 'humidity', 'windspeed',
    'holiday', 'workingday', 'season',
    'hour', 'dow', 'year', 'ideal', 'count_season',
]
clf_input_cols = [rf_cols, gbm_cols]
clfs = [rf_model, gbm_model]
# Create train and test sets for blending and Pre-allocate the data
blend_train = np.zeros((train_df.shape[0], len(clfs)))
blend_test = np.zeros((test_df.shape[0], len(clfs)))

In [13]:
# For each classifier, we train the classifier with its corresponding input_cols 
# and record the predictions on the train and the test set
for clf_index, (input_cols, clf) in enumerate(zip(clf_input_cols, clfs)):
    
    # prepare training and validation set
    X_train, y_train_r, y_train_c = prep_train_data(train_df, input_cols)
    
    # prepare testing set
    X_test = test_df[input_cols].values
    
    model_r = clf.fit(X_train, y_train_r)
    y_pred_train_r = np.exp(model_r.predict(X_train)) - 1
    y_pred_test_r = np.exp(model_r.predict(X_test)) - 1

    model_c = clf.fit(X_train, y_train_c)
    y_pred_train_c = np.exp(model_c.predict(X_train)) - 1
    y_pred_test_c = np.exp(model_c.predict(X_test)) - 1

    y_pred_train_comb = np.round(y_pred_train_r + y_pred_train_c)
    y_pred_train_comb[y_pred_train_comb < 0] = 0
    
    y_pred_test_comb = np.round(y_pred_test_r + y_pred_test_c)
    y_pred_test_comb[y_pred_test_comb < 0] = 0
    
    blend_train[:, clf_index] = y_pred_train_comb
    blend_test[:, clf_index] = y_pred_test_comb

ValueError: could not convert string to float: 'Spring'

In [None]:
# Level 1 Belending Classifier using LinearRegression
from sklearn.linear_model import LinearRegression
bclf = LinearRegression(fit_intercept=False)
bclf.fit(blend_train, train_df['count'])
# What is the weighted combination of the base classifiers?
print(bclf.coef_)
# Stacked and Blending predictions
y_pred_blend = np.round(bclf.predict(blend_test))
# R^2 score
bclf.score(blend_train, train_df['count'])

[0.31036788 0.73305461]


0.9657514579162971

In [None]:
# output predictions for submission
submit_stack_blend_df = test_df[['datetime', 'count']].copy()
submit_stack_blend_df['count'] = y_pred_blend
submit_stack_blend_df.to_csv('output/submit_stack_blend_20211003_4.csv', index=False)

In [14]:
all_df.head(3)

Unnamed: 0_level_0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count,data_set,casual_log,registered_log,count_log,date,day,month,year,hour,dow,woy,count_season,peak,ideal,sticky,season_Fall,season_Spring,season_Summer,season_Winter
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2011-01-01 00:00:00,2011-01-01 00:00:00,Spring,0,0,1,9.84,14.395,81,0.0,3,13,16,train,1.386294,2.639057,2.833213,2011-01-01,1,1,2011,0,5,52,312498,0,0,0,0,1,0,0
2011-01-01 01:00:00,2011-01-01 01:00:00,Spring,0,0,1,9.02,13.635,80,0.0,8,32,40,train,2.197225,3.496508,3.713572,2011-01-01,1,1,2011,1,5,52,312498,0,0,0,0,1,0,0
2011-01-01 02:00:00,2011-01-01 02:00:00,Spring,0,0,1,9.02,13.635,80,0.0,5,27,32,train,1.791759,3.332205,3.496508,2011-01-01,1,1,2011,2,5,52,312498,0,0,0,0,1,0,0
