In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the training data and show its (rows, columns) shape.
df_train = pd.read_csv('train_hire_stats.csv')
df_train.shape

In [None]:
# Calendar exceptions used when deriving the weekday / workday features.
# holidays:       dates treated as non-working days, including ordinary
#                 weekdays that fall inside a holiday break.
# makeupworkdays: weekend dates that are worked to compensate for a break.
#
# Dates are matched against the 'Date' column by exact string lookup, so every
# entry must be a zero-padded "YYYY-MM-DD" string.
# NOTE(review): assumes the CSV dates are zero-padded like these — confirm.

holidays = {
    '2016-02-08', '2016-02-09', '2016-02-10', '2016-02-11', '2016-02-12',
    '2016-02-29',
    '2016-04-04', '2016-04-05',
    '2016-06-09', '2016-06-10',
    '2016-09-15', '2016-09-16',
    '2016-10-10',
    '2017-01-02', '2017-01-27', '2017-01-30',
    '2017-02-01', '2017-02-27', '2017-02-28',
}
makeupworkdays = {'2016-06-04', '2016-09-10', '2017-02-18'}

In [None]:
# Preview the first rows of the training data.
df_train.head()

In [None]:
# Allocate one slot per training row for the two derived calendar features.
# Both arrays start filled with 1; the fill-in loop overwrites entries as needed.

isworkday = np.full(len(df_train), 1, dtype=int)
weekday = np.ones_like(isworkday)

In [None]:
# Fill in the weekday / workday features for every training row.
# datetime.weekday() codes: 0 = Monday, 1 = Tuesday, ..., 5 = Saturday, 6 = Sunday.
#
# NOTE(review): rows whose date is in `holidays` skip the weekday computation,
# so their `weekday` entry keeps its pre-loop value (the arrays are initialised
# to 1). Confirm this is intended before changing it, and keep the test-set
# loop in sync if it is ever changed.

from datetime import datetime

for idx, date_str in df_train['Date'].items():
    if date_str in holidays:
        # Holiday: never a workday, weekday left untouched.
        isworkday[idx] = 0
        continue
    weekday[idx] = datetime.strptime(date_str, "%Y-%m-%d").weekday()
    # Saturday/Sunday are days off unless flagged as a make-up workday.
    if weekday[idx] >= 5 and date_str not in makeupworkdays:
        isworkday[idx] = 0


In [None]:
# Assemble the working frame: the selected training columns followed by the
# two derived calendar features (new frame; df_train itself is not modified).

RawX = df_train[["Zone_ID", "Hour_slot", "Hire_count"]].assign(
    isworkday=isworkday,
    weekday=weekday,
)

In [None]:
# Sanity check: (rows, columns) of the assembled frame.
print(RawX.shape)

In [None]:
# Split off the regression target.
# y holds the raw Hire_count values as a NumPy array (no rescaling is applied);
# the column is then dropped so RawX contains input features only.

y = RawX["Hire_count"].values
RawX = RawX.drop(columns=['Hire_count'])

In [None]:
# Preview the feature frame before it is one-hot encoded.

RawX.head()

In [None]:
# Create the one-hot encoder used for the feature columns.
# handle_unknown='ignore': per the scikit-learn docs, categories not seen during
# fit do not raise at transform time (they are encoded as all zeros).

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')

In [None]:
# Fit the encoder: learn the set of categories present in each RawX column.
# Only fit() is needed here; nothing in this cell uses an encoded matrix.
# The last line displays the learned categories per column.

enc.fit(RawX)
enc.categories_

In [None]:
# Encode the features as one-hot vectors; .toarray() turns the encoder output
# into a dense array. The last line shows the resulting shape.

X = enc.transform(RawX).toarray()
X.shape

In [None]:
# Preview the first three encoded rows (all columns).
X[0:3, :]

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Activation, Dropout

In [None]:
# Build the baseline neural network (training happens in a later cell):
# two ReLU hidden layers (128 and 64 units), 10% dropout, and a single
# linear output unit for the hire-count regression.
# The input width is read from X so it always matches the number of one-hot
# columns the encoder produced, instead of relying on a hard-coded value.

model = Sequential()
model.add(Dense(128, activation='relu', input_dim=X.shape[1]))
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

In [None]:
# Compile with mean-squared-error loss and train on the full training set.
from tensorflow.keras.optimizers import Adam

# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
# NOTE(review): in recent TensorFlow releases `decay` is only accepted by the
# legacy optimizer implementation — confirm against the installed version.
model.compile(loss='mse', optimizer=Adam(learning_rate=1e-3, decay=1e-5))
model.fit(X, y, epochs=50, batch_size=1000, verbose=1)

# Test

In [None]:
# Load the test data and show its (rows, columns) shape.
df_test = pd.read_csv('test_hire_stats.csv')
df_test.shape

In [None]:
# Allocate one slot per test row for the two derived calendar features.
# Both arrays start filled with 1; the fill-in loop overwrites entries as needed.

isworkday2 = np.full(len(df_test), 1, dtype=int)
weekday2 = np.ones_like(isworkday2)

In [None]:
# Fill in the weekday / workday features for every test row, using the same
# rules as for the training rows (weekday codes: 0 = Monday ... 6 = Sunday).
# NOTE(review): holiday rows skip the weekday computation and keep the value
# weekday2 was initialised with — confirm this is intended.
for idx, date_str in df_test['Date'].items():
    if date_str in holidays:
        # Holiday: never a workday, weekday left untouched.
        isworkday2[idx] = 0
        continue
    weekday2[idx] = datetime.strptime(date_str, "%Y-%m-%d").weekday()
    # Saturday/Sunday are days off unless flagged as a make-up workday.
    if weekday2[idx] >= 5 and date_str not in makeupworkdays:
        isworkday2[idx] = 0

In [None]:
# Assemble the test feature frame with the same column order as the training
# features: Zone_ID, Hour_slot, isworkday, weekday.
Test = df_test[["Zone_ID", "Hour_slot"]].assign(
    isworkday=isworkday2,
    weekday=weekday2,
)

In [None]:
# Encode the test features with the already-fitted encoder and show the shape.
Xtest = enc.transform(Test).toarray()
Xtest.shape

In [None]:
# Predict hire counts for the encoded test rows.
yt = model.predict(Xtest)

# 2017/2/10 Uber 退出台灣市場

In [None]:
# Post-hoc adjustment: scale predictions up by 12% from the 10th day onward
# within each zone's block of rows.
# NOTE(review): presumably compensates for the demand shift after Uber left the
# market on 2017-02-10; the 1.12 factor is hand-picked — confirm.
# NOTE(review): assumes yt is ordered zone by zone, each zone contributing
# 28 days x 24 hourly rows — confirm against the test file layout.
all_zone = [7, 8, 9, 12, 13, 14, 17, 18, 19]
interval = 28 * 24  # rows per zone: 28 days of 24 hourly slots

for i in range(len(all_zone)):
    zone_start = interval * i
    modified_start = zone_start + 24 * 9  # skip the first 9 days of the block
    modified_end = zone_start + interval
    for j in range(modified_start, modified_end):
        yt[j] = yt[j] * 1.12

# 儲存預測結果

In [None]:
# Write the submission file: the original test rows plus the predicted
# Hire_count column.
test_df = pd.read_csv('test_hire_stats.csv', sep=',')
# yt comes from a model with a single output unit, so it is expected to be a
# 2-D (n_rows, 1) array; flatten it so the column receives a plain 1-D vector.
test_df['Hire_count'] = np.ravel(yt)
test_df.to_csv('predict_x1o12.csv', index=False)
# Last expression of the cell, so the preview is actually displayed.
test_df.head()