# Import Python Packages Needed  

In [34]:
import numpy as np
import pandas as pd 
import xgboost as xgb

# Read and Process Train data set 

In [26]:
# Load the training set and parse its timestamp column into datetimes.
data = pd.read_csv(".../train.csv")
data['date'] = pd.to_datetime(data['date'])

# Expand the timestamp into one column per calendar/time component.
# Keys are the new column names; values are the matching `.dt` attributes.
_train_date_parts = {
    'year': 'year',
    'month': 'month',
    'day': 'day',
    'hour': 'hour',
    'minute': 'minute',
    'weekday': 'dayofweek',
}
for _column, _attribute in _train_date_parts.items():
    data[_column] = getattr(data['date'].dt, _attribute)

# Read and Process Weather data set 

In [27]:
# Load the weather table. read_excel already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed.
weather = pd.read_excel(".../weather.xlsx")

# Derive the year/month/day keys that are later used to join the weather
# onto the train and test rows.
weather['date'] = pd.to_datetime(weather['date'])
weather['year'] = weather['date'].dt.year
weather['month'] = weather['date'].dt.month
weather['day'] = weather['date'].dt.day

# The temperature columns are text carrying a '℃' unit marker. Remove it as
# a literal string (regex=False keeps the behaviour independent of the
# pandas-version default for `regex`) and cast the remainder to int32.
for _temp_col in ('tem_high', 'tem_low'):
    weather[_temp_col] = (
        weather[_temp_col].str.replace('℃', '', regex=False).astype('int32')
    )

from sklearn.preprocessing import LabelEncoder    # used for label encoding

# Replace each categorical weather column with integer codes. A separate
# encoder is fitted per column; after the loop `lf` holds the encoder that
# was fitted on the last column ('wind').
for _categorical_col in ('wea_day', 'wea_night', 'wind'):
    lf = LabelEncoder().fit(weather[_categorical_col])
    weather[_categorical_col] = lf.transform(weather[_categorical_col])

# Merge the Train dataset and the Weather dataset

In [28]:
# Attach the weather of the matching calendar day to every training row.
# Rows are paired on (year, month, day) using the merge's default join type.
wash_data = data.merge(weather, on=["year", "month", "day"])

# Set model parameters and train the model

In [29]:
# Feature columns fed to the model.
# NOTE(review): the '*_label' columns and 'is_rest' are not created in the
# visible part of this script — presumably they come from the input files;
# confirm, otherwise this selection raises KeyError.
_train_feature_cols = [
    'year', 'month', 'day', 'hour', 'weekday',
    'tem_high', 'tem_low',
    'wea_day_label', 'wea_night_label', 'wind_label',
    'wea_day', 'wea_night', 'wind',
    'is_rest',
]
x_train = wash_data[_train_feature_cols]

# Target is kept as a single-column DataFrame.
y_train = wash_data[['speed']]

# Pack features and target into XGBoost's training container.
xgtrain = xgb.DMatrix(x_train, label=y_train)

# Booster parameters for xgb.train.
#
# Each key now appears exactly once. The original literal listed 'objective'
# twice, so the first value ('multi:softprob') was silently discarded in
# favour of 'reg:squarederror'. It also set both 'eta' and its alias
# 'learning_rate' to the same value; only 'eta' is kept, as a float rather
# than a string.
param = {
    'lambda': 1,
    'eta': 0.1,
    'max_depth': 8,
    'min_child_weight': 3,
    'gamma': 0,
    'subsample': 1,
    'colsample_bytree': 1,
    'objective': 'reg:squarederror',
}
# Number of boosting rounds to run.
num_round = 350

# Fit the booster on the training matrix with the parameters above.
model = xgb.train(param, xgtrain, num_boost_round=num_round)

# Read and Process Test data set 

In [31]:
# Load the test set and parse its timestamp column into datetimes.
test = pd.read_csv("test.csv")
test['date'] = pd.to_datetime(test['date'])

# Expand the timestamp into the same calendar/time columns as the train set.
# Keys are the new column names; values are the matching `.dt` attributes.
_test_date_parts = {
    'year': 'year',
    'month': 'month',
    'day': 'day',
    'hour': 'hour',
    'minute': 'minute',
    'weekday': 'dayofweek',
}
for _column, _attribute in _test_date_parts.items():
    test[_column] = getattr(test['date'].dt, _attribute)

# Left join so that every test row is kept even when no weather record
# exists for its day. The default inner join would silently drop such rows,
# and because predictions are later numbered by position, the ids of all
# following rows would shift. Missing weather values become NaN.
wash_data_test = pd.merge(test, weather, how="left", on=["year", "month", "day"])

# Bring test data into the model for prediction

In [32]:
# Select the same feature columns, in the same order, as used for training.
_test_feature_cols = [
    'year', 'month', 'day', 'hour', 'weekday',
    'tem_high', 'tem_low',
    'wea_day_label', 'wea_night_label', 'wind_label',
    'wea_day', 'wea_night', 'wind',
    'is_rest',
]
x_test = wash_data_test[_test_feature_cols]

# Wrap the features for XGBoost (no label at prediction time).
xgtest = xgb.DMatrix(x_test)

# One predicted value per row of x_test.
y_test_pred = model.predict(xgtest)

# Save test predictions as CSV

In [35]:
# Pair every prediction with a 0-based positional id.
_prediction_count = len(y_test_pred)
predict_test = dict(id=range(_prediction_count), speed=y_test_pred)

# Tabulate and write to disk; index=None is passed through unchanged.
# NOTE(review): the output file name matches the test input's name — confirm
# the target directory differs so the input is not overwritten.
predict_pd = pd.DataFrame(predict_test)
predict_pd.to_csv('.../test.csv', index=None)