In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense,LSTM, Dropout
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
import keras

In [None]:
# Display the installed Keras version so the environment behind the results is recorded.
keras.__version__

In [None]:
# The original cell evaluated the bare name `tensorflow`, which is never imported
# in this notebook and therefore raises NameError on a fresh kernel.
# NOTE(review): presumably the intent was to record the TensorFlow version next
# to the Keras version shown in the previous cell - confirm.
from importlib.metadata import PackageNotFoundError, version

try:
    tensorflow_version = version("tensorflow")
except PackageNotFoundError:
    # No distribution named "tensorflow" is installed in this environment.
    tensorflow_version = None

tensorflow_version

In [None]:
# Folder holding the input CSV files. Kept in one place so the notebook can be
# pointed at another location without editing every read call.
DATA_DIR = '/content/drive/MyDrive/Thesis/check'

# Raw inputs: the travel-time records and the weather observations.
df_zf = pd.read_csv(DATA_DIR + '/zf_data.csv')
df_weather = pd.read_csv(DATA_DIR + '/weather.csv')

In [None]:
# Preview the first rows only instead of rendering the whole frame.
df_zf.head()

In [None]:
# Preview the first rows only instead of rendering the whole frame.
df_weather.head()

In [None]:
# Parse the timestamp columns so both frames can be aligned on a shared hourly key.
df_weather['Timestamp'] = pd.to_datetime(df_weather['Timestamp'])
df_zf['start_plant2'] = pd.to_datetime(df_zf['start_plant2'])

# Hour-resolution join key ("YYYY-MM-DD HH"). The vectorised .dt accessor
# replaces the per-row apply(lambda ...) formatting.
HOUR_KEY_FORMAT = "%Y-%m-%d %H"
df_weather['time'] = df_weather['Timestamp'].dt.strftime(HOUR_KEY_FORMAT)
df_zf['time'] = df_zf['start_plant2'].dt.strftime(HOUR_KEY_FORMAT)

# pd.merge defaults to an inner join: records whose hour has no match in the
# other frame are dropped, and an hour that appears several times in
# df_weather repeats the matching df_zf rows.
merge_df = pd.merge(df_zf, df_weather, on='time')

# Keep the model inputs plus the regression target (last column).
merge_df = merge_df[[
    'Week_Day', 'Week', 'Hour', 'Minutes', 'Seconds', 'speed_threshold',
    'Clouds', 'Temp', 'Wind_deg', 'Wind_speed',
    'Rain_1h', 'Rain_3h', 'Snow_1h', 'Snow_3h',
    'travel_time(2-1)',
]]

In [None]:
# Preview the first rows only instead of rendering the whole frame.
merge_df.head()

In [None]:
# Number of trailing rows held out for evaluation.
TEST_SIZE = 150

# Positional split: everything except the last TEST_SIZE rows is used for
# training, the last TEST_SIZE rows for testing.
# NOTE(review): presumably merge_df is in chronological order so that this is a
# proper forecasting hold-out - the frame is never sorted explicitly; verify.
train = merge_df.iloc[:-TEST_SIZE]
test = merge_df.iloc[-TEST_SIZE:]

In [None]:
# Separate the regression target ('travel_time(2-1)') from the predictor
# columns for both partitions.
train_x, train_y = train.drop(columns=['travel_time(2-1)']), train['travel_time(2-1)']
test_x, test_y = test.drop(columns=['travel_time(2-1)']), test['travel_time(2-1)']

In [None]:
def mean_absolute_percentage_error(test,predictions): 
    """Return the mean absolute percentage error (in percent).

    Parameters
    ----------
    test : array-like
        Actual values.
    predictions : array-like
        Predicted values, one per actual value.

    Returns
    -------
    float
        ``mean(|(actual - predicted) / actual|) * 100``.

    Notes
    -----
    Both inputs are flattened to 1-D before the element-wise comparison.
    Without that, an ``(n,)`` vector of actuals combined with an ``(n, 1)``
    column of predictions broadcasts to an ``(n, n)`` matrix and the result
    is silently wrong.

    Actual values equal to zero are not special-cased: the division then
    produces ``inf``/``nan`` in the result.
    """
    actual = np.asarray(test, dtype=float).ravel()
    forecast = np.asarray(predictions, dtype=float).ravel()
    return np.mean(np.abs((actual - forecast) / actual)) * 100

In [None]:
def eval_metrics(actual, pred):
    """Score predictions against the actual values.

    Returns a ``(rmse, mae, r2)`` tuple: the square root of the mean squared
    error, the mean absolute error and the R^2 score, in that order.
    """
    squared_error = mean_squared_error(actual, pred)
    return (
        np.sqrt(squared_error),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

In [None]:
# Baseline: random forest with default hyperparameters, scored on the hold-out rows.
# A fixed random_state makes the forest (bootstrap samples / feature draws)
# reproducible, so the numbers below are the same after Restart & Run All.
lr1 = RandomForestRegressor(random_state=42)
lr1.fit(train_x, train_y)

predicted_qualities = lr1.predict(test_x)

(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

print("Random Forest model" )
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

# Final line of the report is the MAPE in percent (printed without a label).
print(mean_absolute_percentage_error(test_y,predicted_qualities))

In [None]:
# Show the hyperparameters the baseline random forest was built with.
lr1.get_params()

In [None]:
import itertools
from tqdm import tqdm

# Random forest search grid.
# Number of trees (the original list contained `5060`: a missing comma between 50 and 60)
n_estimators = [10,20,30,40,50,60,70,80,90,100]
# Maximum number of levels in tree
max_depth = [7,8,9,10,11,12]
# Minimum number of samples required to split a node
min_samples_split = [2]
# Minimum number of samples required at each leaf node
min_samples_leaf = [2]

In [None]:
# Exhaustive grid search over the random forest hyperparameters.
# One record is collected per combination and the result frame is built once at
# the end. Growing a DataFrame with pd.concat inside the loop is quadratic,
# leaves every column as object dtype with a repeated index of 0, and triggers
# a FutureWarning in recent pandas when the initial frame is empty.
# NOTE(review): every combination is scored on test_x/test_y, so the "best" row
# is selected on the same data that is later reported as test performance.
rf_result_columns = ['n_estimators','max_depth','min_samples_split','min_samples_leaf','RMSE','MAE','MAPE']
rf_records = []

rf_grid = list(itertools.product(n_estimators, max_depth, min_samples_split, min_samples_leaf))
for x in tqdm(rf_grid, desc='Random Forest Hyperparameter Tunning'):
    rf = RandomForestRegressor(
        n_estimators=x[0],
        max_depth=x[1],
        min_samples_split=x[2],
        min_samples_leaf=x[3],
        n_jobs=-1,
    )
    rf.fit(train_x, train_y)

    predicted_qualities = rf.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
    mape = mean_absolute_percentage_error(test_y, predicted_qualities)

    rf_records.append({
        'n_estimators': x[0],
        'max_depth': x[1],
        'min_samples_split': x[2],
        'min_samples_leaf': x[3],
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
    })

# Same columns as before; rows are now numbered 0..n-1 and metrics are numeric.
rf_test_accuracy = pd.DataFrame(rf_records, columns=rf_result_columns)
  

In [None]:
# Show the grid-search row(s) that reach the lowest MAPE.
rf_test_accuracy.loc[lambda scores: scores['MAPE'] == scores['MAPE'].min()]

In [None]:
# Actual vs. predicted target on the hold-out rows.
# NOTE(review): predicted_qualities is whatever the most recently executed cell
# assigned. Straight after the grid search that is the LAST grid combination,
# not necessarily the one with the lowest MAPE.
fig, ax = plt.subplots()
ax.plot(test_y.values, label='Actual Values')
ax.plot(predicted_qualities, color='red', label='Forecasting Prediction')
ax.set(
    title='Random Forest: actual vs. predicted',
    xlabel='Test sample index',
    ylabel='travel_time(2-1)',
)
ax.legend(loc='best');

In [None]:
# Baseline: XGBoost regressor with library defaults (logging silenced),
# trained on the training rows and scored on the hold-out rows.
lr2 = xgboost.XGBRegressor(verbosity=0)
lr2.fit(train_x, train_y)
predicted_qualities = lr2.predict(test_x)

rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

print("XGBoost model" )
for template, score in (("  RMSE: %s", rmse), ("  MAE: %s", mae), ("  R2: %s", r2)):
    print(template % score)
# Final line of the report is the MAPE in percent (printed without a label).
print(mean_absolute_percentage_error(test_y,predicted_qualities))

In [None]:
#XG Boost
# Number of trees
# NOTE(review): the original line was `n_estimators = ` with no value (a
# SyntaxError). The candidates below are placeholders - replace them with the
# grid that was actually intended.
n_estimators = [100, 200, 300]
# Maximum number of levels in tree
max_depth = [12,13,14,15]
#minimum sum of weights of all observations required in a child
min_child_weight = [1,2]
#Gamma specifies the minimum loss reduction required to make a split
gamma = [1,5]
# boosting learning rate
# (the assignment target was missing; the search loop reads `learning_rate`)
learning_rate = [.1,.05,.01]

In [None]:
# Exhaustive grid search over the XGBoost hyperparameters.
# One record is collected per combination and the result frame is built once at
# the end, instead of growing a DataFrame with pd.concat inside the loop
# (quadratic, object-dtype columns, repeated index of 0).
# NOTE(review): every combination is scored on test_x/test_y, so the "best" row
# is selected on the same data that is later reported as test performance.
xgb_result_columns = ['n_estimators','max_depth','min_child_weight','gamma','learning_rate','RMSE','MAE', 'MAPE']
xgb_records = []

xgb_grid = list(itertools.product(n_estimators, max_depth, min_child_weight, gamma, learning_rate))
# The progress-bar label previously said "Random Forest" although this loop tunes XGBoost.
for x in tqdm(xgb_grid, desc='XGBoost Hyperparameter Tunning'):
    xgb = xgboost.XGBRegressor(
        n_estimators=x[0],
        max_depth=x[1],
        min_child_weight=x[2],
        gamma=x[3],
        learning_rate=x[4],
        verbosity=0,
        n_jobs=-1,
    )
    xgb.fit(train_x, train_y)

    predicted_qualities = xgb.predict(test_x)

    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
    mape = mean_absolute_percentage_error(test_y, predicted_qualities)

    xgb_records.append({
        'n_estimators': x[0],
        'max_depth': x[1],
        'min_child_weight': x[2],
        'gamma': x[3],
        'learning_rate': x[4],
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
    })

# Same columns as before; rows are now numbered 0..n-1 and metrics are numeric.
xgb_test_accuracy = pd.DataFrame(xgb_records, columns=xgb_result_columns)
  

In [None]:
# Show the grid-search row(s) that reach the lowest MAPE.
xgb_test_accuracy.loc[lambda scores: scores['MAPE'] == scores['MAPE'].min()]

In [None]:
# Actual vs. predicted target on the hold-out rows.
# NOTE(review): predicted_qualities is whatever the most recently executed cell
# assigned. Straight after the grid search that is the LAST grid combination,
# not necessarily the one with the lowest MAPE.
fig, ax = plt.subplots()
ax.plot(test_y.values, label='Actual Values')
ax.plot(predicted_qualities, color='red', label='Forecasting Prediction')
ax.set(
    title='XGBoost: actual vs. predicted',
    xlabel='Test sample index',
    ylabel='travel_time(2-1)',
)
ax.legend(loc='best');

In [None]:
# Min-max scale the predictors. The scaler is fitted on the training features
# only and the same fitted transform is then applied to both partitions.
# The target (train_y / test_y) is left unscaled.
scaler = MinMaxScaler().fit(train_x)
scaled_train, scaled_test = scaler.transform(train_x), scaler.transform(test_x)

In [None]:
# Train generator
# Window length: number of consecutive rows fed to the network per sample.
n_input = 12
# Number of predictor columns, taken from the scaled matrix instead of a
# hard-coded 14 so it follows any change to the selected features.
n_feature = scaled_train.shape[1]

# Each sample pairs a window of n_input scaled feature rows with one value from
# train_y; batch_size=1 yields one window per training step.
train_generator = TimeseriesGenerator(scaled_train, train_y.values, length=n_input, batch_size=1)

In [None]:
# Two stacked 50-unit LSTM layers (ReLU activations) with 20% dropout in
# between, followed by a single linear output unit. The first LSTM returns the
# full sequence so the second LSTM receives one vector per time step.
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(n_input, n_feature)),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dense(1),
])
# Adam optimiser minimising mean squared error.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train the network on the windowed training generator for 50 epochs.
# The per-epoch loss is read back later through model.history.
model.fit(train_generator,epochs= 50)

In [None]:
# Model Loss
# Per-epoch training loss recorded by the last model.fit call.
loss = model.history.history['loss']

fig, ax = plt.subplots()
ax.plot(loss)
ax.set(title='LSTM training loss', xlabel='Epoch', ylabel='Loss (MSE)');

In [None]:
# Test data predictions
# The network input is a window of n_input scaled FEATURE rows; predictions are
# never fed back into the window. Window i is therefore the n_input rows that
# precede test row i: the tail of the training set, followed by the test rows
# seen so far. All windows are known up front, so they are built once and
# scored in a single predict call instead of one call per test row.
history_and_test = np.concatenate([scaled_train[-n_input:], scaled_test], axis=0)

# Shape (len(test_x), n_input, n_feature): one window per test row.
test_windows = np.stack(
    [history_and_test[i:i + n_input] for i in range(len(test_x))]
)

# One single-element array per test row, the same structure the original
# step-by-step loop appended to the list.
test_predictions = list(model.predict(test_windows))

In [None]:
# test_predictions is a list of single-element arrays (one model output per
# test row). Flatten it to a 1-D array so it lines up element-wise with test_y
# in plots and metric functions.
pred = np.asarray(test_predictions).ravel()

In [None]:
# Actual vs. LSTM-predicted target on the hold-out rows.
fig, ax = plt.subplots()
ax.plot(test_y.values, label='Actual Values')
ax.plot(pred, color='red', label='Forecasting Prediction')
ax.set(
    title='LSTM: actual vs. predicted',
    xlabel='Test sample index',
    ylabel='travel_time(2-1)',
)
ax.legend(loc='best');

In [None]:
# Score the LSTM predictions on the hold-out rows.
(rmse, mae, r2) = eval_metrics(test_y, pred)

# The header previously read "XGBoost model" (copy-paste leftover) although
# `pred` holds the LSTM output.
print("LSTM model" )
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)