In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from datetime import datetime as dt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.utils.validation import column_or_1d

In [3]:
from bokeh.plotting import figure, output_file, show

In [4]:
df = pd.read_csv('ACISDailyData-20170101-20190101-PID144512549.csv', parse_dates = True, index_col='Date (Local Standard Time)')
df.drop('Station Name', axis=1, inplace=True)

In [5]:
X = df['Wind Speed 2 m Avg. (km/h)'][0:-1]
y = df['Wind Speed 2 m Avg. (km/h)'].shift(-1)[0:-1]
X = X.values.reshape(-1,1)
y = y.values.reshape(-1,1)
y = column_or_1d(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
param_grid = {'C': [0.1,1,5, 10,50, 100], 'gamma': [1,0.1,0.01,0.001,0.0001]} 
grid = GridSearchCV(SVR(),param_grid,refit=True,cv=5, iid=False)
grid.fit(X_train,y_train)
grid_predictions = grid.predict(X_test)
Input = X[-1].reshape(1,-1)
Output = grid.predict(Input)
Return = Output[0]
df['predicted'] = grid.predict(df['Wind Speed 2 m Avg. (km/h)'].values.reshape(-1,1))
p = figure(width = 500, height = 250, x_axis_type = "datetime", sizing_mode='scale_both')
p.line(df.index, df['predicted'], line_width = 2, color = "Orange", alpha = 0.5, legend='Predicted')
p.line(df.index, df['Wind Speed 2 m Avg. (km/h)'], line_width = 2, color = "Blue", alpha = 1, legend='Actual')
p.legend.location = "top_left"
p.legend.click_policy="hide"
output_file('windmodel2.html')
show(p)
print('Tomorrow\'s predicted average wind speed: ' + str(Return))
print('Confidence: ' + str(round(100 * np.sqrt(mse(y_test,grid_predictions))/Return, 2)) + '%')

Tomorrow's predicted average wind speed: 9.736604892027866
Confidence: 50.92%
