In [1]:
import pandas as pd
from prophet import Prophet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def preparedata(file):
    """Load a cloud-cover CSV and average the readings within each minute.

    Parameters
    ----------
    file : str, path object or file-like
        Passed straight to ``pd.read_csv``. The CSV must provide the columns
        ``Year``, ``Month``, ``Day``, ``Hour``, ``Minute`` and ``CloudCover``;
        any other column (for example ``Second``) is ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``cloud_cover`` (mean of ``CloudCover`` for that minute) and
        ``DateTime`` (minute-resolution timestamp), ordered by ``DateTime``.
        Rows containing missing values are dropped.
    """
    raw = pd.read_csv(file, skipinitialspace=True)

    # Assemble the timestamps in one vectorised call. The previous row-wise
    # apply relied on positional `x[0]` lookups on label-indexed rows, which
    # pandas 2.1+ deprecates, and built every timestamp as a Python string.
    # Seconds are deliberately left out so readings fall into minute buckets.
    parts = raw[['Year', 'Month', 'Day', 'Hour', 'Minute']].rename(columns=str.lower)
    minute_stamps = pd.to_datetime(parts)

    per_minute = (
        pd.DataFrame({'cloud_cover': raw['CloudCover'], 'DateTime': minute_stamps})
        .groupby('DateTime', as_index=False)
        .agg(cloud_cover=('cloud_cover', 'mean'))
    )

    # Keep the column order callers already rely on: value first, timestamp second.
    return per_minute[['cloud_cover', 'DateTime']].dropna()

In [3]:
# Load the sensor CSV and reduce it to one mean cloud-cover value per minute.
# NOTE(review): the path is relative to the notebook's working directory — confirm it resolves before running.
cloud_cover = preparedata('../succeed-prophet/Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv')
cloud_cover

Unnamed: 0,cloud_cover,DateTime
0,0.990000,2021-11-16 11:15:00
1,0.989000,2021-11-16 11:16:00
2,0.987667,2021-11-16 11:17:00
3,0.987000,2021-11-16 11:18:00
4,0.987000,2021-11-16 11:19:00
...,...,...
15056,0.168000,2021-12-16 13:58:00
15057,0.169000,2021-12-16 13:59:00
15058,0.147000,2021-12-16 14:00:00
15059,0.145000,2021-12-16 14:01:00


In [4]:
# Prophet reads the timestamp from a column named 'ds' and the target from 'y'.
df = cloud_cover[['DateTime', 'cloud_cover']].rename(
    columns={'DateTime': 'ds', 'cloud_cover': 'y'}
)
df

Unnamed: 0,ds,y
0,2021-11-16 11:15:00,0.990000
1,2021-11-16 11:16:00,0.989000
2,2021-11-16 11:17:00,0.987667
3,2021-11-16 11:18:00,0.987000
4,2021-11-16 11:19:00,0.987000
...,...,...
15056,2021-12-16 13:58:00,0.168000
15057,2021-12-16 13:59:00,0.169000
15058,2021-12-16 14:00:00,0.147000
15059,2021-12-16 14:01:00,0.145000


# TimeSeriesSplit for cross-validation (n_splits = 5)

In [5]:
# Time-ordered cross-validation with 5 folds: every fold is fitted on the rows
# that precede its test window, and its MSE is collected in `mse`.
# NOTE(review): the recorded fold MSEs exceed 1 although the displayed cloud-cover
# values lie in [0, 1], so the forecasts must leave that range — consider a bounded model.
cv = TimeSeriesSplit(n_splits=5)
mse = []  # one mean-squared-error value per fold, in fold order
for train_idx, test_idx in cv.split(df):
    print('TRAIN:', train_idx.shape, 'TEST:', test_idx.shape)
    # Positional indices and the sliced frames live under separate names
    # instead of rebinding the loop variables.
    train, test = df.iloc[train_idx, :], df.iloc[test_idx, :]
    model = Prophet()  # a Prophet object can only be fitted once, so build one per fold
    model.fit(train)
    predict = model.predict(test)
    predict1 = predict[['ds', 'yhat']]
    # Plain MSE is the default; the `squared` keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6, so it is no longer passed.
    prophet_mse_error = mean_squared_error(test['y'], predict1['yhat'])
    print('prophet_mse_error : ', prophet_mse_error)
    print('\n')
    mse.append(prophet_mse_error)

TRAIN: (2511,) TEST: (2510,)


14:30:58 - cmdstanpy - INFO - Chain [1] start processing
14:30:59 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  2.1271282158149236


TRAIN: (5021,) TEST: (2510,)


14:31:01 - cmdstanpy - INFO - Chain [1] start processing
14:31:07 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  1.417568048495404


TRAIN: (7531,) TEST: (2510,)


14:31:09 - cmdstanpy - INFO - Chain [1] start processing
14:31:19 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.9799942387299372


TRAIN: (10041,) TEST: (2510,)


14:31:20 - cmdstanpy - INFO - Chain [1] start processing
14:31:32 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.2864453555922928


TRAIN: (12551,) TEST: (2510,)


14:31:34 - cmdstanpy - INFO - Chain [1] start processing
14:31:52 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.13780740101957514




In [22]:
# Forecast of the last cross-validation fold only: predict1 is reassigned on every loop iteration.
predict1

Unnamed: 0,ds,yhat
0,2021-12-11 14:55:00,0.389999
1,2021-12-11 14:56:00,0.388953
2,2021-12-11 14:58:00,0.386902
3,2021-12-11 14:59:00,0.385896
4,2021-12-11 15:00:00,0.384903
...,...,...
2505,2021-12-16 13:58:00,0.545039
2506,2021-12-16 13:59:00,0.543834
2507,2021-12-16 14:00:00,0.542627
2508,2021-12-16 14:01:00,0.541419


In [23]:
# Overlay the full observed series with the forecast currently held in predict1.
fig = go.Figure(
    data=[
        go.Scatter(x=df['ds'], y=df["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict1['ds'], y=predict1["yhat"], mode='lines', name='Predict'),
    ]
)

# Range-selector presets as (count, label, step); all of them use stepmode "backward".
zoom_presets = [
    (1, "1.zoom x1", "hour"),
    (2, "2.zoom x2", "hour"),
    (3, "3.zoom x3", "day"),
]
range_buttons = [
    {"count": count, "label": label, "step": step, "stepmode": "backward"}
    for count, label, step in zoom_presets
]
range_buttons.append({"step": "all"})  # final button resets to the whole series

# Titles, size and the x-axis controls (selector buttons + range slider) in one layout update.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    xaxis_title="Date",
    yaxis_title="Cloud cover",
    xaxis={
        "rangeselector": {"buttons": range_buttons},
        "rangeslider": {"visible": True},
        "type": "date",
    },
)

fig.show()

In [11]:
# MSE of the final fold only (the loop variable keeps its last value); all per-fold values are in `mse`.
print('Prophet_mse_error : ', prophet_mse_error)

Prophet_mse_error :  0.13780740101957514


In [12]:
# Average the per-fold MSE values collected in `mse` into one cross-validation score.
mean_prophet = np.mean(mse)
print ("Mean MSE:", mean_prophet)

Mean MSE: 0.9897886519304266


---

# TimeSeriesSplit n_splits = 3

In [13]:
# Same time-ordered cross-validation with 3 folds; per-fold MSE is collected in `mse1`.
# NOTE(review): the recorded forecast for the last fold goes negative (about -0.77)
# although cloud cover is displayed in [0, 1] — consider a bounded model.
cv1 = TimeSeriesSplit(n_splits=3)
mse1 = []  # one mean-squared-error value per fold, in fold order
for train1_idx, test1_idx in cv1.split(df):
    print('TRAIN:', train1_idx.shape, 'TEST:', test1_idx.shape)
    # Positional indices and the sliced frames live under separate names
    # instead of rebinding the loop variables.
    train1, test1 = df.iloc[train1_idx, :], df.iloc[test1_idx, :]
    model1 = Prophet()  # a Prophet object can only be fitted once, so build one per fold
    model1.fit(train1)
    predict2 = model1.predict(test1)
    predict3 = predict2[['ds', 'yhat']]
    # Plain MSE is the default; the `squared` keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6, so it is no longer passed.
    prophet_mse_error1 = mean_squared_error(test1['y'], predict3['yhat'])
    print('prophet_mse_error : ', prophet_mse_error1)
    print('\n')
    mse1.append(prophet_mse_error1)

TRAIN: (3766,) TEST: (3765,)


14:38:26 - cmdstanpy - INFO - Chain [1] start processing
14:38:29 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  1.9747832024401102


TRAIN: (7531,) TEST: (3765,)


14:38:31 - cmdstanpy - INFO - Chain [1] start processing
14:38:38 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  3.099869913604117


TRAIN: (11296,) TEST: (3765,)


14:38:41 - cmdstanpy - INFO - Chain [1] start processing
14:38:54 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  1.1804394163190193




In [24]:
# Forecast of the last cross-validation fold only: predict3 is reassigned on every loop iteration.
predict3

Unnamed: 0,ds,yhat
0,2021-12-09 11:17:00,0.117183
1,2021-12-09 11:18:00,0.118113
2,2021-12-09 11:19:00,0.119031
3,2021-12-09 11:20:00,0.119937
4,2021-12-09 11:21:00,0.120831
...,...,...
3760,2021-12-16 13:58:00,-0.762497
3761,2021-12-16 13:59:00,-0.763996
3762,2021-12-16 14:00:00,-0.765496
3763,2021-12-16 14:01:00,-0.766998


In [25]:
# Overlay the full observed series with the forecast currently held in predict3.
fig = go.Figure(
    data=[
        go.Scatter(x=df['ds'], y=df["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict3['ds'], y=predict3["yhat"], mode='lines', name='Predict'),
    ]
)

# Range-selector presets as (count, label, step); all of them use stepmode "backward".
zoom_presets = [
    (1, "1.zoom x1", "hour"),
    (2, "2.zoom x2", "hour"),
    (3, "3.zoom x3", "day"),
]
range_buttons = [
    {"count": count, "label": label, "step": step, "stepmode": "backward"}
    for count, label, step in zoom_presets
]
range_buttons.append({"step": "all"})  # final button resets to the whole series

# Titles, size and the x-axis controls (selector buttons + range slider) in one layout update.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    xaxis_title="Date",
    yaxis_title="Cloud cover",
    xaxis={
        "rangeselector": {"buttons": range_buttons},
        "rangeslider": {"visible": True},
        "type": "date",
    },
)

fig.show()

In [15]:
# MSE of the final fold only (the loop variable keeps its last value); all per-fold values are in `mse1`.
print('Prophet_mse_error : ', prophet_mse_error1)

Prophet_mse_error :  1.1804394163190193


In [16]:
# Average the per-fold MSE values collected in `mse1` into one cross-validation score.
mean_prophet1 = np.mean(mse1)
print ("Mean MSE:", mean_prophet1)

Mean MSE: 2.085030844121082


---

# TimeSeriesSplit n_splits = 10

In [17]:
# Same time-ordered cross-validation with 10 folds; per-fold MSE is collected in `mse2`.
# NOTE(review): the recorded forecast for the last fold rises above 1 (about 1.25)
# although cloud cover is displayed in [0, 1] — consider a bounded model.
cv2 = TimeSeriesSplit(n_splits=10)
mse2 = []  # one mean-squared-error value per fold, in fold order
for train2_idx, test2_idx in cv2.split(df):
    print('TRAIN:', train2_idx.shape, 'TEST:', test2_idx.shape)
    # Positional indices and the sliced frames live under separate names
    # instead of rebinding the loop variables.
    train2, test2 = df.iloc[train2_idx, :], df.iloc[test2_idx, :]
    model2 = Prophet()  # a Prophet object can only be fitted once, so build one per fold
    model2.fit(train2)
    predict4 = model2.predict(test2)
    predict5 = predict4[['ds', 'yhat']]
    # Plain MSE is the default; the `squared` keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6, so it is no longer passed.
    prophet_mse_error2 = mean_squared_error(test2['y'], predict5['yhat'])
    print('prophet_mse_error : ', prophet_mse_error2)
    print('\n')
    mse2.append(prophet_mse_error2)

TRAIN: (1371,) TEST: (1369,)


14:42:03 - cmdstanpy - INFO - Chain [1] start processing
14:42:04 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.17149558001222304


TRAIN: (2740,) TEST: (1369,)


14:42:05 - cmdstanpy - INFO - Chain [1] start processing
14:42:07 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.0899013738953337


TRAIN: (4109,) TEST: (1369,)


14:42:09 - cmdstanpy - INFO - Chain [1] start processing
14:42:12 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  1.780148208733615


TRAIN: (5478,) TEST: (1369,)


14:42:14 - cmdstanpy - INFO - Chain [1] start processing
14:42:17 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.09491151524075754


TRAIN: (6847,) TEST: (1369,)


14:42:19 - cmdstanpy - INFO - Chain [1] start processing
14:42:24 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  1.269216378952773


TRAIN: (8216,) TEST: (1369,)


14:42:25 - cmdstanpy - INFO - Chain [1] start processing
14:42:31 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.5436417408917604


TRAIN: (9585,) TEST: (1369,)


14:42:33 - cmdstanpy - INFO - Chain [1] start processing
14:42:44 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.20536887097140935


TRAIN: (10954,) TEST: (1369,)


14:42:46 - cmdstanpy - INFO - Chain [1] start processing
14:43:01 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.7365825007984954


TRAIN: (12323,) TEST: (1369,)


14:43:02 - cmdstanpy - INFO - Chain [1] start processing
14:43:26 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.11890240122484677


TRAIN: (13692,) TEST: (1369,)


14:43:27 - cmdstanpy - INFO - Chain [1] start processing
14:43:50 - cmdstanpy - INFO - Chain [1] done processing


prophet_mse_error :  0.899669750577929




In [26]:
# Forecast of the last cross-validation fold only: predict5 is reassigned on every loop iteration.
predict5

Unnamed: 0,ds,yhat
0,2021-12-13 16:28:00,0.632820
1,2021-12-13 16:29:00,0.632978
2,2021-12-13 16:30:00,0.633118
3,2021-12-13 16:31:00,0.633239
4,2021-12-13 16:32:00,0.633339
...,...,...
1364,2021-12-16 13:58:00,1.252519
1365,2021-12-16 13:59:00,1.251332
1366,2021-12-16 14:00:00,1.250141
1367,2021-12-16 14:01:00,1.248947


In [27]:
# Overlay the full observed series with the forecast currently held in predict5.
fig = go.Figure(
    data=[
        go.Scatter(x=df['ds'], y=df["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict5['ds'], y=predict5["yhat"], mode='lines', name='Predict'),
    ]
)

# Range-selector presets as (count, label, step); all of them use stepmode "backward".
zoom_presets = [
    (1, "1.zoom x1", "hour"),
    (2, "2.zoom x2", "hour"),
    (3, "3.zoom x3", "day"),
]
range_buttons = [
    {"count": count, "label": label, "step": step, "stepmode": "backward"}
    for count, label, step in zoom_presets
]
range_buttons.append({"step": "all"})  # final button resets to the whole series

# Titles, size and the x-axis controls (selector buttons + range slider) in one layout update.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    xaxis_title="Date",
    yaxis_title="Cloud cover",
    xaxis={
        "rangeselector": {"buttons": range_buttons},
        "rangeslider": {"visible": True},
        "type": "date",
    },
)

fig.show()

In [19]:
# MSE of the final fold only (the loop variable keeps its last value); all per-fold values are in `mse2`.
print('Prophet_mse_error : ', prophet_mse_error2)

Prophet_mse_error :  0.899669750577929


In [20]:
# Average the per-fold MSE values collected in `mse2` into one cross-validation score.
mean_prophet2 = np.mean(mse2)
print ("Mean MSE:", mean_prophet2)

Mean MSE: 0.5909838321299142


---