In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from prophet import Prophet
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


# Prepare Data

In [2]:
# Load the raw cloud-cover readings. The timestamp arrives split across
# Year/Month/Day/Hour/Minute/Second columns.
cloud_cover_raw = pd.read_csv('Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv', skipinitialspace=True)

# Assemble a minute-resolution timestamp straight from the component columns.
# pd.to_datetime accepts a frame with year/month/day/hour/minute columns, which replaces
# the row-wise string formatting (slow, and its positional x[0] lookups on a labelled
# Series are deprecated in pandas 2.x).
timestamp_parts = cloud_cover_raw[['Year', 'Month', 'Day', 'Hour', 'Minute']].rename(columns=str.lower)

cloud_cover = (
    cloud_cover_raw
    .assign(DateTime=pd.to_datetime(timestamp_parts))
    .rename(columns={'CloudCover': 'cloud_cover'})
    # Seconds are not part of the key: all readings within the same minute are averaged.
    .groupby('DateTime')
    .agg(cloud_cover=('cloud_cover', 'mean'))
    .reset_index()
    # Keep the column order of the original result: value first, timestamp second.
    .loc[:, ['cloud_cover', 'DateTime']]
    .dropna()
)
cloud_cover

Unnamed: 0,cloud_cover,DateTime
0,0.990000,2021-11-16 11:15:00
1,0.989000,2021-11-16 11:16:00
2,0.987667,2021-11-16 11:17:00
3,0.987000,2021-11-16 11:18:00
4,0.987000,2021-11-16 11:19:00
...,...,...
15056,0.168000,2021-12-16 13:58:00
15057,0.169000,2021-12-16 13:59:00
15058,0.147000,2021-12-16 14:00:00
15059,0.145000,2021-12-16 14:01:00


In [3]:
# Prophet expects the timestamp column to be named `ds` and the target `y`.
prophet_frame = cloud_cover[['DateTime', 'cloud_cover']].rename(
    columns={'DateTime': 'ds', 'cloud_cover': 'y'}
)

# Keep only the observations strictly inside the (2021-11-23, 2021-12-16) window.
in_window = (prophet_frame['ds'] > '2021-11-23') & (prophet_frame['ds'] < '2021-12-16')
df = prophet_frame[in_window]
df

Unnamed: 0,ds,y
3585,2021-11-23 08:00:00,0.915
3586,2021-11-23 08:01:00,0.914
3587,2021-11-23 08:02:00,0.989
3588,2021-11-23 08:03:00,0.991
3589,2021-11-23 08:04:00,0.913
...,...,...
14763,2021-12-15 16:55:00,0.321
14764,2021-12-15 16:56:00,0.304
14765,2021-12-15 16:57:00,0.286
14766,2021-12-15 16:58:00,0.344


### Train/Test Model

In [6]:
# Hold-out split by date: everything from the cutoff onwards is the test set
# (one day of data), everything before it is used for training.
test_start = '2021-12-15'
train = df.loc[df['ds'] < test_start]
test = df.loc[df['ds'] >= test_start]

# Report the size of each partition as (rows, columns).
for label, part in (('train Shape', train), ('test Shape', test)):
    print(label, part.shape)

train Shape (10656, 2)
test Shape (527, 2)


In [8]:
# Alternative split: positional 90/10 hold-out. shuffle=False keeps the rows in their
# existing order, so the test set is the final 10% of rows rather than a random sample.
# (train_test_split is imported in the notebook's top import cell.)
train1, test1 = train_test_split(df, train_size=0.9, shuffle=False)
print('Train Shape', train1.shape)
print('Test Shape', test1.shape)

Train Shape (10064, 2)
Test Shape (1119, 2)


# Time Series Forecasting with Prophet Prediction

In [7]:
# Fit a default-configured Prophet model on `train` (fit() returns the fitted model),
# then forecast at the timestamps present in `test`.
model = Prophet().fit(train)
predict = model.predict(test)

# Keep only the timestamp and the point forecast for plotting and scoring.
predict1 = predict.loc[:, ['ds', 'yhat']]
predict1

18:54:55 - cmdstanpy - INFO - Chain [1] start processing
18:55:14 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat
0,2021-12-15 08:00:00,0.312158
1,2021-12-15 08:01:00,0.308660
2,2021-12-15 08:02:00,0.305271
3,2021-12-15 08:03:00,0.301989
4,2021-12-15 08:04:00,0.298811
...,...,...
522,2021-12-15 16:55:00,0.131490
523,2021-12-15 16:56:00,0.130091
524,2021-12-15 16:57:00,0.128624
525,2021-12-15 16:58:00,0.127087


In [9]:
# Second default-configured Prophet model, fitted on `train1` (fit() returns the fitted
# model) and used to forecast at the timestamps present in `test1`.
model1 = Prophet().fit(train1)
predict2 = model1.predict(test1)

# Keep only the timestamp and the point forecast for plotting and scoring.
predict3 = predict2.loc[:, ['ds', 'yhat']]
predict3

18:55:44 - cmdstanpy - INFO - Chain [1] start processing
18:55:51 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat
0,2021-12-13 15:44:00,0.457436
1,2021-12-13 15:45:00,0.457423
2,2021-12-13 15:46:00,0.457422
3,2021-12-13 15:47:00,0.457433
4,2021-12-13 15:48:00,0.457454
...,...,...
1114,2021-12-15 16:55:00,0.153458
1115,2021-12-15 16:56:00,0.152895
1116,2021-12-15 16:57:00,0.152291
1117,2021-12-15 16:58:00,0.151644


In [10]:
# Overlay both Prophet forecasts on the observed series.
#   predict1 -> model fitted on `train`, scored on `test`   (single held-out day)
#   predict3 -> model fitted on `train1`, scored on `test1` (last 10% of rows)
# The legend names follow that mapping; they were previously attached to the wrong traces.
fig = go.Figure()
fig.add_trace(go.Scatter(
    mode='lines', x=predict1['ds'], y=predict1['yhat'],
    name='Forecast prophet predict test set one day',
))
fig.add_trace(go.Scatter(
    mode='lines', x=df['ds'], y=df['y'],
    name='Real Data',
))
fig.add_trace(go.Scatter(
    mode='lines', x=predict3['ds'], y=predict3['yhat'],
    name='Forecast prophet predict test set',
))

# Figure size, title and axis labels.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    xaxis_title="Date",
    yaxis_title="Cloud cover",
)

# Time-axis navigation: quick-zoom buttons (last 1 hour, 2 hours, 3 days, everything)
# plus a range slider underneath the plot.
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1.zoom x1", step="hour", stepmode="backward"),
                dict(count=2, label="2.zoom x2", step="hour", stepmode="backward"),
                dict(count=3, label="3.zoom x3", step="day", stepmode="backward"),
                dict(step="all"),
            ]
        ),
        rangeslider=dict(visible=True),
        type="date",
    )
)

fig.show()

# Evaluating forecast accuracy

- **MAE** ย่อมาจาก Mean Absolute Error หรือเรียกอีกชื่อหนึ่งว่า L1 Loss ค่า MAE นี้ชื่อก็บอกอยู่แล้วว่าใช้ Absolute มาช่วยทำให้ค่า Error กลายเป็นบวก สูตรคำนวณจึงเป็นการนำค่า Error มาใส่ Absolute ก่อนที่จะนำมาหาค่าเฉลี่ยของ Error
- **MSE** ย่อมาจาก Mean Square Error หรือเรียกอีกชื่อหนึ่งว่า L2 Loss เช่นเดียวกัน ค่า MSE จะมีการทำให้ค่า Error กลายเป็นบวกก่อนโดยการนำค่า Error มายกกำลังสอง ก่อนที่จะนำค่า Error มาหาค่าเฉลี่ย
- **RMSE** ย่อมาจาก Root Mean Square Error เป็น Loss Function ที่จะนำค่า MSE มาใส่ Square Root จึงทำให้มีคุณสมบัติที่คล้ายกับค่า MSE แต่ต่างกันตรงที่ หน่วยของค่า Error จะไม่มีเลขยกกำลังสอง จึงทำให้อ่านค่าได้ง่ายกว่า เนื่องจากหน่วยของ RMSE นั้นมีหน่วยเดียวกันกับค่าที่โมเดลทำนายไว้

In [11]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [12]:
# Mean squared error of the forecast in `predict1` against the observed values in `test`.
# The `squared` keyword is omitted on purpose: plain MSE is the function's default, and
# the keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it breaks
# the cell on current releases.
prophet_mse_error = mean_squared_error(test["y"], predict1["yhat"])
print(f'MSE Error: {prophet_mse_error}')

MSE Error: 0.03163649275587413


In [13]:
# Mean squared error of the forecast in `predict3` against the observed values in `test1`.
# The `squared` keyword is omitted on purpose: plain MSE is the function's default, and
# the keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it breaks
# the cell on current releases.
prophet_mse_error1 = mean_squared_error(test1["y"], predict3["yhat"])
print(f'MSE Error: {prophet_mse_error1}')

MSE Error: 0.0553084745800468
