In [1]:
import pandas as pd
from prophet import Prophet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

  from .autonotebook import tqdm as notebook_tqdm


# Prepare Data

In [2]:
def preparedata(file):
    """Load a cloud-cover CSV and average the readings minute by minute.

    Parameters
    ----------
    file : str, path-like or file-like
        Passed straight to ``pd.read_csv`` (with ``skipinitialspace=True``).
        The CSV must provide the columns ``Year``, ``Month``, ``Day``,
        ``Hour``, ``Minute`` and ``CloudCover``.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``cloud_cover`` (mean of every reading
        that falls in the same minute) and ``DateTime`` (``datetime64``).
        Rows are sorted by ``DateTime`` and rows containing a missing value
        are dropped.
    """
    raw = pd.read_csv(file, skipinitialspace=True)

    # Assemble the timestamp from its numeric parts in one vectorised call.
    # The previous row-wise ``apply`` formatted strings with ``x[0]``, i.e.
    # positional access on a label-indexed Series, which pandas deprecates.
    parts = raw[['Year', 'Month', 'Day', 'Hour', 'Minute']].rename(columns=str.lower)

    per_minute = (
        raw.assign(DateTime=pd.to_datetime(parts))
        .groupby('DateTime')
        .agg(cloud_cover=('CloudCover', 'mean'))
        .reset_index()
    )

    # Keep the historical column order expected by downstream cells.
    return per_minute[['cloud_cover', 'DateTime']].dropna()

In [3]:
# Load the raw CSV through preparedata(); the path is relative to the
# notebook's working directory.
cloud_cover = preparedata('Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv')
cloud_cover

Unnamed: 0,cloud_cover,DateTime
0,0.990000,2021-11-16 11:15:00
1,0.989000,2021-11-16 11:16:00
2,0.987667,2021-11-16 11:17:00
3,0.987000,2021-11-16 11:18:00
4,0.987000,2021-11-16 11:19:00
...,...,...
15056,0.168000,2021-12-16 13:58:00
15057,0.169000,2021-12-16 13:59:00
15058,0.147000,2021-12-16 14:00:00
15059,0.145000,2021-12-16 14:01:00


In [4]:
# Select the timestamp and target columns and give them the names used by
# the Prophet cells below (`ds` for the timestamp, `y` for the value).
df = cloud_cover[['DateTime', 'cloud_cover']].rename(
    columns={'DateTime': 'ds', 'cloud_cover': 'y'}
)
df

Unnamed: 0,ds,y
0,2021-11-16 11:15:00,0.990000
1,2021-11-16 11:16:00,0.989000
2,2021-11-16 11:17:00,0.987667
3,2021-11-16 11:18:00,0.987000
4,2021-11-16 11:19:00,0.987000
...,...,...
15056,2021-12-16 13:58:00,0.168000
15057,2021-12-16 13:59:00,0.169000
15058,2021-12-16 14:00:00,0.147000
15059,2021-12-16 14:01:00,0.145000


# Train/Test dataset

In [5]:
# Hold out everything from 2021-12-16 00:00 onwards as the test set;
# all earlier rows form the training set.
train = df.loc[df['ds'] < '2021-12-16 00:00:00']
test = df.loc[df['ds'] >= '2021-12-16 00:00:00']

print(f'train Shape {train.shape}')
print(f'test Shape {test.shape}')

train Shape (14768, 2)
test Shape (293, 2)


# Model evaluated on a one-day test set

In [6]:
# Fit a Prophet model with default settings on the training rows, then
# produce forecasts for the rows of `test`.
model = Prophet()
model.fit(train)
predict = model.predict(test)
predict

17:14:02 - cmdstanpy - INFO - Chain [1] start processing
17:14:34 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,daily_lower,daily_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-12-16 09:10:00,0.004909,-0.179961,0.346846,0.004909,0.004909,0.078284,0.078284,0.078284,0.237035,0.237035,0.237035,-0.158751,-0.158751,-0.158751,0.0,0.0,0.0,0.083193
1,2021-12-16 09:11:00,0.004890,-0.187682,0.346900,0.004890,0.004890,0.078757,0.078757,0.078757,0.237354,0.237354,0.237354,-0.158597,-0.158597,-0.158597,0.0,0.0,0.0,0.083647
2,2021-12-16 09:12:00,0.004870,-0.189503,0.378636,0.004870,0.004870,0.079247,0.079247,0.079247,0.237690,0.237690,0.237690,-0.158443,-0.158443,-0.158443,0.0,0.0,0.0,0.084116
3,2021-12-16 09:13:00,0.004850,-0.184471,0.353154,0.004850,0.004850,0.079752,0.079752,0.079752,0.238042,0.238042,0.238042,-0.158290,-0.158290,-0.158290,0.0,0.0,0.0,0.084602
4,2021-12-16 09:14:00,0.004830,-0.178261,0.380490,0.004830,0.004830,0.080273,0.080273,0.080273,0.238409,0.238409,0.238409,-0.158136,-0.158136,-0.158136,0.0,0.0,0.0,0.085102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288,2021-12-16 13:58:00,-0.000821,-0.102428,0.423440,-0.000821,-0.000821,0.174338,0.174338,0.174338,0.288412,0.288412,0.288412,-0.114074,-0.114074,-0.114074,0.0,0.0,0.0,0.173517
289,2021-12-16 13:59:00,-0.000841,-0.091128,0.464486,-0.000841,-0.000841,0.173289,0.173289,0.173289,0.287211,0.287211,0.287211,-0.113923,-0.113923,-0.113923,0.0,0.0,0.0,0.172448
290,2021-12-16 14:00:00,-0.000861,-0.102980,0.431865,-0.000861,-0.000861,0.172233,0.172233,0.172233,0.286004,0.286004,0.286004,-0.113771,-0.113771,-0.113771,0.0,0.0,0.0,0.171372
291,2021-12-16 14:01:00,-0.000881,-0.108982,0.455815,-0.000881,-0.000881,0.171171,0.171171,0.171171,0.284791,0.284791,0.284791,-0.113620,-0.113620,-0.113620,0.0,0.0,0.0,0.170290


In [7]:
# Summarise the forecast frame: column names, dtypes and non-null counts.
predict.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 293 entries, 0 to 292
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   ds                          293 non-null    datetime64[ns]
 1   trend                       293 non-null    float64       
 2   yhat_lower                  293 non-null    float64       
 3   yhat_upper                  293 non-null    float64       
 4   trend_lower                 293 non-null    float64       
 5   trend_upper                 293 non-null    float64       
 6   additive_terms              293 non-null    float64       
 7   additive_terms_lower        293 non-null    float64       
 8   additive_terms_upper        293 non-null    float64       
 9   daily                       293 non-null    float64       
 10  daily_lower                 293 non-null    float64       
 11  daily_upper                 293 non-null    float64       

In [8]:
# Keep only the timestamp and the point forecast for plotting and scoring.
predict1 = predict.loc[:, ['ds', 'yhat']]
predict1

Unnamed: 0,ds,yhat
0,2021-12-16 09:10:00,0.083193
1,2021-12-16 09:11:00,0.083647
2,2021-12-16 09:12:00,0.084116
3,2021-12-16 09:13:00,0.084602
4,2021-12-16 09:14:00,0.085102
...,...,...
288,2021-12-16 13:58:00,0.173517
289,2021-12-16 13:59:00,0.172448
290,2021-12-16 14:00:00,0.171372
291,2021-12-16 14:01:00,0.170290


In [12]:
# Overlay the observed series and the forecast on a shared date axis.
fig = go.Figure(
    data=[
        go.Scatter(x=df['ds'], y=df["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict1['ds'], y=predict1["yhat"], mode='lines', name='Predict'),
    ]
)

# Single layout call: figure size and titles, plus a date x-axis carrying
# range-selector buttons and a range slider.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    yaxis_title="Cloud cover",
    xaxis={
        "title": "Date",
        "type": "date",
        "rangeselector": {
            "buttons": [
                {"count": 1, "label": "1.zoom x1", "step": "hour", "stepmode": "backward"},
                {"count": 2, "label": "2.zoom x2", "step": "hour", "stepmode": "backward"},
                {"count": 3, "label": "3.zoom x3", "step": "day", "stepmode": "backward"},
                {"step": "all"},
            ]
        },
        "rangeslider": {"visible": True},
    },
)

fig.show()

# Evaluating forecast accuracy

- **MAE** ย่อมาจาก Mean Absolute Error หรือเรียกอีกชื่อหนึ่งว่า L1 Loss ค่า MAE นี้ชื่อก็บอกอยู่แล้วว่าใช้ Absolute มาช่วยทำให้ค่า Error กลายเป็นบวก สูตรคำนวณจึงเป็นการนำค่า Error มาใส่ Absolute ก่อนที่จะนำมาหาค่าเฉลี่ยของ Error
- **MSE** ย่อมาจาก Mean Square Error หรือเรียกอีกชื่อหนึ่งว่า L2 Loss เช่นเดียวกัน ค่า MSE จะมีการทำให้ค่า Error กลายเป็นบวกก่อนโดยการนำค่า Error มายกกำลังสอง ก่อนที่จะนำค่า Error มาหาค่าเฉลี่ย
- **RMSE** ย่อมาจาก Root Mean Square Error เป็น Loss Function ที่จะนำค่า MSE มาใส่ Square Root จึงทำให้มีคุณสมบัติที่คล้ายกับค่า MSE แต่ต่างกันตรงที่ หน่วยของค่า Error จะไม่มีเลขยกกำลังสอง จึงทำให้อ่านค่าได้ง่ายกว่า เนื่องจากหน่วยของ RMSE นั้นมีหน่วยเดียวกันกับค่าที่โมเดลทำนายไว้

In [10]:
from sklearn.metrics import mean_squared_error

In [15]:
# Mean squared error between the held-out observations and the forecasts.
# The `squared=True` argument was removed: it is the default, and the
# `squared` keyword is deprecated in scikit-learn 1.4 and removed in 1.6.
prophet_mse_error = mean_squared_error(test["y"], predict1["yhat"])
print(f'MSE Error: {prophet_mse_error}')

MSE Error: 0.03358633027798148


----------------------------------------------------------------------------------------------

# Prepare Data: Hourly Aggregation

In [2]:
def preparedata_hourtime(file):
    """Load a cloud-cover CSV and average the readings hour by hour.

    Parameters
    ----------
    file : str, path-like or file-like
        Passed straight to ``pd.read_csv`` (with ``skipinitialspace=True``).
        The CSV must provide the columns ``Year``, ``Month``, ``Day``,
        ``Hour`` and ``CloudCover``.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``cloud_cover`` (mean of every reading
        that falls in the same hour) and ``DateTime`` (``datetime64`` at the
        start of that hour). Rows are sorted by ``DateTime`` and rows
        containing a missing value are dropped.
    """
    raw = pd.read_csv(file, skipinitialspace=True)

    # Build the hourly timestamp directly from the numeric parts. The old
    # code produced "YYYY-MM-DD HH" strings but parsed them with the format
    # "%Y-%m-%d %H:%M", which does not match and is rejected by strict
    # format checking; it also relied on positional ``x[0]`` access.
    parts = raw[['Year', 'Month', 'Day', 'Hour']].rename(columns=str.lower)

    per_hour = (
        raw.assign(DateTime=pd.to_datetime(parts))
        .groupby('DateTime')
        .agg(cloud_cover=('CloudCover', 'mean'))
        .reset_index()
    )

    # Keep the historical column order expected by downstream cells.
    return per_hour[['cloud_cover', 'DateTime']].dropna()

In [3]:
# Re-load the same CSV, this time aggregated by preparedata_hourtime().
cloud_cover1 = preparedata_hourtime('Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv')
cloud_cover1

Unnamed: 0,cloud_cover,DateTime
0,0.990899,2021-11-16 11:00:00
1,0.973114,2021-11-16 12:00:00
2,0.960600,2021-11-16 13:00:00
3,0.908915,2021-11-16 14:00:00
4,0.631449,2021-11-16 15:00:00
...,...,...
251,0.480200,2021-12-16 10:00:00
252,0.155117,2021-12-16 11:00:00
253,0.161317,2021-12-16 12:00:00
254,0.174650,2021-12-16 13:00:00


In [4]:
# Select the timestamp and target columns and give them the names used by
# the Prophet cells below (`ds` for the timestamp, `y` for the value).
df1 = cloud_cover1[['DateTime', 'cloud_cover']].rename(
    columns={'DateTime': 'ds', 'cloud_cover': 'y'}
)
df1

Unnamed: 0,ds,y
0,2021-11-16 11:00:00,0.990899
1,2021-11-16 12:00:00,0.973114
2,2021-11-16 13:00:00,0.960600
3,2021-11-16 14:00:00,0.908915
4,2021-11-16 15:00:00,0.631449
...,...,...
251,2021-12-16 10:00:00,0.480200
252,2021-12-16 11:00:00,0.155117
253,2021-12-16 12:00:00,0.161317
254,2021-12-16 13:00:00,0.174650


## Split data using train_test_split

In [12]:
from sklearn.model_selection import train_test_split
# shuffle=False keeps the rows in their existing order, so the first 80%
# become the training set and the remaining rows the test set.
train1,test1 = train_test_split(df1, train_size=0.8, shuffle=False)
print('Train Shape', train1.shape)
print('Test Shape', test1.shape)

train Shape (204, 2)
test Shape (52, 2)


In [31]:
# Display the hourly training split.
train1

Unnamed: 0,ds,y
0,2021-11-16 11:00:00,0.990899
1,2021-11-16 12:00:00,0.973114
2,2021-11-16 13:00:00,0.960600
3,2021-11-16 14:00:00,0.908915
4,2021-11-16 15:00:00,0.631449
...,...,...
199,2021-12-10 11:00:00,0.132593
200,2021-12-10 12:00:00,0.638319
201,2021-12-10 13:00:00,0.712340
202,2021-12-10 14:00:00,0.515441


In [32]:
# Display the hourly test split.
test1

Unnamed: 0,ds,y
204,2021-12-10 16:00:00,0.29485
205,2021-12-11 08:00:00,0.863067
206,2021-12-11 09:00:00,0.670333
207,2021-12-11 10:00:00,0.886633
208,2021-12-11 11:00:00,0.505052
209,2021-12-11 12:00:00,0.467982
210,2021-12-11 13:00:00,0.407017
211,2021-12-11 14:00:00,0.247034
212,2021-12-11 15:00:00,0.224317
213,2021-12-11 16:00:00,0.564383


In [13]:
# Fit a default Prophet model on the hourly training split and forecast
# for the rows of `test1`, keeping only the timestamp and point forecast.
# NOTE(review): the displayed forecasts include negative `yhat` values,
# so nothing here constrains the prediction to be non-negative.
model1 = Prophet()
model1.fit(train1)
predict2 = model1.predict(test1)
predict3 = predict2[['ds', 'yhat']]
predict3

18:39:16 - cmdstanpy - INFO - Chain [1] start processing
18:39:16 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat
0,2021-12-10 16:00:00,0.138871
1,2021-12-11 08:00:00,0.062373
2,2021-12-11 09:00:00,0.037371
3,2021-12-11 10:00:00,0.070883
4,2021-12-11 11:00:00,0.128288
5,2021-12-11 12:00:00,0.15509
6,2021-12-11 13:00:00,0.116684
7,2021-12-11 14:00:00,0.027176
8,2021-12-11 15:00:00,-0.057529
9,2021-12-11 16:00:00,-0.079002


In [14]:
# Overlay the observed hourly series and the forecast on a shared date axis.
fig = go.Figure(
    data=[
        go.Scatter(x=df1['ds'], y=df1["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict3['ds'], y=predict3["yhat"], mode='lines', name='Predict'),
    ]
)

# Single layout call: figure size and titles, plus a date x-axis carrying
# range-selector buttons and a range slider.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    yaxis_title="Cloud cover",
    xaxis={
        "title": "Date",
        "type": "date",
        "rangeselector": {
            "buttons": [
                {"count": 1, "label": "1.zoom x1", "step": "hour", "stepmode": "backward"},
                {"count": 2, "label": "2.zoom x2", "step": "hour", "stepmode": "backward"},
                {"count": 3, "label": "3.zoom x3", "step": "day", "stepmode": "backward"},
                {"step": "all"},
            ]
        },
        "rangeslider": {"visible": True},
    },
)

fig.show()

In [15]:
# Mean squared error between the held-out observations and the forecasts.
# The `squared=True` argument was removed: it is the default, and the
# `squared` keyword is deprecated in scikit-learn 1.4 and removed in 1.6.
prophet_mse_error1 = mean_squared_error(test1["y"], predict3["yhat"])
print(f'MSE Error: {prophet_mse_error1}')

MSE Error: 0.1830991851067643


---

## Split data using the last day as the test set

In [37]:
# Hold out everything from 2021-12-16 00:00 onwards as the test set;
# all earlier hourly rows form the training set.
train3 = df1.loc[df1['ds'] < '2021-12-16 00:00:00']
test3 = df1.loc[df1['ds'] >= '2021-12-16 00:00:00']

print(f'train Shape {train3.shape}')
print(f'test Shape {test3.shape}')

train Shape (250, 2)
test Shape (6, 2)


In [38]:
# Fit a default Prophet model on the hourly rows before the last day and
# forecast for the rows of `test3`, keeping only timestamp and point forecast.
model3 = Prophet()
model3.fit(train3)
predict6 = model3.predict(test3)
predict7 = predict6[['ds', 'yhat']]
predict7

20:01:32 - cmdstanpy - INFO - Chain [1] start processing
20:01:32 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat
0,2021-12-16 09:00:00,0.310187
1,2021-12-16 10:00:00,0.345585
2,2021-12-16 11:00:00,0.384894
3,2021-12-16 12:00:00,0.402292
4,2021-12-16 13:00:00,0.381478
5,2021-12-16 14:00:00,0.327173


In [39]:
# Overlay the observed hourly series and the last-day forecast on a shared date axis.
fig = go.Figure(
    data=[
        go.Scatter(x=df1['ds'], y=df1["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict7['ds'], y=predict7["yhat"], mode='lines', name='Predict'),
    ]
)

# Single layout call: figure size and titles, plus a date x-axis carrying
# range-selector buttons and a range slider.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    yaxis_title="Cloud cover",
    xaxis={
        "title": "Date",
        "type": "date",
        "rangeselector": {
            "buttons": [
                {"count": 1, "label": "1.zoom x1", "step": "hour", "stepmode": "backward"},
                {"count": 2, "label": "2.zoom x2", "step": "hour", "stepmode": "backward"},
                {"count": 3, "label": "3.zoom x3", "step": "day", "stepmode": "backward"},
                {"step": "all"},
            ]
        },
        "rangeslider": {"visible": True},
    },
)

fig.show()

In [40]:
# Mean squared error between the held-out observations and the forecasts.
# The `squared=True` argument was removed: it is the default, and the
# `squared` keyword is deprecated in scikit-learn 1.4 and removed in 1.6.
prophet_mse_error3 = mean_squared_error(test3["y"], predict7["yhat"])
print(f'MSE Error: {prophet_mse_error3}')

MSE Error: 0.037805405829864795


# Prepare Data: Daily Aggregation

In [17]:
def preparedata_datetime(file):
    """Load a cloud-cover CSV and average the readings day by day.

    Parameters
    ----------
    file : str, path-like or file-like
        Passed straight to ``pd.read_csv`` (with ``skipinitialspace=True``).
        The CSV must provide the columns ``Year``, ``Month``, ``Day`` and
        ``CloudCover``.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``Date`` (``datetime64``) and
        ``cloud_cover`` (mean of every reading taken on that date). Rows are
        sorted by ``Date`` and rows containing a missing value are dropped.
    """
    raw = pd.read_csv(file, skipinitialspace=True)

    # Build the date from its numeric parts in one vectorised call instead
    # of formatting strings row by row with positional ``x[0]`` access,
    # which pandas deprecates on label-indexed Series.
    parts = raw[['Year', 'Month', 'Day']].rename(columns=str.lower)

    per_day = (
        raw.assign(Date=pd.to_datetime(parts))
        .groupby('Date')
        .agg(cloud_cover=('CloudCover', 'mean'))
        .reset_index()
    )
    return per_day.dropna()

In [18]:
# Re-load the same CSV, this time aggregated by preparedata_datetime().
cloud_cover2 = preparedata_datetime('Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv')
cloud_cover2

Unnamed: 0,Date,cloud_cover
0,2021-11-16,0.769193
1,2021-11-17,0.364608
2,2021-11-18,0.878172
3,2021-11-19,0.529353
4,2021-11-20,0.449684
5,2021-11-21,0.443777
6,2021-11-22,0.496665
7,2021-11-23,0.954683
8,2021-11-24,0.665824
9,2021-11-25,0.369516


In [19]:
# Select the date and target columns and give them the names used by
# the Prophet cells below (`ds` for the timestamp, `y` for the value).
df2 = cloud_cover2[['Date', 'cloud_cover']].rename(
    columns={'Date': 'ds', 'cloud_cover': 'y'}
)
df2

Unnamed: 0,ds,y
0,2021-11-16,0.769193
1,2021-11-17,0.364608
2,2021-11-18,0.878172
3,2021-11-19,0.529353
4,2021-11-20,0.449684
5,2021-11-21,0.443777
6,2021-11-22,0.496665
7,2021-11-23,0.954683
8,2021-11-24,0.665824
9,2021-11-25,0.369516


In [27]:
# Ordered 80/20 split of the daily series (shuffle=False keeps row order).
# NOTE(review): relies on `train_test_split` having been imported by an
# earlier cell; this cell fails on its own in a fresh kernel.
train2,test2 = train_test_split(df2, train_size=0.8, shuffle=False)
print('Train Shape', train2.shape)
print('Test Shape', test2.shape)

Train Shape (24, 2)
Test Shape (6, 2)


In [28]:
# Display the daily training split.
train2

Unnamed: 0,ds,y
0,2021-11-16,0.769193
1,2021-11-17,0.364608
2,2021-11-18,0.878172
3,2021-11-19,0.529353
4,2021-11-20,0.449684
5,2021-11-21,0.443777
6,2021-11-22,0.496665
7,2021-11-23,0.954683
8,2021-11-24,0.665824
9,2021-11-25,0.369516


In [29]:
# Display the daily test split.
test2

Unnamed: 0,ds,y
24,2021-12-11,0.539296
25,2021-12-12,0.671356
26,2021-12-13,0.476423
27,2021-12-14,0.260594
28,2021-12-15,0.085025
29,2021-12-16,0.228007


In [21]:
# Fit a default Prophet model on the daily training split and forecast
# for the rows of `test2`, keeping only the timestamp and point forecast.
model2 = Prophet()
model2.fit(train2)
predict4 = model2.predict(test2)
predict5 = predict4[['ds','yhat']]
predict5

18:40:35 - cmdstanpy - INFO - Chain [1] start processing
18:40:35 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat
0,2021-12-11,0.133853
1,2021-12-12,0.323609
2,2021-12-13,0.400169
3,2021-12-14,0.491077
4,2021-12-15,0.305829
5,2021-12-16,0.345998


In [22]:
# Overlay the observed daily series and the forecast on a shared date axis.
fig = go.Figure(
    data=[
        go.Scatter(x=df2['ds'], y=df2["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict5['ds'], y=predict5["yhat"], mode='lines', name='Predict'),
    ]
)

# Single layout call: figure size and titles, plus a date x-axis carrying
# range-selector buttons and a range slider.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    yaxis_title="Cloud cover",
    xaxis={
        "title": "Date",
        "type": "date",
        "rangeselector": {
            "buttons": [
                {"count": 1, "label": "1.zoom x1", "step": "hour", "stepmode": "backward"},
                {"count": 2, "label": "2.zoom x2", "step": "hour", "stepmode": "backward"},
                {"count": 3, "label": "3.zoom x3", "step": "day", "stepmode": "backward"},
                {"step": "all"},
            ]
        },
        "rangeslider": {"visible": True},
    },
)

fig.show()

In [23]:
# Mean squared error between the held-out observations and the forecasts.
# The `squared=True` argument was removed: it is the default, and the
# `squared` keyword is deprecated in scikit-learn 1.4 and removed in 1.6.
prophet_mse_error2 = mean_squared_error(test2["y"], predict5["yhat"])
print(f'MSE Error: {prophet_mse_error2}')

MSE Error: 0.0678209461490802
