In [10]:
import pandas as pd
from prophet import Prophet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Prepare Data

In [11]:
def preparedata(file):
    """Load a cloud-cover CSV and average the readings that share the same minute.

    Parameters
    ----------
    file : str, path object or file-like
        Anything ``pd.read_csv`` accepts. The data must provide the columns
        ``Year``, ``Month``, ``Day``, ``Hour``, ``Minute`` and ``CloudCover``.
        Every other column (for example ``Second``) is ignored.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``cloud_cover`` (mean of ``CloudCover``
        over all rows in the same minute) and ``DateTime`` (the minute as a
        timestamp). Rows are sorted by ``DateTime``; rows containing missing
        values are dropped.
    """
    raw = pd.read_csv(file, skipinitialspace=True)

    # Assemble the timestamps in a single vectorized call instead of
    # formatting strings row by row and parsing them back. Seconds are left
    # out on purpose so that every reading is bucketed to its minute.
    # The component names are lower-cased to match what to_datetime expects.
    parts = raw[["Year", "Month", "Day", "Hour", "Minute"]].rename(columns=str.lower)
    minute_stamp = pd.to_datetime(parts)

    per_minute = (
        raw.assign(DateTime=minute_stamp)
        .groupby("DateTime")
        .agg(cloud_cover=("CloudCover", "mean"))
        .reset_index()
    )

    # Keep the column order callers already rely on: value first, timestamp second.
    return per_minute[["cloud_cover", "DateTime"]].dropna()

In [12]:
# Load the cloud-cover CSV through preparedata and display the resulting frame.
cloud_cover = preparedata('Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv')
cloud_cover

Unnamed: 0,cloud_cover,DateTime
0,0.990000,2021-11-16 11:15:00
1,0.989000,2021-11-16 11:16:00
2,0.987667,2021-11-16 11:17:00
3,0.987000,2021-11-16 11:18:00
4,0.987000,2021-11-16 11:19:00
...,...,...
15056,0.168000,2021-12-16 13:58:00
15057,0.169000,2021-12-16 13:59:00
15058,0.147000,2021-12-16 14:00:00
15059,0.145000,2021-12-16 14:01:00


In [13]:
# Prophet looks for the timestamp in a column named 'ds' and the target in 'y',
# so select the two columns in that order and rename them.
df = cloud_cover[['DateTime', 'cloud_cover']].rename(
    columns={'DateTime': 'ds', 'cloud_cover': 'y'}
)
df

Unnamed: 0,ds,y
0,2021-11-16 11:15:00,0.990000
1,2021-11-16 11:16:00,0.989000
2,2021-11-16 11:17:00,0.987667
3,2021-11-16 11:18:00,0.987000
4,2021-11-16 11:19:00,0.987000
...,...,...
15056,2021-12-16 13:58:00,0.168000
15057,2021-12-16 13:59:00,0.169000
15058,2021-12-16 14:00:00,0.147000
15059,2021-12-16 14:01:00,0.145000


# Train/Test Split

In [14]:
# shuffle=False keeps the row order, so the first 80% of the rows become the
# training set and the remaining rows the test set (no random sampling).
train,test = train_test_split(df, train_size=0.8, shuffle=False)
print('Train Shape', train.shape)
print('Test Shape', test.shape)

Train Shape (12048, 2)
Test Shape (3013, 2)


# Time Series Forecasting with Prophet

In [15]:
# Fit Prophet with its default settings on the training rows, then ask it for
# predictions at the timestamps of the held-out rows.
model = Prophet()
model.fit(df=train)
predict = model.predict(df=test)

19:46:08 - cmdstanpy - INFO - Chain [1] start processing
19:46:31 - cmdstanpy - INFO - Chain [1] done processing


In [16]:
# Summarise the frame returned by model.predict: column names, non-null counts and dtypes.
predict.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3013 entries, 0 to 3012
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   ds                          3013 non-null   datetime64[ns]
 1   trend                       3013 non-null   float64       
 2   yhat_lower                  3013 non-null   float64       
 3   yhat_upper                  3013 non-null   float64       
 4   trend_lower                 3013 non-null   float64       
 5   trend_upper                 3013 non-null   float64       
 6   additive_terms              3013 non-null   float64       
 7   additive_terms_lower        3013 non-null   float64       
 8   additive_terms_upper        3013 non-null   float64       
 9   daily                       3013 non-null   float64       
 10  daily_lower                 3013 non-null   float64       
 11  daily_upper                 3013 non-null   float64     

In [17]:
# Keep only the timestamp and the point forecast; the other columns are not used below.
predict = predict.loc[:, ['ds', 'yhat']]
predict

Unnamed: 0,ds,yhat
0,2021-12-10 15:24:00,0.096023
1,2021-12-10 15:25:00,0.094978
2,2021-12-10 15:26:00,0.093943
3,2021-12-10 15:27:00,0.092918
4,2021-12-10 15:28:00,0.091904
...,...,...
3008,2021-12-16 13:58:00,0.029173
3009,2021-12-16 13:59:00,0.027939
3010,2021-12-16 14:00:00,0.026700
3011,2021-12-16 14:01:00,0.025456


In [18]:
# Overlay the complete observed series with the forecast for the held-out rows.
fig = go.Figure(
    data=[
        go.Scatter(x=df['ds'], y=df["y"], mode='lines', name='Actual'),
        go.Scatter(x=predict['ds'], y=predict["yhat"], mode='lines', name='Predict'),
    ]
)

# Titles, size, and the date-axis controls (quick-zoom buttons plus a range
# slider) are all applied in a single layout update.
fig.update_layout(
    autosize=True,
    height=600,
    title="Prophet Model",
    xaxis_title="Date",
    yaxis_title="Cloud cover",
    xaxis=dict(
        type="date",
        rangeslider=dict(visible=True),
        rangeselector=dict(
            buttons=[
                # Each button zooms to the last `count` units of `step`.
                dict(count=1, label="1.zoom x1", step="hour", stepmode="backward"),
                dict(count=2, label="2.zoom x2", step="hour", stepmode="backward"),
                dict(count=3, label="3.zoom x3", step="day", stepmode="backward"),
                # Reset to the full time range.
                dict(step="all"),
            ]
        ),
    ),
)

fig.show()

In [19]:
# Mean squared error between the held-out observations and the Prophet forecast.
# The two series are compared row by row, in order.
# The former `squared=True` argument is omitted: it was the default, and the
# `squared` parameter is deprecated in scikit-learn 1.4 and removed in 1.6,
# where passing it raises a TypeError.
prophet_mse_error = mean_squared_error(test["y"], predict["yhat"])
print(f'MSE Error: {prophet_mse_error}')

MSE Error: 0.3995924133885686


---