In [68]:
# Python
import pandas as pd
from fbprophet import Prophet
import sklearn.metrics
import math
import csv

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
import csv
# Notebook-wide seaborn defaults: 11x4 figures and a 1.5x font scale.
sns.set(rc={'figure.figsize':(11, 4)}, font_scale=1.5)

import sys
# Make the project's data helpers importable from this notebook.
# Insert at position 1 because position 0 is the script path (or '' in a REPL).
sys.path.insert(1, '../src/data/')

# Project-local modules resolved through the path entry added above.
import make_dataset as ma_da
from import_data import import_df_from_zip_pkl

## helpful blog post
## https://towardsdatascience.com/implementing-facebook-prophet-efficiently-c241305405a3

In [11]:
def preprocess(df, window):
    """Clean a power time series and resample it to a fixed window.

    The frame is passed through the project's night-time and clipping
    filters (``ma_da``), then resampled with ``window`` using the mean of
    each bin. A ``LogPower`` column holding the natural logarithm of the
    resampled ``Power`` column is appended.

    Parameters
    ----------
    df : DataFrame whose index supports ``resample`` and that has a
        ``Power`` column.
    window : resampling rule handed to ``DataFrame.resample`` (e.g. ``"D"``).

    Returns
    -------
    The resampled frame including the extra ``LogPower`` column.
    """
    daytime = ma_da.remove_night_time_data(df)
    unclipped = ma_da.remove_clipping_with_flexible_window(daytime, verbose=False)

    return (
        unclipped
        .resample(window)
        .mean()
        .assign(LogPower=lambda frame: np.log(frame["Power"]))
    )

In [70]:
# Fit Prophet to each of the 50 data sets in synthetic_basic.zip and log the
# estimated versus true degradation rate to prophet_basic_results.csv.
# newline='' is required by the csv module for files passed to csv.writer;
# without it, platforms that translate '\n' (Windows) get a blank line after
# every row.
with open(r'prophet_basic_results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["data_set", "calc_rate", "true_rate", "error"])

    for i in range(0, 50):
        df = import_df_from_zip_pkl("../data/raw/synthetic_basic.zip", index=i)
        # Make the index tz-naive before it becomes Prophet's `ds` column.
        df.index = df.index.tz_localize(None)
        df_post = preprocess(df, "D")

        df_post = df_post.reset_index()
        fb_df = df_post[["datetime", "Power", "Soiling"]]
        fb_df = fb_df.rename(columns={'datetime': 'ds', 'Power': 'y'})

        m = Prophet(yearly_seasonality=30,  # the larger the number the more fourier terms are included to fit the seasonality
                    weekly_seasonality=False,
                    seasonality_mode='multiplicative',
                    growth='linear',
                    changepoint_prior_scale=0.05,
                    n_changepoints=0,
                    daily_seasonality=False
                   )
        m.add_regressor('Soiling')
        m.fit(fb_df)
        # Prophet is meant to be used for prediction; only the fit over the
        # history is needed here, so two future points are requested and then
        # dropped again. .copy() detaches the slice so that adding the
        # regressor column below does not trigger SettingWithCopyWarning.
        future = m.make_future_dataframe(2, freq='D')[:-2].copy()
        # Aligned on the integer index shared by fb_df and future.
        future["Soiling"] = fb_df["Soiling"]
        forecast = m.predict(future)

        # Normalise the fitted trend by its first value, then take the slope of
        # a straight-line fit against time in years (one sample per day).
        trend = forecast.trend.values / forecast.trend.values[0]
        calculated_rate = stats.linregress(x=np.arange(trend.size) / 365, y=trend).slope
        # The same straight-line fit on the known degradation profile is the reference.
        true_degrad_profile = df_post["Degradation"]
        true_rate = stats.linregress(x=np.arange(true_degrad_profile.size) / 365, y=true_degrad_profile).slope
        error = calculated_rate - true_rate

        print("Calculated rate:", calculated_rate)
        print("True rate:", true_rate)
        print("error % per year:", 100 * error)
        #fig = m.plot(forecast)
        writer.writerow([i, calculated_rate, true_rate, error])

Calculated rate: -0.011536074782414502
True rate: -0.011703918411984772
error % per year: 0.016784362957027003
Calculated rate: -0.013503416694198623
True rate: -0.013708549214763536
error % per year: 0.02051325205649126
Calculated rate: -0.009729932699576125
True rate: -0.009867932731306837
error % per year: 0.013800003173071278
Calculated rate: -0.01295999079853014
True rate: -0.013152597667662589
error % per year: 0.0192606869132448
Calculated rate: -0.010863509993565128
True rate: -0.011028619641359464
error % per year: 0.016510964779433562
Calculated rate: -0.006314369290588775
True rate: -0.006400170533971974
error % per year: 0.008580124338319894
Calculated rate: -0.005666227686762623
True rate: -0.005745665938353332
error % per year: 0.007943825159070945
Calculated rate: -0.012031078955250293
True rate: -0.012206832601199527
error % per year: 0.017575364594923405
Calculated rate: -0.012393767438838662
True rate: -0.01257356573665551
error % per year: 0.017979829781684768
Calcul

In [71]:
# Root-mean-square of the rate errors (calculated minus true) over all basic
# data sets, scaled by 100 to read as percent per year.
df = pd.read_csv("prophet_basic_results.csv")
np.sqrt(df["error"].pow(2).mean()) * 100

0.013566880360170615

In [73]:
# Fit Prophet to each of the 50 data sets in synthetic_soil.zip without a
# soiling regressor and log the estimated versus true degradation rate to
# prophet_soil_results.csv.
# newline='' is required by the csv module for files passed to csv.writer;
# without it, platforms that translate '\n' (Windows) get a blank line after
# every row.
with open(r'prophet_soil_results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["data_set", "calc_rate", "true_rate", "error"])

    for i in range(0, 50):
        df = import_df_from_zip_pkl("../data/raw/synthetic_soil.zip", index=i)
        # Make the index tz-naive before it becomes Prophet's `ds` column.
        df.index = df.index.tz_localize(None)
        df_post = preprocess(df, "D")

        df_post = df_post.reset_index()
        # Soiling is carried along but not registered as a regressor in this run.
        fb_df = df_post[["datetime", "Power", "Soiling"]]
        fb_df = fb_df.rename(columns={'datetime': 'ds', 'Power': 'y'})

        m = Prophet(yearly_seasonality=30,  # the larger the number the more fourier terms are included to fit the seasonality
                    weekly_seasonality=False,
                    seasonality_mode='multiplicative',
                    growth='linear',
                    changepoint_prior_scale=0.05,
                    daily_seasonality=False
                   )
        m.fit(fb_df)
        # Only the fit over the history is needed: two future points are
        # requested and then dropped again.
        future = m.make_future_dataframe(2, freq='D')[:-2]
        forecast = m.predict(future)

        # Normalise the fitted trend by its first value, then take the slope of
        # a straight-line fit against time in years (one sample per day).
        trend = forecast.trend.values / forecast.trend.values[0]
        calculated_rate = stats.linregress(x=np.arange(trend.size) / 365, y=trend).slope
        # The same straight-line fit on the known degradation profile is the reference.
        true_degrad_profile = df_post["Degradation"]
        true_rate = stats.linregress(x=np.arange(true_degrad_profile.size) / 365, y=true_degrad_profile).slope
        error = calculated_rate - true_rate

        print("Calculated rate:", calculated_rate)
        print("True rate:", true_rate)
        print("error % per year:", 100 * error)
        #fig = m.plot(forecast)
        writer.writerow([i, calculated_rate, true_rate, error])

Calculated rate: -0.0011375072978563059
True rate: -0.012097421686591238
error % per year: 1.095991438873493
Calculated rate: -0.012967409582408267
True rate: -0.009688193086279124
error % per year: -0.32792164961291426
Calculated rate: 0.0019296626240521743
True rate: -0.012135683074166575
error % per year: 1.406534569821875
Calculated rate: 0.003808476270538221
True rate: -0.014953007666352354
error % per year: 1.8761483936890575
Calculated rate: 0.011117878319695206
True rate: -0.008038617940768744
error % per year: 1.915649626046395
Calculated rate: -0.031940162610901456
True rate: -0.01227463693988075
error % per year: -1.9665525671020707
Calculated rate: -0.0023705256107465476
True rate: -0.008408819557831333
error % per year: 0.6038293947084785
Calculated rate: -0.002391077885983546
True rate: -0.008242351906770343
error % per year: 0.5851274020786796
Calculated rate: -0.0036953404738239164
True rate: -0.012250600822149457
error % per year: 0.8555260348325541
Calculated rate: -0

In [75]:
# Root-mean-square of the rate errors (calculated minus true) over all soiling
# data sets fitted without a regressor, scaled by 100 to read as percent per year.
df = pd.read_csv("prophet_soil_results.csv")
np.sqrt(df["error"].pow(2).mean()) * 100

0.8413707540577627

In [76]:
# Fit Prophet to each of the 50 data sets in synthetic_soil.zip with Soiling
# registered as an extra regressor and log the estimated versus true
# degradation rate to prophet_soil_regressor_results.csv.
# newline='' is required by the csv module for files passed to csv.writer;
# without it, platforms that translate '\n' (Windows) get a blank line after
# every row.
with open(r'prophet_soil_regressor_results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["data_set", "calc_rate", "true_rate", "error"])

    for i in range(0, 50):
        df = import_df_from_zip_pkl("../data/raw/synthetic_soil.zip", index=i)
        # Make the index tz-naive before it becomes Prophet's `ds` column.
        df.index = df.index.tz_localize(None)
        df_post = preprocess(df, "D")

        df_post = df_post.reset_index()
        fb_df = df_post[["datetime", "Power", "Soiling"]]
        fb_df = fb_df.rename(columns={'datetime': 'ds', 'Power': 'y'})

        m = Prophet(yearly_seasonality=30,  # the larger the number the more fourier terms are included to fit the seasonality
                    weekly_seasonality=False,
                    seasonality_mode='multiplicative',
                    growth='linear',
                    changepoint_prior_scale=0.05,
                    daily_seasonality=False
                   )
        m.add_regressor('Soiling')
        m.fit(fb_df)
        # Only the fit over the history is needed: two future points are
        # requested and then dropped again. .copy() detaches the slice so that
        # adding the regressor column below does not trigger
        # SettingWithCopyWarning.
        future = m.make_future_dataframe(2, freq='D')[:-2].copy()
        # Aligned on the integer index shared by fb_df and future.
        future["Soiling"] = fb_df["Soiling"]
        forecast = m.predict(future)

        # Normalise the fitted trend by its first value, then take the slope of
        # a straight-line fit against time in years (one sample per day).
        trend = forecast.trend.values / forecast.trend.values[0]
        calculated_rate = stats.linregress(x=np.arange(trend.size) / 365, y=trend).slope
        # The same straight-line fit on the known degradation profile is the reference.
        true_degrad_profile = df_post["Degradation"]
        true_rate = stats.linregress(x=np.arange(true_degrad_profile.size) / 365, y=true_degrad_profile).slope
        error = calculated_rate - true_rate

        print("Calculated rate:", calculated_rate)
        print("True rate:", true_rate)
        print("error % per year:", 100 * error)
        #fig = m.plot(forecast)
        writer.writerow([i, calculated_rate, true_rate, error])

Calculated rate: -0.011368293458880505
True rate: -0.012097421686591238
error % per year: 0.0729128227710733
Calculated rate: -0.009632073115845746
True rate: -0.009688193086279124
error % per year: 0.005611997043337806
Calculated rate: -0.010721498004165997
True rate: -0.012135683074166575
error % per year: 0.14141850700005784
Calculated rate: -0.0144541257834286
True rate: -0.014953007666352354
error % per year: 0.04988818829237546
Calculated rate: -0.007640668350352501
True rate: -0.008038617940768744
error % per year: 0.039794959041624264
Calculated rate: -0.01156529382729405
True rate: -0.01227463693988075
error % per year: 0.07093431125866997
Calculated rate: -0.007002404959116281
True rate: -0.008408819557831333
error % per year: 0.14064145987150523
Calculated rate: -0.0077365023884656894
True rate: -0.008242351906770343
error % per year: 0.05058495183046536
Calculated rate: -0.01208660621171208
True rate: -0.012250600822149457
error % per year: 0.016399461043737663
Calculated r

In [77]:
# Root-mean-square of the rate errors (calculated minus true) over all soiling
# data sets fitted with the Soiling regressor, scaled by 100 to read as
# percent per year.
df = pd.read_csv("prophet_soil_regressor_results.csv")
np.sqrt(df["error"].pow(2).mean()) * 100

0.08120336169552503

In [78]:
# Fit Prophet to each of the 50 data sets in synthetic_weather.zip without an
# extra regressor and log the estimated versus true degradation rate to
# prophet_weather_results.csv.
# newline='' is required by the csv module for files passed to csv.writer;
# without it, platforms that translate '\n' (Windows) get a blank line after
# every row.
with open(r'prophet_weather_results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["data_set", "calc_rate", "true_rate", "error"])

    for i in range(0, 50):
        df = import_df_from_zip_pkl("../data/raw/synthetic_weather.zip", index=i)
        # Make the index tz-naive before it becomes Prophet's `ds` column.
        df.index = df.index.tz_localize(None)
        df_post = preprocess(df, "D")

        df_post = df_post.reset_index()
        # Soiling is carried along but not registered as a regressor in this run.
        fb_df = df_post[["datetime", "Power", "Soiling"]]
        fb_df = fb_df.rename(columns={'datetime': 'ds', 'Power': 'y'})

        m = Prophet(yearly_seasonality=30,  # the larger the number the more fourier terms are included to fit the seasonality
                    weekly_seasonality=False,
                    seasonality_mode='multiplicative',
                    growth='linear',
                    changepoint_prior_scale=0.05,
                    daily_seasonality=False
                   )
        m.fit(fb_df)
        # Only the fit over the history is needed: two future points are
        # requested and then dropped again.
        future = m.make_future_dataframe(2, freq='D')[:-2]
        forecast = m.predict(future)

        # Normalise the fitted trend by its first value, then take the slope of
        # a straight-line fit against time in years (one sample per day).
        trend = forecast.trend.values / forecast.trend.values[0]
        calculated_rate = stats.linregress(x=np.arange(trend.size) / 365, y=trend).slope
        # The same straight-line fit on the known degradation profile is the reference.
        true_degrad_profile = df_post["Degradation"]
        true_rate = stats.linregress(x=np.arange(true_degrad_profile.size) / 365, y=true_degrad_profile).slope
        error = calculated_rate - true_rate

        print("Calculated rate:", calculated_rate)
        print("True rate:", true_rate)
        print("error % per year:", 100 * error)
        #fig = m.plot(forecast)
        writer.writerow([i, calculated_rate, true_rate, error])

Calculated rate: -0.01379115616741957
True rate: -0.012920523495474625
error % per year: -0.08706326719449456
Calculated rate: -0.006088487965102811
True rate: -0.001932945315542911
error % per year: -0.41555426495599007
Calculated rate: 0.0013610299925527736
True rate: -0.009090509521936838
error % per year: 1.0451539514489612
Calculated rate: -0.011087328507132175
True rate: -0.007268096575188496
error % per year: -0.38192319319436796
Calculated rate: -0.014629268307206013
True rate: -0.005478775198412604
error % per year: -0.9150493108793409
Calculated rate: -0.016638922908773012
True rate: -0.007962057256800097
error % per year: -0.8676865651972916
Calculated rate: -0.015361777005378389
True rate: -0.0100526265600108
error % per year: -0.5309150445367589
Calculated rate: -0.023316170904669222
True rate: -0.01628730868329616
error % per year: -0.7028862221373062
Calculated rate: -0.01308187205932438
True rate: -0.008429308348845109
error % per year: -0.4652563710479271
Calculated ra

In [82]:
# Fit Prophet to each of the 50 data sets in synthetic_weather.zip with POA
# registered as an extra regressor and log the estimated versus true
# degradation rate to prophet_weather_regressor_results.csv.
# newline='' is required by the csv module for files passed to csv.writer;
# without it, platforms that translate '\n' (Windows) get a blank line after
# every row.
with open(r'prophet_weather_regressor_results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["data_set", "calc_rate", "true_rate", "error"])

    for i in range(0, 50):
        df = import_df_from_zip_pkl("../data/raw/synthetic_weather.zip", index=i)
        # Make the index tz-naive before it becomes Prophet's `ds` column.
        df.index = df.index.tz_localize(None)
        df_post = preprocess(df, "D")

        df_post = df_post.reset_index()
        fb_df = df_post[["datetime", "Power", "POA"]]
        fb_df = fb_df.rename(columns={'datetime': 'ds', 'Power': 'y'})

        m = Prophet(yearly_seasonality=30,  # the larger the number the more fourier terms are included to fit the seasonality
                    weekly_seasonality=False,
                    seasonality_mode='multiplicative',
                    growth='linear',
                    changepoint_prior_scale=0.05,
                    daily_seasonality=False
                   )
        m.add_regressor('POA')
        m.fit(fb_df)
        # Only the fit over the history is needed: two future points are
        # requested and then dropped again. .copy() detaches the slice so that
        # adding the regressor column below does not trigger
        # SettingWithCopyWarning.
        future = m.make_future_dataframe(2, freq='D')[:-2].copy()
        # Aligned on the integer index shared by fb_df and future.
        future["POA"] = fb_df["POA"]
        forecast = m.predict(future)

        # Normalise the fitted trend by its first value, then take the slope of
        # a straight-line fit against time in years (one sample per day).
        trend = forecast.trend.values / forecast.trend.values[0]
        calculated_rate = stats.linregress(x=np.arange(trend.size) / 365, y=trend).slope
        # The same straight-line fit on the known degradation profile is the reference.
        true_degrad_profile = df_post["Degradation"]
        true_rate = stats.linregress(x=np.arange(true_degrad_profile.size) / 365, y=true_degrad_profile).slope
        error = calculated_rate - true_rate

        print("Calculated rate:", calculated_rate)
        print("True rate:", true_rate)
        print("error % per year:", 100 * error)
        #fig = m.plot(forecast)
        writer.writerow([i, calculated_rate, true_rate, error])

Calculated rate: -0.014592267569733742
True rate: -0.012920523495474625
error % per year: -0.16717440742591172
Calculated rate: -0.004499485110470878
True rate: -0.001932945315542911
error % per year: -0.2566539794927967
Calculated rate: -0.010125407060791642
True rate: -0.009090509521936838
error % per year: -0.10348975388548044
Calculated rate: -0.0082917497784162
True rate: -0.007268096575188496
error % per year: -0.10236532032277049
Calculated rate: -0.00793562135611107
True rate: -0.005478775198412604
error % per year: -0.24568461576984657
Calculated rate: -0.006284288178869742
True rate: -0.007962057256800097
error % per year: 0.16777690779303547
Calculated rate: -0.009387633469255714
True rate: -0.0100526265600108
error % per year: 0.06649930907550866
Calculated rate: -0.017811548875728993
True rate: -0.01628730868329616
error % per year: -0.1524240192432833
Calculated rate: -0.006940346943826598
True rate: -0.008429308348845109
error % per year: 0.14889614050185113
Calculated r

In [80]:
# Root-mean-square of the rate errors (calculated minus true) over all weather
# data sets fitted without a regressor, scaled by 100 to read as percent per year.
df = pd.read_csv("prophet_weather_results.csv")
np.sqrt(df["error"].pow(2).mean()) * 100

0.7310167480585571

In [83]:
# Root-mean-square of the rate errors (calculated minus true) over all weather
# data sets fitted with the POA regressor, scaled by 100 to read as percent
# per year.
df = pd.read_csv("prophet_weather_regressor_results.csv")
np.sqrt(df["error"].pow(2).mean()) * 100

0.14738932378301878