In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import requests
import json
import os
from sqlalchemy import create_engine
import pymysql
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [47]:
def createDayDataframe(filename):
    path = "tongsdata/"
    filedate = filename.split(".")[0].split("-")

    with open(path + filename, 'r', encoding='utf-8') as fin:
        data_json = json.loads(fin.read())

    #  Create X DataFrame
    todayData = {time: data_json[time]['currently'] for time in data_json}
    todayData_DF = pd.DataFrame(todayData).T

    #  Create y DataFrame
    energyBalanceFilename = "energyBalance/Energy_Balance_{}_{}_{}.csv".format(filedate[0], filedate[1], filedate[2]) #change due to json filename
    energyBalance = pd.read_csv(path + energyBalanceFilename, delimiter=";")

    energyBalance_DF = pd.DataFrame(index=energyBalance.iloc[:-1,0].apply(lambda x: x[2:-1]))
    energyBalance_DF['PV Power Generation'] = energyBalance['PV power generation / Mean values [W]  '].values[:-1]
    energyBalance_DF = energyBalance_DF[energyBalance_DF != ' ']
    energyBalance_DF = energyBalance_DF.dropna()

    #  Join X y DataFrame
    data = todayData_DF.join(energyBalance_DF)
    return data

def cleanDataframe(masterDF):
    masterData_cleaned = masterDF.reset_index()
    masterData_cleaned = masterData_cleaned.rename(columns={'index': 'time_of_day'})
    masterData_cleaned = masterData_cleaned[masterData_cleaned['PV Power Generation'].notna()]
    #masterData_cleaned['month'] = masterData_cleaned['time'].apply(datetime.datetime.fromtimestamp).apply(lambda x: x.month)
    masterData_cleaned['cloudCover'] = masterData_cleaned['cloudCover'].fillna(value=masterData_cleaned['cloudCover'].mean())
    masterData_cleaned['PV Power Generation'] = masterData_cleaned['PV Power Generation'].apply(lambda x: x.replace(",", "")).astype('float')
    return masterData_cleaned

def featureEngineering(masterDF):
    masterDF = masterDF.drop(columns=['icon', 'summary', 'time', 'windBearing', 'windGust', 'ozone', 'precipIntensity', 'precipProbability','pressure', 'precipType'])
    #masterDF = pd.get_dummies(masterDF, columns=['time_of_day', 'precipType', 'uvIndex', 'month'])
    masterDF = pd.get_dummies(masterDF, columns=['time_of_day'])
    return masterDF

    

Darksky key : 2b423d0c90e52bb805ccf6af1f305d7c ,bf11a6cc1e3fb105f612b086f6cefe19

In [26]:
def callApi(date):
    key = "bf11a6cc1e3fb105f612b086f6cefe19"
    latitude = "13.8282"
    longitude = "100.614"
    
    url = "https://api.darksky.net/forecast/{}/{},{}".format(key, latitude, longitude) +"?exclude=currently,minutely,daily"
    
    response = requests.get(url=url)
    return response

In [4]:
def one_month(month, year):
    date = datetime.datetime(year=year, month=month, day=1) #change day here
    nextMonth = date.month + 1
    while(date.month != nextMonth):
        yield date
        date = date + datetime.timedelta(days=1)   
        
def one_day(date):
    tomorrow = date + datetime.timedelta(days=1)
    while(date.day != tomorrow.day):
        yield date
        date = date + datetime.timedelta(minutes=15)   

In [5]:
def write_json(filename, data):
    with open("Testdate/" + filename + ".json", "w", encoding='utf-8') as fout:
        fout.write(json.dumps(data))

In [6]:
def loadjson(predict_date):
    date = pd.to_datetime(predict_date)
    filename = date.strftime("%Y-%m-%d") #change to Y-m-d because file will arrange better
    result = callApi(date)
    d = json.loads(result.text)
    df = pd.DataFrame.from_dict(d['hourly']['data'])
    df['time'] = pd.to_datetime(df['time'],unit='s')
    df.set_index('time',inplace=True)
    df1 = df[filename]
    newdf = pd.DataFrame(np.repeat(df1.values,4,axis=0))
    newdf.columns = df1.columns
    
    maketime = []
    date = predict_date.replace("_","-")
    date = pd.to_datetime(date)
    tomorrow = date + datetime.timedelta(days=1)
    while(date.day != tomorrow.day):
        a = date.strftime("%H:%M")
        maketime.append(a)
        date = date + datetime.timedelta(minutes=15)
    newdf["time"] = maketime
    newdf.set_index('time',inplace=True)
    savetojson = newdf.to_json(r'C:\Users\User\Documents\Senior-Project\Testdate/' + filename + '.json')

In [48]:
def predictgen(predict_date):
    path = "Testdate/" #from load into file above
    filename = predict_date.replace("_",'-') + '.json'
    
    with open(path + filename, 'r', encoding='utf-8') as fin:
        data_json = json.loads(fin.read())
    
    todayData_DF = pd.DataFrame.from_dict(data_json)
    todayData_DF.index.names=['time_of_day']
    todayData_DF = todayData_DF.loc['06:30':'18:00']
    todayData_DF = todayData_DF.reset_index()
    todayData_DF['cloudCover'] = todayData_DF['cloudCover'].fillna(value=todayData_DF['cloudCover'].mean())
    dropcolumns=['icon', 'summary', 'time', 'windBearing', 'windGust', 'ozone', 'precipIntensity', 'precipProbability','pressure','precipType']
    for i in dropcolumns:
        if i in todayData_DF.columns:
            todayData_DF = todayData_DF.drop(columns=[i])
    todayData_DF = pd.get_dummies(todayData_DF, columns=['time_of_day'])

    masterData_for_predict = masterData.drop(columns=['PV Power Generation'])
    for i in range(len(masterData_for_predict.columns)) : #drop power balance
        if masterData_for_predict.columns[i] not in todayData_DF.columns:
            todayData_DF.insert(i, masterData.columns[i+1], 0)
    missingvalue = todayData_DF[todayData_DF.isna().any(axis=1)].index
    
    col = ['apparentTemperature','cloudCover','dewPoint','humidity','temperature','visibility','windSpeed']
    for col in todayData_DF:
        todayData_DF[col] = pd.to_numeric(todayData_DF[col], errors='coerce')
    if todayData_DF.isna().loc[0,'uvIndex']:
        todayData_DF.loc[0,'uvIndex'] = 0
    todayData_DF=todayData_DF.interpolate(limit_direction='both')
    
    first_predict = todayData_DF.astype('float').values
    
    results_first_predict = exported_pipeline.predict(first_predict)
    results_first_predict = np.array(results_first_predict).tolist()
    for i in range(25):
        results_first_predict.insert(0,0.0)
    for j in range (24):
        results_first_predict.append(0.0)
    return results_first_predict

### รันแค่ครั้งเดียวเพื่อ fit model

In [49]:
import os
import pandas as pd
import json
import datetime

files = os.listdir("tongsdata/")

#files.remove(".DS_Store")
files.remove("energyBalance")

masterData = pd.DataFrame()

for file in files:
    r = createDayDataframe(file).loc['06:30':'18:00']
    #print(file, r.shape)
    masterData = pd.concat([masterData, r], axis=0)

masterData = cleanDataframe(masterData)
masterData = featureEngineering(masterData)

features = list(masterData.drop(columns=['PV Power Generation']).columns)

masterData = masterData.dropna()

X = masterData.drop(columns=['PV Power Generation']).astype('float').values
y = masterData['PV Power Generation'].astype('float').values


from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y)

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

training_features, testing_features, training_target, testing_target = train_test_split(X, y)

# Average CV score on the training set was:-91867.4890210794
exported_pipeline = RandomForestRegressor(bootstrap=False, max_features=0.3, min_samples_leaf=1, min_samples_split=7, n_estimators=100)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)

#from sklearn.metrics import mean_squared_error, r2_score ,mean_absolute_error
#print(mean_squared_error(testing_target, results), 
#mean_absolute_error(testing_target, results), 
#r2_score(testing_target, results))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  app.launch_new_instance()


In [9]:
predict_date=str(pd.datetime.now().date())

In [10]:
loadjson(predict_date)

In [50]:
generation=predictgen(predict_date)

In [51]:
generation

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1988.0966666666668,
 1939.9058333333337,
 2016.5618333333332,
 2038.4498333333333,
 2038.4498333333333,
 2038.4498333333333,
 2050.1724999999997,
 2050.1724999999997,
 2050.1724999999997,
 2050.1724999999997,
 1239.4470000000003,
 1300.1945000000005,
 1190.7558333333338,
 1216.1865000000005,
 680.3936666666667,
 664.9136666666667,
 664.9136666666667,
 664.9136666666667,
 260.1148333333333,
 260.1148333333333,
 260.1148333333333,
 270.93483333333336,
 242.4908333333333,
 242.4908333333333,
 233.51583333333332,
 242.4908333333333,
 280.4341666666667,
 242.2576666666667,
 242.2576666666667,
 242.2576666666667,
 267.9150000000001,
 251.72366666666676,
 251.72366666666676,
 252.82866666666675,
 280.75183333333337,
 314.8176666666667,
 419.7755000000001,
 305.1125,
 241.39299999999994,
 244.5325,
 238.48466666666667,
 335.0983333333333,
 213.0

In [44]:
df = pd.DataFrame()

In [52]:
df["Forecastedgen_without_uvIndexdummies"] = generation

In [54]:
pd.set_option('display.max_rows',100)

In [55]:
df

Unnamed: 0,Forecastedgen_with_uvIndexdummies,Forecastedgen_without_uvIndexdummies
0,0.0,0.0
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,0.0,0.0
9,0.0,0.0


In [32]:
predict_date='2019_04_29'

In [35]:
path = "Testdate/" #from load into file above
filename = predict_date.replace("_",'-') + '.json'

with open(path + filename, 'r', encoding='utf-8') as fin:
    data_json = json.loads(fin.read())
todayData_DF = pd.DataFrame.from_dict(data_json)


todayData_DF.index.names=['time_of_day']
todayData_DF = todayData_DF.loc['06:30':'18:00']
todayData_DF = todayData_DF.reset_index()
todayData_DF['cloudCover'] = todayData_DF['cloudCover'].fillna(value=todayData_DF['cloudCover'].mean())
dropcolumns=['icon', 'summary', 'time', 'windBearing', 'windGust', 'ozone', 'precipIntensity', 'precipProbability','pressure','precipType']
for i in dropcolumns:
    if i in todayData_DF.columns:
        todayData_DF = todayData_DF.drop(columns=[i])
todayData_DF = pd.get_dummies(todayData_DF, columns=['time_of_day'])

masterData_for_predict = masterData.drop(columns=['PV Power Generation'])
for i in range(len(masterData_for_predict.columns)) : #drop power balance
    if masterData_for_predict.columns[i] not in todayData_DF.columns:
        todayData_DF.insert(i, masterData.columns[i+1], 0)
missingvalue = todayData_DF[todayData_DF.isna().any(axis=1)].index

col = ['apparentTemperature','cloudCover','dewPoint','humidity','temperature','visibility','windSpeed']
for col in todayData_DF:
    todayData_DF[col] = pd.to_numeric(todayData_DF[col], errors='coerce')
if todayData_DF.isna().loc[0,'uvIndex']:
    todayData_DF.loc[0,'uvIndex'] = 0
todayData_DF=todayData_DF.interpolate(limit_direction='both')

In [20]:
masterData_for_predict

Unnamed: 0,apparentTemperature,dewPoint,humidity,temperature,uvIndex,visibility,windSpeed,time_of_day_06:30,time_of_day_06:45,time_of_day_07:00,...,time_of_day_16:00,time_of_day_16:15,time_of_day_16:30,time_of_day_16:45,time_of_day_17:00,time_of_day_17:15,time_of_day_17:30,time_of_day_17:45,time_of_day_18:00,precipType_rain
0,84.52,75.89,0.88,79.67,0,6.22,4.43,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1,85.92,76.07,0.88,80.11,0,6.22,4.26,0,1,0,...,0,0,0,0,0,0,0,0,0,1
2,86.84,76.25,0.87,80.55,0,6.22,4.09,0,0,1,...,0,0,0,0,0,0,0,0,0,1
3,87.71,76.34,0.86,81.01,1,6.22,4.41,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,88.6,76.43,0.85,81.47,1,6.22,4.74,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,89.57,76.51,0.84,81.93,1,6.22,5.07,0,0,0,...,0,0,0,0,0,0,0,0,0,1
6,90.51,76.59,0.83,82.39,2,6.22,5.42,0,0,0,...,0,0,0,0,0,0,0,0,0,1
7,92.2,76.62,0.8,83.28,2,6.22,5.64,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,93.78,76.64,0.78,84.18,3,6.22,6.15,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9,95.27,76.62,0.76,85.07,3,6.22,6.89,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [41]:
from sklearn.feature_selection import SelectFromModel
featuresImportance = pd.DataFrame()
featuresImportance['Feature Name'] = features
featuresImportance['Importance'] = exported_pipeline.feature_importances_

featuresImportance.sort_values('Importance', ascending=False)

Unnamed: 0,Feature Name,Importance
54,uvIndex_0,0.301816
4,temperature,0.094868
55,uvIndex_1,0.089619
3,humidity,0.082320
1,cloudCover,0.051183
2,dewPoint,0.048057
0,apparentTemperature,0.046677
6,windSpeed,0.038214
56,uvIndex_2,0.033574
60,uvIndex_6,0.032590
