In [1]:
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by later cells
# (pandas, scikit-learn, ...) are hidden too - consider narrowing it.
warnings.filterwarnings("ignore")
from google.colab import drive
# Mount Google Drive under /content/drive (this import only exists on Colab).
drive.mount('/content/drive', force_remount=False)

Mounted at /content/drive


In [2]:
import os
# Unpack the archive from the mounted Drive only when the 'twindata'
# directory is not already present, so re-running the cell skips the unzip.
if not os.path.exists('twindata'):
  !unzip -q 'drive/My Drive/Data/twindata.zip'

# List the contents of the data directory.
!ls 'twindata'

a_AUG2020.csv  d_NOV2020.csv  g_FEB2021.csv  j_MAY2021.csv
b_SEP2020.csv  e_DEC2020.csv  h_MAR2021.csv  k_JUN2021.csv
c_OCT2020.csv  f_JAN2021.csv  i_APR2021.csv  l_JUL2021.csv


In [3]:
import pandas as pd
import glob

path = r'twindata'  # use your path
all_files = glob.glob(path + "/*.csv")

# Read every CSV in sorted filename order (the files carry a_, b_, ... l_
# prefixes) and keep the per-file frames in `li`.
li = [
    pd.read_csv(csv_path, index_col=None, header=0)
    for csv_path in sorted(all_files)
]

# Stack the per-file frames into a single frame with a fresh 0..N-1 index.
df_year = pd.concat(li, axis=0, ignore_index=True)

In [36]:
import pandas as pd 
import requests
from json import JSONEncoder
import numpy as np

class DateTimeEncoder(JSONEncoder):
    """JSON encoder that serialises ``date``/``datetime`` values as ISO strings.

    Any other type that the standard encoder cannot handle is passed on to
    ``JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``.

        Args:
            obj: A value the standard encoder could not serialise.

        Returns:
            ``obj.isoformat()`` when ``obj`` is a ``date`` or ``datetime``.

        Raises:
            TypeError: For every other unsupported type (raised by the base
                class).
        """
        # Imported locally under an alias: this cell never imported
        # ``datetime``, and other cells run ``from datetime import datetime``,
        # which rebinds the global name to the class and would break
        # ``datetime.date`` lookups here.
        import datetime as _dt

        if isinstance(obj, (_dt.date, _dt.datetime)):
            return obj.isoformat()
        # Without this call the method returned None, so unsupported objects
        # were silently written out as JSON ``null``.
        return super().default(obj)

In [39]:
## Added by Arvind 

def get_data(id, attri, startDate='2021-03-01 00:00:00', endDate='2021-03-02 00:00:00', timeout=60):
    """Fetch historical attribute data and return it pivoted by attribute.

    Args:
        id: Object id(s) sent as the ``id`` query parameter (the notebook
            passes a comma-separated string).
        attri: Attribute name(s) sent as the ``attributes`` query parameter.
        startDate: Start of the query window. Defaults to the value that was
            previously hard-coded.
        endDate: End of the query window. Defaults to the value that was
            previously hard-coded.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        pandas.DataFrame indexed by ``measurementtimestamp`` with one column
        per ``attributeserviceid``; missing values are replaced by 0 and the
        three known attributes are renamed to ``irradiance``, ``backtmp2``
        and ``activepower``.

    Raises:
        requests.HTTPError: If the service answers with an error status.

    For a cron job, pass a rolling window instead of the defaults, e.g.
    ``endDate = datetime.datetime.now()`` and
    ``startDate = endDate - datetime.timedelta(minutes=90)``.
    NOTE(review): date/datetime values are serialised by DateTimeEncoder via
    ``isoformat()`` ('T' separator) - confirm the API accepts that format.
    """
    import json

    query = {'id': id, 'attributes': attri, 'startDate': startDate, 'endDate': endDate}
    # Round-trip through the encoder so any date/datetime bound becomes a
    # plain string. json.loads replaces the former eval(), which executed the
    # encoded text as Python and failed on JSON literals (true/false/null).
    query = json.loads(DateTimeEncoder().encode(query))
    response = requests.get(
        'http://54.206.42.58:8006/api/v2/historicalData/getObjectAttributeHistoricalData',
        params=query,
        timeout=timeout,
    )
    # Surface HTTP failures here rather than as a KeyError further down.
    response.raise_for_status()

    data = response.json()
    exportedData = data["data"]["ObjectData"]
    df = pd.DataFrame(exportedData)

    pivoted = df.pivot(index='measurementtimestamp', columns='attributeserviceid', values='value')
    pivoted = pivoted.replace(np.nan, 0)
    pivoted = pivoted.rename(columns={
        'Irradiance Global (W/m^2)': 'irradiance',
        'Back-of-Module Temperature 2 (deg C)': 'backtmp2',
        'Active Power': 'activepower',
    })
    # The original built this frame and then discarded it.
    return pivoted
    




In [38]:
# Request irradiance, back-of-module temperature 2 and active power for the ids below.
# NOTE(review): the previous comment here read "Aug-2020 to July-2021", but this call
# passes no date range - confirm which window get_data actually queries.
get_data('WP_SF_MVPS4.PM1,WP_SF_MVPS4.WS1','Irradiance Global (W/m^2),Back-of-Module Temperature 2 (deg C),Active Power')



attributeserviceid
Active Power                            0
Back-of-Module Temperature 2 (deg C)    3
Irradiance Global (W/m^2)               3
dtype: int64
attributeserviceid
Active Power                            0
Back-of-Module Temperature 2 (deg C)    0
Irradiance Global (W/m^2)               0
dtype: int64


In [4]:
# Timestamp layout used by the 'TimestampUTC' column of the CSV exports.
_TIMESTAMP_FORMAT = '%d/%m/%Y %I:%M:%S.%f %p'


def _select_measurement(data, measurement, value_name, source=None):
  """Return [TimestampUTC, <value_name>] rows for one measurement (optionally one source)."""
  mask = data['PME_MeasurementName'] == measurement
  if source is not None:
    mask = mask & (data['PME_SourceName'] == source)
  # .copy() gives an independent frame, so the column assignment below never
  # writes into a slice of the caller's frame.
  selected = data.loc[mask, ['TimestampUTC', 'PME_Value']].copy()
  selected['TimestampUTC'] = pd.to_datetime(selected['TimestampUTC'], format=_TIMESTAMP_FORMAT)
  return selected.rename(columns={'PME_Value': value_name})


def load_manipulate_data(data=None):
  """Build the merged irradiance / back-of-module temperature / active power frame.

  Args:
    data: Frame with the columns 'PME_SourceName', 'PME_MeasurementName',
      'TimestampUTC' and 'PME_Value'. Defaults to the notebook-level
      ``df_year`` so existing zero-argument calls keep working.

  Returns:
    pandas.DataFrame with columns 'TimestampUTC', 'irradiance', 'backtmp2'
    and 'activepower'; rows are inner-joined on 'TimestampUTC' and NaN values
    are replaced by 0. The input frame is not modified.
  """
  import numpy as np

  if data is None:
    data = df_year

  # Only active power is restricted to a source, as in the original code.
  rslt_irradiance_select = _select_measurement(data, 'Irradiance Global (W/m^2)', 'irradiance')
  rslt_back_temp2_select = _select_measurement(data, 'Back-of-Module Temperature 2 (deg C)', 'backtmp2')
  rslt_activepower_select = _select_measurement(data, 'Active Power', 'activepower', source='WP_SF_MVPS4.PM1')

  df_merged1 = pd.merge(rslt_irradiance_select, rslt_back_temp2_select, on='TimestampUTC')
  df_merged_withnan = pd.merge(df_merged1, rslt_activepower_select, on='TimestampUTC')
  df_merged = df_merged_withnan.replace(np.nan, 0)

  return df_merged

In [5]:
def normalize_data():
  """Add mean-normalised columns to the notebook-level ``df_merged``.

  Writes two columns into ``df_merged`` in place:
    * 'ap_norm' - active power divided by its mean.
    * 'ag_val'  - 0.9 * (irradiance / mean) + 0.1 * (backtmp2 / mean).

  Returns:
    Tuple ``(df_merged, activepower_mean)``.
  """
  means = {
      column: df_merged[column].mean()
      for column in ('irradiance', 'backtmp2', 'activepower')
  }
  activepower_mean = means['activepower']

  df_merged['ap_norm'] = df_merged['activepower'] / activepower_mean

  irradiance_term = 0.9 * (df_merged['irradiance'] / means['irradiance'])
  temperature_term = 0.1 * (df_merged['backtmp2'] / means['backtmp2'])
  df_merged['ag_val'] = irradiance_term + temperature_term

  return df_merged, activepower_mean

In [6]:
def pre_process(data=None, train_fraction=0.9):
  """Split the normalised features/targets into ordered train and test parts.

  The split is positional (no shuffling): the first ``train_fraction`` of the
  rows are used for training and the remainder for testing.

  Args:
    data: Frame with the columns 'TimestampUTC', 'ag_val' and 'ap_norm'.
      Defaults to the notebook-level ``df_merged`` so existing zero-argument
      calls keep working.
    train_fraction: Share of rows assigned to the training part
      (previously hard-coded to 0.9).

  Returns:
    Tuple ``(X_train, X_test, y_train, y_test, split)`` where the X arrays
    have shape (n, 1), the y arrays are 1-D and ``split`` is the row index at
    which the test part starts.

  Raises:
    ValueError: If ``train_fraction`` is outside [0, 1].
  """
  # The unused scikit-learn imports were removed; the function only needs numpy.
  import numpy as np

  if data is None:
    data = df_merged
  if not 0 <= train_fraction <= 1:
    raise ValueError("train_fraction must be between 0 and 1")

  X = np.array(data['ag_val'])
  y = np.array(data['ap_norm'])

  split = int(data['TimestampUTC'].count() * train_fraction)

  # Single-feature arrays are reshaped to a column, i.e. (n_samples, 1).
  X_train = X[:split].reshape(-1, 1)
  X_test = X[split:].reshape(-1, 1)
  y_train = y[:split]
  y_test = y[split:]

  return X_train, X_test, y_train, y_test, split

In [7]:
def get_error_rate(actual=None, predicted=None):
  """Return the relative prediction error used to build the forecast band.

  Computes ``sqrt(sum((actual - predicted)**2)) / sum(actual)`` with the two
  inputs paired by position.

  Args:
    actual: Measured active power values. Defaults to the notebook-level
      ``df_merged['activepower'][split:]``.
    predicted: Predicted active power values, same length as ``actual``.
      Defaults to ``predicted_activepower['ActivePowerPrediction']``.

  Returns:
    The error ratio as a float.

  Raises:
    ValueError: If the two inputs differ in length.

  NOTE(review): the normalisation is kept exactly as before (no division by
  the sample count inside the square root) - confirm this is the intended
  "RMSD per kW" definition.
  NOTE(review): with the notebook defaults, ``actual`` is the raw
  'activepower' column while 'ActivePowerPrediction' holds the raw model
  output, which the driver cell rescales into 'activepower_predicted' -
  confirm which prediction column should be compared.
  """
  import math
  import numpy as np

  if actual is None:
    actual = df_merged['activepower'][split:]
  if predicted is None:
    predicted = predicted_activepower['ActivePowerPrediction']

  # Compare as plain arrays: the test slice keeps its original row labels
  # while the prediction frame is indexed from 0, so label-based Series
  # arithmetic would misalign them.
  y = np.asarray(actual, dtype=float)
  y_bar = np.asarray(predicted, dtype=float)
  if y.shape != y_bar.shape:
    raise ValueError("actual and predicted must have the same length")

  # The former loop paired y[i-1] with y_bar[i] (wrapping to the last sample
  # at i == 0) and stopped one element early; every pair is now used once.
  summation = float(((y - y_bar) ** 2).sum())

  RMSD_perKw = math.sqrt(summation) / y.sum()
  return RMSD_perKw

In [8]:
def use_input_normalize():
  """Turn the user-supplied inputs in ``df_user`` into a model feature column.

  Uses the irradiance and back-of-module temperature means of the
  notebook-level ``df_merged`` to scale ``df_user['irradiance']`` and
  ``df_user['temperature']``, stores the 0.9 / 0.1 weighted sum in
  ``df_user['agg_norm']`` (in place) and returns it as an (n, 1) array.
  """
  import numpy as np

  mean_irradiance = df_merged['irradiance'].mean()
  mean_backtmp2 = df_merged['backtmp2'].mean()

  weighted_irradiance = (0.9 * df_user['irradiance']) / mean_irradiance
  weighted_temperature = (0.1 * df_user['temperature']) / mean_backtmp2
  df_user['agg_norm'] = weighted_irradiance + weighted_temperature

  return np.array(df_user['agg_norm']).reshape(-1, 1)

In [11]:
if __name__ == "__main__":
  # Driver cell: prepare the data, then either train and save the SVR model
  # ('train') or load the saved model and forecast user scenarios ('user').

  from sklearn.svm import SVR
  from datetime import datetime
  from datetime import timedelta
  # sklearn.externals.joblib was deprecated and later removed from
  # scikit-learn; use the standalone package and fall back only for very old
  # installs.
  try:
    import joblib
  except ImportError:
    from sklearn.externals import joblib

  df_merged = load_manipulate_data()
  df_merged, activepower_mean = normalize_data()
  X_train, X_test, y_train, y_test, split = pre_process()

  # Support-vector regressor (not a classifier) with an RBF kernel
  clf = SVR(kernel='rbf')

  request_type = 'user'

  if request_type == 'train':
    # Train the model using the training set and persist it
    clf.fit(X_train, y_train)
    joblib.dump(clf, 'model.pkl')

  elif request_type == 'user':
    # joblib.load unpickles the file - only load model files you trust.
    # 'model.pkl' must already exist from an earlier 'train' run.
    clf = joblib.load('model.pkl')

    # Predict the response for the test part and scale it back to power units
    y_pred = clf.predict(X_test)
    predicted_activepower = pd.DataFrame(data=y_pred, columns=["ActivePowerPrediction"])
    predicted_activepower['activepower_predicted'] = predicted_activepower['ActivePowerPrediction']*activepower_mean

    # Calculate error band
    RMSD_perKw = get_error_rate()

    # User scenario: minutes after the last measurement with the expected
    # irradiance and temperature at each of those times.
    user_time = [15, 30, 60]
    user_irradiance = [400, 520 , 715]
    user_temperature = [20, 24, 25]

    last_timestamp = df_merged['TimestampUTC'].iloc[-1]
    user_time_f = [last_timestamp + timedelta(minutes=i) for i in user_time]

    user_inputs = {'TimestampUTC': user_time_f,
                    'irradiance': user_irradiance, 
                    'temperature': user_temperature}

    df_user = pd.DataFrame(data=user_inputs)

    X_user = use_input_normalize()
    y_user = clf.predict(X_user)

    userinput_activepower = pd.DataFrame(data=y_user, columns=["ActivePowerPrediction"])
    userinput_activepower.insert(loc=0, column='TimestampUTC', value = user_time_f)
    userinput_activepower['activepower_predicted'] = userinput_activepower['ActivePowerPrediction']*activepower_mean

    userinput_activepower['lower'] = userinput_activepower["activepower_predicted"]*(1-RMSD_perKw)
    userinput_activepower['upper'] = userinput_activepower["activepower_predicted"]*(1+RMSD_perKw)

    # Anchor the forecast at the last measured point so the plotted band
    # starts from the end of the history line.
    # NOTE(review): this row also writes the raw power value into the
    # normalised 'ActivePowerPrediction' column - confirm that is acceptable.
    last_data = df_merged['activepower'].iloc[-1]
    userinput_activepower.loc[len(userinput_activepower)] = [last_timestamp, last_data, last_data, last_data, last_data]

    userinput_activepower = userinput_activepower.sort_values(by=['TimestampUTC'])


In [12]:
import plotly.express as px
import plotly.graph_objects as go

from datetime import datetime
from datetime import timedelta

n = 15

# Last 50 measured rows provide the history line.
history = df_merged.tail(50)
forecast_x = userinput_activepower['TimestampUTC']

# Trace order matters: 'tonexty' on the upper bound fills down to the trace
# added immediately before it (the lower bound).
traces = [
    go.Scatter(
        x=history['TimestampUTC'],
        y=history['activepower'],
        mode='lines',
        name='Active Power',
        line=dict(color="#0000ff"),
    ),
    go.Scatter(
        x=forecast_x,
        y=userinput_activepower['lower'],
        fill=None,
        mode='lines',
        line=dict(color='#00BFFF'),
        name='Lower',
    ),
    go.Scatter(
        x=forecast_x,
        y=userinput_activepower['upper'],
        fill='tonexty',
        mode='lines',
        line=dict(color='#00BFFF'),
        name='Upper',
    ),
    go.Scatter(
        x=forecast_x,
        y=userinput_activepower['activepower_predicted'],
        mode='lines',
        name='ActivePowerPrediction',
        line=dict(color="red"),
    ),
]

fig = go.Figure(data=traces)
fig.show()