## Load module and libraries

In [1]:
from selenium import webdriver
from datetime import datetime, timedelta
from random import randrange
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import calendar
import json
import time
import os

## Download login info as cookie (run this only when you need cookie)

In [117]:
# ## Open the virtual browser 
# driver = webdriver.Firefox()

# ## Open the website 
# website = 'https://enlighten.enphaseenergy.com/systems/1302574/inverters/28563594/time_series_x?&date=2019-05-04&stat=POWR%2CDCV%2CDCA%2CACV%2CACHZ%2CTMPI'
# driver.get(website)

# ## Username and password
# user="mfeng@umass.edu"
# password="UmassCS!"

# ## Login the website
# driver.find_element_by_id('user_email').click()
# driver.find_element_by_id("user_email").send_keys(user)
# driver.find_element_by_id('user_password').click()
# driver.find_element_by_id("user_password").send_keys(password)
# driver.find_element_by_id('submit').click()

# ## Store the login info in cookie
# driver.get(website)
# cookie_items = driver.get_cookies()

# post = {}

# for cookie_item in cookie_items:
#     post[cookie_item['name']] = cookie_item['value']
    
# cookie_str = json.dumps(post)
# with open('cookie.txt', 'w', encoding='utf-8') as f:
#     f.write(cookie_str)
# f.close

## Download data for each panel

In [2]:
# Use the stored cookie to login and keep session open
with open('cookie.txt', 'r',encoding='utf-8') as f:
    cookie = f.read()
cookies = json.loads(cookie)

## Align microinverter's serial number with inverter ID
system_id = '1302574'

## Inverter serial number and ID is aligned one by one 
inverter_sn = ['121721038143', '121721037801', '121721037691', '121721038936', '121721038148', 
             '121721037892', '121721037821', '121721037806', '121721038079', '121721038122', 
             '121721038133', '121721037662', '121721037689', '121721037871', '121721037817', 
             '121721038020', '121721038147', '121721038076', '121721038911', '121721037842', 
             '121721037788', '121721037686', '121721037867', '121721038108', '121721038125', 
             '121721037685', '121721038107', '121721038154', '121721038144', '121721037880', 
             '121721038037']

inverter_id = [str(28563593+i) for i in range(len(inverter_sn))]
inverter_sn_id = {k:v for k,v in zip(inverter_id,inverter_sn)}

# print("Microinverter ID: serial number")
# print(inverter_sn_id)

## Group panels based on their facing direction (west/east/lower)
west_roof = ['121721038107', '121721037685', '121721038037', '121721038147', '121721037806', 
             '121721037892', '121721038143']
east_roof = ['121721037871', '121721037662', '121721037801', '121721037691', '121721037880',
             '121721038936', '121721038122', '121721038148', '121721038133']
lower_roof = ['121721037686', '121721037788', '121721038076', '121721038079', '121721037867', 
              '121721038020', '121721037817', '121721038144', '121721038911', '121721037842']

# lower_roof = ['121721037686']

combine_roof = ['121721038107','121721037685','121721038037','121721037871','121721037662','121721038148',
            '121721038076', '121721037817', '121721037867']

west_roof_id = []
east_roof_id = []
lower_roof_id = []
combine_roof_id = []

for inverter in inverter_sn:
    if inverter in west_roof:
        west_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    elif inverter in east_roof:
        east_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    elif inverter in lower_roof:
        lower_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    
# for inverter in inverter_sn:
#     if inverter in combine_roof:
#         combine_roof_id.append(inverter_id[inverter_sn.index(inverter)])

## Formula: website = link_1 + system_id + link_2 + inverter_id + link_3 + date + link_4
link_1 = 'https://enlighten.enphaseenergy.com/systems/'
link_2 = '/inverters/'
link_3 = '/time_series_x?&date='
link_4 = '&stat=POWR%2CDCV%2CDCA%2CACV%2CACHZ%2CTMPI'

In [3]:
def calendar_test(day, count, date_list = None):
    if date_list is None:
        date_list = []
    if count == 0:
        return date_list
    count -= 1
    day = day + timedelta(days=1)
    date = day.isoformat().split('T')[0]
    date_list.append(date)
    
    calendar_test(day,count,date_list)
    
    return date_list

def download_data(start_day, end_day, roof_panel, save=False):
    
    start_day = datetime.strptime(start_day, '%Y-%m-%d')
    end_day = datetime.strptime(end_day, '%Y-%m-%d')
    total_day = abs((end_day - start_day).days)
    day_list = calendar_test(start_day,total_day)
    
    for inverter_id in roof_panel:
        print('Inverter ID: {}'.format(inverter_id))
    
        info = {'date_time':[],'epoch':[], 'power':[], 'power_unknown':[], 'DCV':[], 
                'DCA':[], 'ACV':[], 'ACHZ':[], 'TMPI':[]}
        for day in day_list:
            # website address where data is stored
            website = link_1 + system_id + link_2 + inverter_id + link_3 + day + link_4
            # access data in json format from the website
            res = requests.get(url=website, cookies=cookies)
            data = res.json()
        
            # extract time information 
            for ii in data['POWR']:
                epoch = ii[0]
                info['epoch'].append(epoch)
                
                date_time = time.struct_time(time.localtime(ii[0]))
                date = 10000*date_time[0]+100*date_time[1]+1*date_time[2]
                time_ = 100*date_time[3]+1*date_time[4]
                date_time = date*10000+time_                
            
                info['date_time'].append(date_time)
                info['power'].append(ii[1])
                info['power_unknown'].append(ii[2])
                        
            for ii in data['DCV']:
                info['DCV'].append(ii[1])
            
            for ii in data['DCA']:
                info['DCA'].append(ii[1])
            
            for ii in data['ACV']:
                info['ACV'].append(ii[1])
            
            for ii in data['ACHZ']:
                info['ACHZ'].append(ii[1])
            
            for ii in data['TMPI']:
                info['TMPI'].append(ii[1])
            
        # Save downloaded data for each panel
        data = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in info.items() ]))
        if save is True: 
            export_csv = data.to_csv (str(inverter_id)+".csv", index = True, header=True)
            
    return data
    
def daily_data(start_day, end_day, roof_panel, save=False):
    
    start_day = datetime.strptime(start_day, '%Y-%m-%d')
    end_day = datetime.strptime(end_day, '%Y-%m-%d')
    total_day = abs((end_day - start_day).days)
    day_list = calendar_test(start_day,total_day)
    
    
    
    for inverter_id in roof_panel:
        print('Inverter ID: {}'.format(inverter_id))
        
        info = {day:[] for day in day_list}
        
        for day in day_list:
            # website address where data is stored
            website = link_1 + system_id + link_2 + inverter_id + link_3 + day + link_4
            # access data in json format from the website
            res = requests.get(url=website, cookies=cookies)
            data = res.json()
    
            # extract time information 
            for ii in data['POWR']:
                
                info[day].append(ii[1])
            
        # Save downloaded data for each panel
        data = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in info.items() ]))

        if save is True: 
            export_csv = data.to_csv (str(inverter_id)+".csv", index = True, header=True)
            
        return data

In [19]:
start_day = '2019-11-12'
end_day = '2020-1-14'
roof_panel = east_roof_id
data = download_data(start_day, end_day,roof_panel, True)
# data = daily_data(start_day, end_day,roof_panel, True)

Inverter ID: 28563594
Inverter ID: 28563595
Inverter ID: 28563596
Inverter ID: 28563597
Inverter ID: 28563602
Inverter ID: 28563603
Inverter ID: 28563604
Inverter ID: 28563606
Inverter ID: 28563622


## Clean Data 

In [21]:
# Load files from current directory
files = os.listdir(os.getcwd())
files = [file for file in files if file[:-4] in roof_panel]

In [22]:
def load_data(file):
    
    ## Load dataset 
    dataset = pd.read_csv(file, index_col=0)
    dataset = dataset.set_index('date_time')
    dataset = dataset.drop(['power_unknown', 'DCV', 'DCA', 'ACV', 'ACHZ', 'TMPI'], axis=1)
    dataset = dataset.rename(columns = {'power': str(file[:-4])})
    
    return dataset

In [23]:
dataset = [load_data(file) for file in files]

In [24]:
# Merge power output for each panel based on timestamp
data = dataset[0]
for i in range(1, len(dataset)):
#     data = pd.merge(data, dataset[i], left_index=True, right_index=True)
    data = data.join(dataset[i], lsuffix='_caller', rsuffix='_other')
    data = data.interpolate(method ='linear', limit_direction ='both', limit = 3) 

# Remove duplicated row   
data = data.reset_index().drop_duplicates(subset='date_time',keep='first').set_index('date_time')
data = data.dropna()

In [25]:
data

Unnamed: 0_level_0,epoch_caller,28563595,epoch_other,28563594,epoch_caller,28563596,epoch_other,28563622,epoch_caller,28563597,epoch_other,28563606,epoch_caller,28563604,epoch_other,28563602,epoch,28563603
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
201911130755,1573649718,3.705,1.573650e+09,0.172667,1.573650e+09,3.014,1.573650e+09,0.391,1.573650e+09,0.676,1.573650e+09,6.2680,1.573650e+09,2.459,1.573650e+09,0.000,1.573650e+09,0.5170
201911130756,1573649791,4.824,1.573650e+09,0.259000,1.573650e+09,2.566,1.573650e+09,1.589,1.573650e+09,1.000,1.573650e+09,7.3510,1.573650e+09,4.329,1.573650e+09,0.908,1.573650e+09,0.5170
201911130757,1573649867,5.613,1.573650e+09,0.345333,1.573650e+09,2.466,1.573650e+09,2.274,1.573650e+09,0.347,1.573650e+09,7.5945,1.573650e+09,4.431,1.573650e+09,1.584,1.573650e+09,0.5170
201911130758,1573649926,5.333,1.573650e+09,0.431667,1.573650e+09,3.305,1.573650e+09,3.606,1.573650e+09,0.000,1.573650e+09,7.8380,1.573650e+09,4.533,1.573650e+09,2.260,1.573650e+09,0.5170
201911130759,1573649990,6.270,1.573650e+09,0.518000,1.573650e+09,4.894,1.573650e+09,5.328,1.573650e+09,0.000,1.573650e+09,7.7500,1.573650e+09,4.852,1.573650e+09,4.353,1.573650e+09,0.6665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202001141627,1579037269,1.115,1.579037e+09,0.000000,1.579037e+09,1.481,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,1.4290,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,1.6250
202001141628,1579037321,0.962,1.579037e+09,0.000000,1.579037e+09,1.073,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,1.1880,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,1.3270
202001141629,1579037399,0.688,1.579037e+09,0.000000,1.579037e+09,0.843,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,0.7000,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,0.9300
202001141631,1579037474,0.526,1.579037e+09,0.000000,1.579037e+09,0.613,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,0.3190,1.579037e+09,0.000,1.579037e+09,0.000,1.579037e+09,0.9300


In [26]:
# Save the cleaned data
export_csv = data.to_csv ("east_roof_rest.csv", index = True, header=True)