## Load module and libraries

In [1]:
from selenium import webdriver
from datetime import datetime, timedelta
from random import randrange
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import calendar
import json
import time
import os

## Download login info as cookie (run this only when you need cookie)

In [29]:
# ## Open the virtual browser 
# driver = webdriver.Firefox()

# ## Open the website 
# website = 'https://enlighten.enphaseenergy.com/systems/1302574/inverters/28563594/time_series_x?&date=2019-05-04&stat=POWR%2CDCV%2CDCA%2CACV%2CACHZ%2CTMPI'
# driver.get(website)

# ## Username and password
# user="mfeng@umass.edu"
# password="UmassCS!"

# ## Login the website
# driver.find_element_by_id('user_email').click()
# driver.find_element_by_id("user_email").send_keys(user)
# driver.find_element_by_id('user_password').click()
# driver.find_element_by_id("user_password").send_keys(password)
# driver.find_element_by_id('submit').click()

# ## Store the login info in cookie
# driver.get(website)
# cookie_items = driver.get_cookies()

# post = {}

# for cookie_item in cookie_items:
#     post[cookie_item['name']] = cookie_item['value']
    
# cookie_str = json.dumps(post)
# with open('cookie.txt', 'w', encoding='utf-8') as f:
#     f.write(cookie_str)
# f.close

## Download data for each panel

In [30]:
# Use the stored cookie to login and keep session open
with open('cookie.txt', 'r',encoding='utf-8') as f:
    cookie = f.read()
cookies = json.loads(cookie)

## Align microinverter's serial number with inverter ID
system_id = '1302574'

## Inverter serial number and ID is aligned one by one 
inverter_sn = ['121721038143', '121721037801', '121721037691', '121721038936', '121721038148', 
             '121721037892', '121721037821', '121721037806', '121721038079', '121721038122', 
             '121721038133', '121721037662', '121721037689', '121721037871', '121721037817', 
             '121721038020', '121721038147', '121721038076', '121721038911', '121721037842', 
             '121721037788', '121721037686', '121721037867', '121721038108', '121721038125', 
             '121721037685', '121721038107', '121721038154', '121721038144', '121721037880', 
             '121721038037']

inverter_id = [str(28563593+i) for i in range(len(inverter_sn))]
inverter_sn_id = {k:v for k,v in zip(inverter_id,inverter_sn)}

# print("Microinverter ID: serial number")
# print(inverter_sn_id)

## Group panels based on their facing direction (west/east/lower)
west_roof = ['121721038107', '121721037685', '121721038037', '121721038147', '121721037806', 
             '121721037892', '121721038143']
east_roof = ['121721037871', '121721037662', '121721037801', '121721037691', '121721037880',
             '121721038936', '121721038122', '121721038148', '121721038133']
lower_roof = ['121721037686', '121721037788', '121721038076', '121721038079', '121721037867', 
              '121721038020', '121721037817', '121721038144', '121721038911', '121721037842']
other_roof = ['121721038125','121721037689','121721037821','121721038154','121721038108']


combine_roof = ['121721038107','121721037685','121721038037','121721037871','121721037662','121721038148',
            '121721038076', '121721037817', '121721037867']

west_roof_id = []
east_roof_id = []
lower_roof_id = []
other_roof_id = []
combine_roof_id = []

for inverter in inverter_sn:
    if inverter in west_roof:
        west_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    elif inverter in east_roof:
        east_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    elif inverter in lower_roof:
        lower_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    elif inverter in other_roof:
        other_roof_id.append(inverter_id[inverter_sn.index(inverter)])
    
# for inverter in inverter_sn:
#     if inverter in combine_roof:
#         combine_roof_id.append(inverter_id[inverter_sn.index(inverter)])

## Formula: website = link_1 + system_id + link_2 + inverter_id + link_3 + date + link_4
link_1 = 'https://enlighten.enphaseenergy.com/systems/'
link_2 = '/inverters/'
link_3 = '/time_series_x?&date='
link_4 = '&stat=POWR%2CDCV%2CDCA%2CACV%2CACHZ%2CTMPI'

In [31]:
def calendar_test(day, count, date_list = None):
    if date_list is None:
        date_list = []
    if count == 0:
        return date_list
    count -= 1
    day = day + timedelta(days=1)
    date = day.isoformat().split('T')[0]
    date_list.append(date)
    
    calendar_test(day,count,date_list)
    
    return date_list

def download_data(start_day, end_day, roof_panel, save=False):
    
    start_day = datetime.strptime(start_day, '%Y-%m-%d')
    end_day = datetime.strptime(end_day, '%Y-%m-%d')
    total_day = abs((end_day - start_day).days)
    day_list = calendar_test(start_day,total_day)
    
    for inverter_id in roof_panel:
        print('Inverter ID: {}'.format(inverter_id))
    
        info = {'date_time':[],'epoch':[], 'power':[], 'power_unknown':[], 'DCV':[], 
                'DCA':[], 'ACV':[], 'ACHZ':[], 'TMPI':[]}
        for day in day_list:
            # website address where data is stored
            website = link_1 + system_id + link_2 + inverter_id + link_3 + day + link_4
            # access data in json format from the website
            res = requests.get(url=website, cookies=cookies)
            data = res.json()
        
            # extract time information 
            for ii in data['POWR']:
                epoch = ii[0]
                info['epoch'].append(epoch)
                
                date_time = time.struct_time(time.localtime(ii[0]))
                date = 10000*date_time[0]+100*date_time[1]+1*date_time[2]
                time_ = 100*date_time[3]+1*date_time[4]
                date_time = date*10000+time_                
            
                info['date_time'].append(date_time)
                info['power'].append(ii[1])
                info['power_unknown'].append(ii[2])
                        
            for ii in data['DCV']:
                info['DCV'].append(ii[1])
            
            for ii in data['DCA']:
                info['DCA'].append(ii[1])
            
            for ii in data['ACV']:
                info['ACV'].append(ii[1])
            
            for ii in data['ACHZ']:
                info['ACHZ'].append(ii[1])
            
            for ii in data['TMPI']:
                info['TMPI'].append(ii[1])
            
        # Save downloaded data for each panel
        data = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in info.items() ]))
        if save is True: 
            export_csv = data.to_csv (str(inverter_id)+".csv", index = True, header=True)
            
    return data
    
def daily_data(start_day, end_day, roof_panel, save=False):
    
    start_day = datetime.strptime(start_day, '%Y-%m-%d')
    end_day = datetime.strptime(end_day, '%Y-%m-%d')
    total_day = abs((end_day - start_day).days)
    day_list = calendar_test(start_day,total_day)
    
    for inverter_id in roof_panel:
        print('Inverter ID: {}'.format(inverter_id))
        
        info = {day:[] for day in day_list}
        
        for day in day_list:
            # website address where data is stored
            website = link_1 + system_id + link_2 + inverter_id + link_3 + day + link_4
            # access data in json format from the website
            res = requests.get(url=website, cookies=cookies)
            data = res.json()
    
            # extract time information 
            for ii in data['POWR']:
                
                info[day].append(ii[1])
            
        # Save downloaded data for each panel
        data = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in info.items() ]))

        if save is True: 
            export_csv = data.to_csv (str(inverter_id)+".csv", index = True, header=True)
            
        return data

In [32]:
start_day = '2017-9-12'
end_day = '2020-2-11'
roof_panel = other_roof_id 
data = download_data(start_day, end_day,roof_panel, True)
# data = daily_data(start_day, end_day,roof_panel, True)

Inverter ID: 28563599
Inverter ID: 28563605
Inverter ID: 28563616
Inverter ID: 28563617
Inverter ID: 28563620


## Clean Data 

In [42]:
def load_data(file):   
    ## Load dataset 
    dataset = pd.read_csv(file, index_col=0)
    dataset = dataset.set_index('date_time')
    dataset = dataset.drop(['power_unknown', 'DCV', 'DCA', 'ACV', 'ACHZ', 'TMPI'], axis=1)
    dataset = dataset.rename(columns = {'power': str(file[:-4])})
    
    return dataset

def convert_to_epoch(day_time):
    d = str(day_time)[:4]+'-'+str(day_time)[4:6]+'-'+str(day_time)[6:8]+\
    ' '+str(day_time)[8:10]+':'+str(day_time)[10:]
    p = '%Y-%m-%d %H:%M'
    epoch = int(time.mktime(time.strptime(d,p)))
    return epoch

def create_daily_data(dataset):
    # Create time frame from 6:00AM to 7:00PM
    init = 600
    minute = 0
    time_frame = ['0'+str(init)]
    while init <= 1900:
        minute += 1
        if minute == 60:
            minute = 0
            init += 100-59
        else:
            init += 1
    
        value = str(init)
        if len(value) == 3:
            value = '0'+value
        time_frame.append(value)
    
    n = len(dataset)
    daily_data = {}

    for i in range(n):
        panel_data = dataset[i]
        row_index = panel_data.index
        id_ = panel_data.columns[1]
    
        for index in row_index:
            day = str(index)[:8]
        
            if day not in daily_data:
                day_index = [int(day + i) for i in time_frame]
                daily_data[day] = pd.DataFrame(index=day_index)
            
            value = panel_data.loc[index][id_]
            if type(value) == np.float64:
                daily_data[day].at[index,id_] = panel_data.loc[index][id_]
            else:
                daily_data[day].at[index,id_] = np.mean(panel_data.loc[index][id_].values) 
                
    return daily_data

def combine_data(daily_data,day_list):
    
    # Columns (panels' ID)
    cols = [col for col in daily_data[day_list[0]].columns]

    # Create an empty DataFrame with columns of panels' ID
    clean = pd.DataFrame(columns=cols)

    # Rows (time frame index, from 6am to 7pm with granularity of 1 minute)
    rows = [index for index in daily_data[day_list[0]].index]

    day_col = []
    time_col = []
    epoch = []
    product_issue = []

    for day in day_list:
        # Daily data
        data = daily_data[day]
    
        if len(data.columns) != len(cols):
            product_issue.append(day)
            continue
        # Rows index
        rows = [index for index in data.index]
        for index in rows:
            row_values = data.loc[index]
    
            if row_values.isna().sum() == 0:
                clean.at[index] = row_values
            
                day_col.append(day[0:4]+'-'+day[4:6]+'-'+day[6:])
                time_col.append(str(index)[8:10]+':'+str(index)[10:])
                epoch.append(convert_to_epoch(index))

            elif row_values.isna().sum() == len(cols):
                continue
            elif row_values.isna().sum() <= 3:
                row_values = row_values.values
                index_loc = rows.index(index)
            
                day_col.append(day[0:4]+'-'+day[4:6]+'-'+day[6:])
                time_col.append(str(index)[8:10]+':'+str(index)[10:])
                epoch.append(convert_to_epoch(index))
    
                for i in range(len(row_values)):
                    if np.isnan(row_values[i]):
                        window = [-2,-1,0,1,2]
                        nearby_value = []
                    
                        for loc in window:
                            if (index_loc - loc) >= 0 and (index_loc + loc) < len(rows):
                                nearby_value.append(data.loc[rows[index_loc+loc]][i])
                        nearby_value = [nearby for nearby in nearby_value if ~np.isnan(nearby)]
                    
                        if len(nearby_value) == 0:
                            row_values[i] = 0
                        else:
                            row_values[i] = np.mean(nearby_value)    
                clean.at[index] = row_values
        
    clean['date'] = day_col
    clean['time'] = time_col
    clean['epoch_time'] = epoch
    
    return clean,product_issue

In [43]:
# Load files from current directory
files = os.listdir(os.getcwd())
files = [file for file in files if file[:-4] in roof_panel]

# Load data
dataset = [load_data(file) for file in files]

# Create daily data
daily_data = create_daily_data(dataset)

# Create day list to access daily data
day_list = [i for i in daily_data]

# Combine the power of all panels
clean_data, issue_day = combine_data(daily_data,day_list)

# Add the product issue data
for i in issue_day:
    anomaly,_ = combine_data(daily_data,[i])
    clean_data = pd.concat([clean_data,anomaly], axis=0, ignore_index=True)

# Replace NaN data with 0
clean_data = clean_data.fillna(0)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [3]:
# Collaborate with weather and temperature data from DarkSky API
# Weather data required

def combine_weather_temp(dataset):
    
    weather = pd.read_csv('darksky_weather_data.csv')
    day_list = [day for day in set(dataset['date'])]
    
    weather_dict = {}

    for i in range(len(weather)):

        epoch = weather.loc[i]['time']
        date_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(epoch))
        date = date_time.split(' ')[0]
        hour = date_time.split(' ')[1].split(':')[0]
    
        if date not in weather_dict:
            weather_dict[date] = {}
        
        if hour not in weather_dict[date]:
            weather_dict[date][hour] = []
            weather_dict[date][hour].append(weather.loc[i]['icon'])
            weather_dict[date][hour].append(weather.loc[i]['temperature'])
        
    weather_col = []
    temperature_col = []
    n = len(dataset)

    for i in range(n):
        date = dataset.loc[i]['date']
        hour = dataset.loc[i]['time'].split(':')[0]
    
        if date in weather_dict:
            if hour in weather_dict[date]:
                weather_col.append(weather_dict[date][hour][0])
                temperature_col.append(weather_dict[date][hour][1])
            else:
                weather_col.append(np.float64('nan'))
                temperature_col.append(np.float64('nan'))
        else:
            weather_col.append(np.float64('nan'))
            temperature_col.append(np.float64('nan'))
            
    for i in range(len(weather_col)):
        weather = weather_col[i]
        if weather == 'clear-day' or weather == 'clear-night':
            weather_col[i] = 'sunny'
        elif weather == 'partly-cloudy-day' or weather == 'partly-cloudy-night':
            weather_col[i] = 'partly-cloudy'
        
    dataset['weather']=weather_col
    dataset['temperature']=temperature_col
    
    return dataset


clean_data = combine_weather_temp(clean_data)

In [5]:
title = 
# Save clean data
export_csv = clean_data.to_csv (title + ".csv", index = False, header=True)    