In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from sklearn import linear_model as lm
from sklearn.model_selection import cross_val_score as cvs
from sklearn.model_selection import KFold
# Apply the 'seaborn-poster' style sheet to every figure drawn in this notebook.
# NOTE(review): newer matplotlib releases appear to have renamed the bundled
# seaborn styles (e.g. 'seaborn-v0_8-poster') — confirm against the installed version.
plt.style.use('seaborn-poster')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [12]:
# Resolved absolute path of the current working directory.
module_dir = os.path.realpath(os.curdir)

# Raise pandas' display limits so wide frames are shown without truncation.
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 100)

### Investigating data for useful azimuth-filling features
Notes:
* Lat/long may be an insignificant feature: it is recommended that all solar panels point toward the equator; however, not all of the sites in NA point south, for varying reasons
* Tracing the azimuth back from the annual production (kWh) could be doable (see table here): https://www.civicsolar.com/support/installer/articles/calculating-your-optimal-azimuth-angle
   * This would depend on how much power potential is expected in each case
   * Standard test conditions (STC) rating (DC) is given on the PVDAQ metadata, could compare that to actual performance
   
* Tracing the correlation to the GHI (Global Horizontal Irradiance) could be an option
   
   
#### Latitude
"In the northern hemisphere, between the latitudes of 23 and 90, the sun is always in the south. Therefore, the modules on an array are directed to the south in order to get the most out of the sun’s energy."

#### Train Results on the POA Irradiance Calculation?

In [3]:
# Raw time-series CSV; the file name suggests PVDAQ system 1231, year 2006.
file_name = 'pvdaq_data_1231_2006.csv'
df = pd.read_csv(filepath_or_buffer=file_name)

In [10]:
# Peek at a small window of rows; .loc label slices include both endpoints.
df.loc[1210:1220, :]

Unnamed: 0,SiteID,Date-Time,ac_power,ambient_temp,gh_irradiance,poa_irradiance
1210,1231,2006-01-13 14:30:00,880,25.444448,287.2,344.7
1211,1231,2006-01-13 14:45:00,440,25.500003,194.9,227.8
1212,1231,2006-01-13 15:00:00,120,25.333337,137.6,158.5
1213,1231,2006-01-13 15:15:00,160,25.000003,112.9,127.4
1214,1231,2006-01-13 15:30:00,120,24.66667,58.5,65.7
1215,1231,2006-01-13 15:45:00,120,24.444448,66.6,71.7
1216,1231,2006-01-13 16:00:00,0,22.16667,37.1,43.0
1217,1231,2006-01-13 16:15:00,0,20.51667,28.2,32.4
1218,1231,2006-01-13 16:30:00,0,20.600003,59.5,62.0
1219,1231,2006-01-13 16:45:00,40,20.450003,63.5,69.0


In [22]:
# System-level CSV export; the file name suggests system 1231 over all of 2006.
file_name = 'pvdaq_system-1231_01-01-2006_12-31-2006.csv'
aggdf = pd.read_csv(filepath_or_buffer=file_name)

# Report (rows, columns) before listing the column names.
print(aggdf.shape)

# Disabled: narrower selection of efficiency / yield columns.
# aggdf[['load_efficiency', 'mean_array_efficiency',
#        'poa_irradiation', 'reference_yield', 'system_id',
#        'total_energy_input', 'total_energy_output',
#        'total_system_efficiency']]
aggdf.columns

(12, 19)


Index([u'measdatetime', u'array_capture_losses', u'array_energy_fraction',
       u'array_yield', u'availability', u'bos_efficiency', u'bos_losses',
       u'energy_from_array', u'energy_from_grid', u'energy_to_grid',
       u'final_yield', u'load_efficiency', u'mean_array_efficiency',
       u'poa_irradiation', u'reference_yield', u'system_id',
       u'total_energy_input', u'total_energy_output',
       u'total_system_efficiency'],
      dtype='object')

In [23]:
# Show the energy_to_grid column on its own to check how much of it is populated.
aggdf.loc[:, 'energy_to_grid']

0     128.71
1        NaN
2        NaN
3        NaN
4        NaN
5        NaN
6        NaN
7        NaN
8        NaN
9        NaN
10       NaN
11       NaN
Name: energy_to_grid, dtype: float64

#### Inspecting data that is missing azimuth metadata on PVDAQ
[1430] Raw and Aggregate

In [24]:
# Raw time-series CSV; the file name suggests PVDAQ system 1430, year 2016.
# Note: this rebinds `df`, replacing the frame loaded earlier in the notebook.
file_name = 'pvdaq_data_1430_2016.csv'
df = pd.read_csv(filepath_or_buffer=file_name)

In [33]:
# List every available column, then preview the first rows of a chosen few.
print(df.columns)
df.loc[:, ['dc_voltage', 'kWh_gross', 'kWh_net',
           'module_temp', 'poa_irradiance']].head()

Index([u'SiteID', u'Date-Time', u'PR', u'ac_power', u'ambient_temp',
       u'dc_current', u'dc_power', u'dc_voltage', u'kWh_gross', u'kWh_net',
       u'module_temp', u'poa_irradiance'],
      dtype='object')


Unnamed: 0,dc_voltage,kWh_gross,kWh_net,module_temp,poa_irradiance
0,13.666666,0.0,0.0,-15.797866,8.173266
1,12.6,0.0,0.0,-16.058266,8.153266
2,12.2,0.0,-1.0,-16.107266,8.300866
3,12.733333,0.0,0.0,-16.0402,7.105666
4,12.333333,0.0,0.0,-16.393733,7.9804


### Conclusions
* Data that is missing azimuth can still have POA irradiance
   * (Does this mean we can do POA, sun position, lat/long to figure out the azimuth?)
   * Answer: Yes, but POA is not always given; see the docs example from RdTools
* Are there two separate problems here: when POA irradiance IS provided and when it is NOT?
   * Answer: Likely yes, because POA is not always given (see above) — **TODO**: finish this explanation

### Questions
* How is POA irradiance tracked?
* If we have GHI irradiance and POA irradiance, can we calculate azimuth?