## Import 2017 Solar Generation Data

In [1]:
import datetime
import glob
import os
import pandas as pd
import pytz

In [2]:
# Location of data files (unzipped) relative to working directory
data_path = '..\\solar_data'
mths = glob.glob(data_path + "/*.txt")

In [3]:
mths

['..\\solar_data\\sl_023034_2017_01.txt',
 '..\\solar_data\\sl_023034_2017_02.txt',
 '..\\solar_data\\sl_023034_2017_03.txt',
 '..\\solar_data\\sl_023034_2017_04.txt',
 '..\\solar_data\\sl_023034_2017_05.txt',
 '..\\solar_data\\sl_023034_2017_06.txt',
 '..\\solar_data\\sl_023034_2017_07.txt',
 '..\\solar_data\\sl_023034_2017_08.txt',
 '..\\solar_data\\sl_023034_2017_09.txt',
 '..\\solar_data\\sl_023034_2017_10.txt',
 '..\\solar_data\\sl_023034_2017_11.txt',
 '..\\solar_data\\sl_023034_2017_12.txt']

In [4]:
colspec = [(1,2), (3,9), (10,14), (15,17), (18,20), (21,23), (24,26), 
           (27,34), (35,42), (43,50), (51,58), (59,66), 
           (67,74), (75,82), (83,90), (91,98), (99,106), 
           (107,114), (115,122), (123,130), (131,138), (139,146), 
           (147,154), (155,162), (163,170), (171,178), (179,186), 
           (187,194), (195,202), (203,210), (211, 218), (219,226), 
           (227,232), (233,238), (239,244), (245,252)]

headers = ['record_id', 'station_number', 'yr', 'mth', 'day', 'hr', 'mnt', 
          'mean_global', 'min_global', 'max_global', 'sd_global', 'uncert_mean_global',
          'mean_direct', 'min_direct', 'max_direct', 'sd_direct', 'uncert_mean_direct',
          'mean_diffuse', 'min_diffuse', 'max_diffuse', 'sd_diffuse', 'uncert_mean_diffuse',
          'mean_terr', 'min_terr', 'max_terr', 'sd_terr', 'uncert_mean_terr',
          'mean_dir_horiz', 'min_dir_horiz', 'max_dir_horiz', 'sd_dir_horiz', 'uncert_dir_horiz',
          'sunsh_sec_96', 'sunsh_sec_120', 'sunsh_sec_144', 'zenith_dist']

In [5]:
def load_1_sec_solar(path):
    return pd.read_fwf(path, colspecs=colspec, names=headers, index_col=False, skiprows=[0])

In [6]:
PV_2017 = pd.concat((load_1_sec_solar(f) for f in mths), ignore_index=True)

In [7]:
# Consolidate time/date fields into one
ACST = pytz.timezone('Australia/Adelaide')

def dt(yr, mth, day, hr, mnt):
    return ACST.localize(datetime.datetime(yr, mth, day, hr, mnt))

dt = PV_2017.apply(lambda x: dt(x['yr'], x['mth'], x['day'], x['hr'], x['mnt']), axis=1)
PV_2017['date_time'] = dt
PV_2017 = PV_2017.drop(['yr', 'mth', 'day', 'hr', 'mnt'], axis=1)
PV_2017 = PV_2017.set_index('date_time')

In [8]:
PV_2017.index[1]

Timestamp('2017-01-01 00:01:00+1030', tz='Australia/Adelaide')

In [9]:
datetime.datetime.utctimetuple(PV_2017.index[0])

time.struct_time(tm_year=2016, tm_mon=12, tm_mday=31, tm_hour=13, tm_min=30, tm_sec=0, tm_wday=5, tm_yday=366, tm_isdst=0)

In [10]:
PV_2017.columns

Index(['record_id', 'station_number', 'mean_global', 'min_global',
       'max_global', 'sd_global', 'uncert_mean_global', 'mean_direct',
       'min_direct', 'max_direct', 'sd_direct', 'uncert_mean_direct',
       'mean_diffuse', 'min_diffuse', 'max_diffuse', 'sd_diffuse',
       'uncert_mean_diffuse', 'mean_terr', 'min_terr', 'max_terr', 'sd_terr',
       'uncert_mean_terr', 'mean_dir_horiz', 'min_dir_horiz', 'max_dir_horiz',
       'sd_dir_horiz', 'uncert_dir_horiz', 'sunsh_sec_96', 'sunsh_sec_120',
       'sunsh_sec_144', 'zenith_dist'],
      dtype='object')

In [11]:
PV_2017.head()

Unnamed: 0_level_0,record_id,station_number,mean_global,min_global,max_global,sd_global,uncert_mean_global,mean_direct,min_direct,max_direct,...,uncert_mean_terr,mean_dir_horiz,min_dir_horiz,max_dir_horiz,sd_dir_horiz,uncert_dir_horiz,sunsh_sec_96,sunsh_sec_120,sunsh_sec_144,zenith_dist
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01 00:00:00+10:30,l,23034,0.0,0.0,0.0,0.0,,0.0,,0.0,...,,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0
2017-01-01 00:01:00+10:30,l,23034,0.0,0.0,0.0,0.0,,0.0,,0.0,...,16.76,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0
2017-01-01 00:02:00+10:30,l,23034,0.0,0.0,0.0,0.0,,0.0,,0.0,...,16.71,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0
2017-01-01 00:03:00+10:30,l,23034,0.0,0.0,0.0,0.0,,0.0,,0.0,...,16.72,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0
2017-01-01 00:04:00+10:30,l,23034,0.0,0.0,0.0,0.0,,0.0,,0.0,...,16.73,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0


In [18]:
PV_2017.index[0]

Timestamp('2017-01-01 00:00:00+1030', tz='Australia/Adelaide')

In [12]:
import matplotlib.pyplot as plt
plt.plot(PV_2017.mean_dir_horiz['2017-01-01':'2017-01-03']);

## Import Observation data for Adelaide Airport

This code extracts the latest observations from the BOM website for Adelaide Airport.  Subscription access is required for historic observations at half-hourly level. If we obtain it, hopefully the same code can be recycled.

In [13]:
import requests
url = 'http://www.bom.gov.au/fwo/IDS60901/IDS60901.94672.json'
r = requests.get(url)
obs = r.json()

In [14]:
ADL_obs = pd.DataFrame(obs['observations']['data'])

In [15]:
# Create a datetime index
ADL_obs['date_time'] = ADL_obs.aifstime_utc.apply(
    lambda x: pytz.timezone('UTC').localize(datetime.datetime.strptime(x,'%Y%m%d%H%M%S')).astimezone(ACST))
ADL_obs.date_time = pd.to_datetime(ADL_obs.date_time)
ADL_obs = ADL_obs.drop(['aifstime_utc'], axis=1)
ADL_obs = ADL_obs.set_index('date_time')

In [16]:
ADL_obs

Unnamed: 0_level_0,air_temp,apparent_t,cloud,cloud_base_m,cloud_oktas,cloud_type,cloud_type_id,delta_t,dewpt,gust_kmh,...,sort_order,swell_dir_worded,swell_height,swell_period,vis_km,weather,wind_dir,wind_spd_kmh,wind_spd_kt,wmo
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-08-14 22:30:00+09:30,14.4,7.4,Clear,,0.0,-,,5.5,1.8,37,...,0,-,,,10,-,N,28,15,94672
2018-08-14 22:00:00+09:30,14.1,7.6,-,,,-,,5.1,2.6,37,...,1,-,,,10,-,N,26,14,94672
2018-08-14 21:30:00+09:30,14.8,8.6,-,,,-,,5.5,2.4,30,...,2,-,,,10,-,N,24,13,94672
2018-08-14 21:00:00+09:30,15.1,8.6,Clear,2500.0,0.0,-,,5.5,2.9,35,...,3,-,,,40,Fine,N,26,14,94672
2018-08-14 20:30:00+09:30,14.6,7.1,-,,,-,,5.0,3.6,46,...,4,-,,,10,-,N,32,17,94672
2018-08-14 20:25:00+09:30,14.6,7.9,-,,,-,,5.0,3.6,46,...,5,-,,,10,-,N,28,15,94672
2018-08-14 20:00:00+09:30,14.5,8.3,-,,,-,,4.7,4.3,33,...,6,-,,,10,-,N,26,14,94672
2018-08-14 19:30:00+09:30,14.9,8.3,-,,,-,,4.9,4.3,35,...,7,-,,,10,-,NNE,28,15,94672
2018-08-14 19:00:00+09:30,15.2,9.1,-,,,-,,4.9,4.7,32,...,8,-,,,10,-,NNE,26,14,94672
2018-08-14 18:30:00+09:30,15.2,10.4,-,,,-,,4.6,5.5,28,...,9,-,,,10,-,N,20,11,94672


In [17]:
ADL_obs.index[0]

Timestamp('2018-08-14 22:30:00+0930', tz='Australia/Adelaide')