# Pull Solar Generation Data from Renewables.ninja and Prepare

**Note: Renewables.ninja can provide solar generation data for any location worldwide, but only for the period from 2000 to the end of 2023.**

In [None]:
import io
import json
import os
import time

import numpy as np
import pandas as pd
import requests
import urllib3
import yaml

In [None]:
# Site: Cambridge Botanic Garden weather station, coordinates taken from
# https://www.metoffice.gov.uk/research/climate/maps-and-data/uk-synoptic-and-climate-stations
latlon = [52.194, 0.131]  # [latitude, longitude] of the location to get data for
years = [*range(2000, 2024)]  # years of data to retrieve (2000 to 2023 inclusive)

# PV system settings; a value left as None is replaced by a default
# before the API request is built.
system_loss = tracking = tilt = azim = None

In [None]:
# set default pv attributes
# Only fill in a default when the setting was left as None. A plain
# truthiness test (`if not tilt`) would also overwrite an explicit 0,
# which is a valid number for these settings (e.g. tilt = 0 for a
# horizontal panel, or system_loss = 0 for a lossless system).
if system_loss is None:
    system_loss = 0.1
if tracking is None:
    tracking = 0
if tilt is None:
    tilt = 35
if azim is None:
    azim = 180  # south facing, see docs

**NOTE**: an API token is required to access the data, see docs for more info - https://www.renewables.ninja/documentation/api

In [None]:
# Read the API credentials from the local `api.yaml` file.
# NOTE(review): `yaml.load` with `FullLoader` can construct more than plain
# data types; this is only appropriate because `api.yaml` is a trusted,
# locally-authored file. Consider `yaml.safe_load` if that ever changes.
with open('api.yaml') as creds_file:
    api_creds = yaml.load(creds_file, Loader=yaml.FullLoader)

In [None]:
# Pull PV generation data from the Renewables.ninja API, one calendar year
# per request, collecting one DataFrame per year in `data_per_year` and the
# metadata of the most recent successful response in `metadata`.
#
# NOTE(review): certificate verification is switched off for these requests
# (`verify=False`) and the resulting warning is silenced, even though every
# request carries the API token. This is kept as-is because it may be needed
# on the network this notebook runs on, but it should be removed if possible.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

token = api_creds['credentials']['token']
url = 'https://www.renewables.ninja/api/data/pv'

s = requests.session()
s.headers = {'Authorization': 'Token ' + token}

args = {
    'lat': latlon[0],
    'lon': latlon[1],
    'date_from': '',  # placeholder, overwritten for every year below
    'date_to': '',  # placeholder, overwritten for every year below
    'dataset': 'merra2',
    'capacity': 1,
    'system_loss': system_loss,
    'tracking': tracking,
    'tilt': tilt,
    'azim': azim,
    'format': 'json',
    'raw': 'true',  # get raw weather data as well
}

data_per_year = []
metadata = {}
for request_idx, year in enumerate(years):
    args['date_from'] = f'{year}-01-01'
    args['date_to'] = f'{year}-12-31'
    if request_idx > 0:
        # space out api calls; no need to wait before the very first one,
        # whichever year the list starts with
        time.sleep(10)
    r = s.get(url, params=args, verify=False)
    if r.status_code != 200:
        print('Error (' + str(r.status_code) + ') getting data for year ' + str(year))
        print(r.text)
        cont = input("Data pull failed. Save partial data set? (y/n): ")
        if cont.strip().lower() not in ('yes', 'y', 'yep'):
            pull_successful = False
            # A bare `raise` has no active exception to re-raise here, so
            # raise an explicit, descriptive error instead.
            raise RuntimeError(
                f'Data pull aborted: request for {year} failed with status {r.status_code}'
            )
        break
    print('Data for ' + str(year) + ' pulled successfully')
    parsed_response = json.loads(r.text)
    # Wrap the JSON text in a file-like object: passing a literal JSON
    # string to `pd.read_json` is deprecated in recent pandas versions.
    data = pd.read_json(io.StringIO(json.dumps(parsed_response['data'])), orient='index')
    # at this point I could save the yearly data to CSVs for later access?
    data_per_year.append(data)
    metadata = parsed_response['metadata']

if not data_per_year:
    raise RuntimeError('No data was retrieved, so there is nothing to process or save.')

if len(data_per_year) < len(years):
    # Partial pull accepted by the user: keep `years` in step with the data
    # actually retrieved so later cells only process and save those years.
    years = years[:len(data_per_year)]
    print('Continuing with partial data set for years ' + str(years[0]) + ' to ' + str(years[-1]))
print('Pulling data completed')

In [None]:
# Stack the per-year frames into one table. The index of each yearly frame
# becomes a regular 'datetime' column. Iterating over `data_per_year` itself
# (rather than indexing it by position in `years`) also works when fewer
# years were pulled than requested, and a single concat avoids re-copying
# the accumulated frame on every iteration.
pv_gen_data = pd.concat(
    [yearly.rename_axis('datetime').reset_index() for yearly in data_per_year],
    ignore_index=True,
)

# convert units to Watts (values are scaled by 1000, then rounded to 1 d.p.)
for col in ('electricity', 'irradiance_direct', 'irradiance_diffuse'):
    pv_gen_data[col] = np.around(pv_gen_data[col] * 1000, 1)

# `DataFrame.round` returns a new frame, so the result must be assigned back
# for the rounding to take effect.
pv_gen_data = pv_gen_data.round({'temperature': 3})

pv_gen_data.rename(columns={
    'electricity': 'solar generation [W/kW]',
    'irradiance_direct': 'irradiance_direct [W/m2]',
    'irradiance_diffuse': 'irradiance_diffuse [W/m2]',
    'temperature': 'temperature [degC]',
}, inplace=True)

In [None]:
# validate data availability
# `True in <Series>` tests membership in the Series *index*, not its values,
# so it cannot detect NaNs; count missing values explicitly instead. A raised
# exception is used rather than `assert` so the check cannot be optimised away.
for col in ['solar generation [W/kW]', 'irradiance_direct [W/m2]', 'irradiance_diffuse [W/m2]']:
    n_missing = int(pv_gen_data[col].isna().sum())
    if n_missing:
        raise ValueError(f"Column '{col}' contains {n_missing} missing (NaN) value(s)")

### Note on irradiance units

Unfortunately, renewables.ninja does not provide unit information for the irradiance values returned when raw data is requested.

However, looking at the codebase for the `gsee` package, https://github.com/renewables-ninja/gsee, which the renewables.ninja documentation says is used to produce the solar generation data, https://renewables.ninja/documentation, it can be determined that the irradiance values provided are most likely in units of [kW/m2].

In the `gsee` code implementation, all irradiances are in units of [W/m2] - this is easiest seen by looking at the input units of `gsee.pv.PVPanel.panel_power(...)`, which is used in `pv.run_model(...)` to compute the pv power generation. However, the values returned by renewables.ninja are in the wrong scale, and are roughly 1000 times smaller than the values expected - comparing to https://solcast.com/solar-radiation-map/united-kingdom. So it is assumed the irradiance values are given in [kW/m2] to be consistent with the power units which are [kW]. Further evidence for this is found as the irradiance values returned do not change as the power capacity of the panel specified in the API call increases, indicating that they are normalised by the size of the panel.

In [None]:
# Preview the rows at positions 8 to 19 of the combined table as a quick sanity check.
print(pv_gen_data.iloc[8:20])

In [None]:
# Show the metadata kept from the most recent successful API response.
print(metadata)

In [None]:
# save processed data to csv
# Output goes to processed_data/<location>_<lat>_<lon>/ with one CSV per year
# plus a JSON metadata file; dots in the coordinates are replaced by dashes.
location = 'cambridge'
dir_name = '{0}_{1}_{2}'.format(
    location,
    str(latlon[0]).replace(".", "-"),
    str(latlon[1]).replace(".", "-"),
)
save_dir = os.path.join('processed_data', dir_name)
# `makedirs` also creates the parent 'processed_data' directory when it is
# missing (plain `mkdir` would fail) and tolerates an existing directory.
os.makedirs(save_dir, exist_ok=True)

saved_years = []
for year in years:
    year_data = pv_gen_data[pv_gen_data['datetime'].dt.year == year].copy()
    if year_data.empty:
        # e.g. after a partial pull: do not write a header-only file
        print(f'No data available for {year}, skipping')
        continue
    year_data['datetime'] = year_data['datetime'].dt.strftime('%Y-%m-%d %H:%M:%S')
    year_data.to_csv(os.path.join(save_dir, f'{year}.csv'), index=False)
    saved_years.append(year)

# save metadata
mdata_save_path = os.path.join(save_dir, f"{location}_mdata.json")
mdata_dict = {
    'location': location,
    'data_path': save_dir,
    'years': saved_years,  # only the years for which a CSV was written
    'RN_mdata': metadata,
}
with open(mdata_save_path, 'w') as file:
    json.dump(mdata_dict, file, indent=4)