# Getting data from the API

In [None]:
import json
import pandas as pd
import requests
import yaml
import os

# Load the project configuration from its YAML file.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs on a machine with this exact layout — consider making it configurable.
config_file = "/users/macbook/Development/solar_power_forecast_ML/config.yaml"
with open(config_file, 'r') as f:
    config = yaml.safe_load(f)

# Base URL of the API
api_url = config['api_url']

# Directories for the raw and interim data files
raw_data_path = config['paths']['raw_data_path']
interim_data_path = config['paths']['interim_data_path']


print(f"Loaded config for project: {config['project_info']['project_name']}")
# A path string is never identical to the `True` singleton, so test for a
# non-empty value instead of using `is True`.
if raw_data_path:
    print(f"Raw data path: {raw_data_path}")


NameError: name '__file__' is not defined

### API call variables:
- Choose the output format: `csv` or `json`

In [2]:
# 1. Inputs for the API call
# Location label: only used to build file names, never sent to the API
location_name = config['project_info']['location_name']

# Site coordinates: Brandenburger Tor (latitude, longitude)
latitude, longitude = 52.516275, 13.377704
# First and last year of the requested period
start_year, end_year = 2005, 2023

# API tool to query — 'seriescalc': hourly radiation; 'DRcalc': daily radiation;
# 'tmy': typical meteorological year
response_type = 'seriescalc'

# Output format requested from the API
format = 'csv'

In [3]:
# Build the endpoint URL with plain "/" joining. os.path.join would use the
# operating system's path separator (a backslash on Windows), which is wrong for URLs.
api_url = config['api_url']
base_url = f"{api_url.rstrip('/')}/{response_type}"
print(f"API call has been made to: {base_url} ")

# Parameters for a 35° tilt system
pvgid_params = dict(
    lat=latitude,
    lon=longitude,
    #usehorizon=           # Not mandatory. Calculate taking into account shadows from high horizon. Value of 1 for "yes"
    startyear=start_year,
    endyear=end_year,
    pvcalculation=1,       # 0: only solar radiation, 1: add PV production
    peakpower=4.5,         # Nominal power of the PV system, in kWp.
    mountingplace='free',  # Example: free-standing (or 'building')
    loss=14,               # % system losses
    slope=35,              # Example: 35 degree tilt from horizontal
    azimuth=0,             # Example: Facing South (0 degrees)
    components=1,          # 1: outputs beam, diffuse and reflected radiation components. 0: only global values.
    hourly=1,
    outputformat=format    # basic: for non comment csv output, json or csv
)

# Reset the results first so that a failed request cannot leave the response or
# data of an earlier run behind in the kernel.
response = None
data = None

try:
    response = requests.get(base_url, params=pvgid_params, timeout=30)

    if response.status_code == 200:
        print("API call successful")
        # Keep the payload as text for csv, as parsed JSON for json
        if format == 'csv':
            data = response.text
        elif format == 'json':
            data = response.json()
    else:
        print(f"API call failed with status code: {response.status_code}, server response: {response.text}")

except requests.exceptions.RequestException as e:
    # Connection problems and timeouts are reported here; `data` then stays None
    print(f"An error occurred during the API request: {e}")

API call has been made to: https://re.jrc.ec.europa.eu/api/v5_3/seriescalc 
API call successful


## Save API call data 
- save raw file locally from call
- remove headers & footers (for csv file)
- save to data/interim

In [4]:
# Build the output file names and full paths for the raw and interim copies.
paths_cfg = config['paths']

# Extension follows the format used for the API call (csv or json)
file_type = f".{format}"
# Part of the file name shared by both files
name_stem = f"{location_name}_{response_type}{file_type}"

# Raw file: the API response as received
raw_data_path = paths_cfg['raw_data_path']
raw_file_name = f"raw_{name_stem}"
raw_data_file = f"{raw_data_path}/{raw_file_name}"

# Interim file: the copy saved under the interim data directory
interim_data_path = paths_cfg['interim_data_path']
interim_file_name = f"no_headers_{name_stem}"
interim_data_file = f"{interim_data_path}/{interim_file_name}"

In [None]:
def convert_time_column(data, time_format: str) -> pd.DataFrame:
    """Return a DataFrame built from ``data`` with its "time" column parsed as datetimes.

    Args:
        data: Anything ``pd.DataFrame`` accepts that yields a "time" column
            (for example a DataFrame or a dict of lists).
        time_format: strptime-style format string passed to ``pd.to_datetime``.

    Returns:
        A new DataFrame with the same columns in the same order, where "time"
        holds the parsed datetimes. The ``data`` argument is left unmodified.

    Raises:
        KeyError: If the frame has no "time" column.
    """
    frame = pd.DataFrame(data)
    # assign() works on a copy, so a DataFrame passed in by the caller is never altered
    return frame.assign(time=pd.to_datetime(frame["time"], format=time_format))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 166536 entries, 0 to 166535
Data columns (total 9 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   time    166536 non-null  datetime64[ns]
 1   P       166536 non-null  float64       
 2   Gb(i)   166536 non-null  float64       
 3   Gd(i)   166536 non-null  float64       
 4   Gr(i)   166536 non-null  float64       
 5   H_sun   166536 non-null  float64       
 6   T2m     166536 non-null  float64       
 7   WS10m   166536 non-null  float64       
 8   Int     166536 non-null  float64       
dtypes: datetime64[ns](1), float64(8)
memory usage: 11.4 MB
None


In [None]:
# Save the API response locally (csv or json).
# Refuse to write anything unless the request succeeded; otherwise an error
# body would overwrite a previously saved raw file.
if response is None or response.status_code != 200:
    raise RuntimeError("No successful API response available; nothing to save.")

# Create the raw data directory if needed so that open() cannot fail on a missing folder
os.makedirs(os.path.dirname(raw_data_file) or ".", exist_ok=True)

if format == 'csv':
    with open(raw_data_file, 'w', encoding='utf-8') as csv_file:
        csv_file.write(response.text)
    print(f"File saved locally: {raw_data_file}")

    # Re-read the raw file without the lines surrounding the table, parse the
    # time column into timestamps and store the result in the interim directory.
    # NOTE(review): 10 leading and 12 trailing lines are assumed to be the API's
    # header and footer for this request — confirm if the request parameters change.
    with open(raw_data_file, 'r', encoding='utf-8') as csv_file:
        data = pd.read_csv(csv_file, skiprows=10, skipfooter=12, sep=',', engine='python')
    data = convert_time_column(data, time_format="%Y%m%d:%H%M")

    os.makedirs(os.path.dirname(interim_data_file) or ".", exist_ok=True)
    data.to_csv(interim_data_file, index=False)
    print(f"File saved locally: {interim_data_file}")

elif format == 'json':
    # Only the raw copy is written for json; no interim file is produced
    with open(raw_data_file, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=2)
    print(f"File saved locally: {raw_data_file}")

File saved locally: /Users/macbook/Development/solar_power_forecast_ML/data/01_raw/raw_brandenburger_gate_seriescalc.csv
File saved locally: /Users/macbook/Development/solar_power_forecast_ML/data/02_interim/no_headers_brandenburger_gate_seriescalc.csv
