**Important Note:** In order to generate the data files, run this notebook from the `notebooks` folder and make sure you have a `data` folder set up as described in the GitHub repo.

In [3]:
from datetime import datetime

def from_datetime_to_unix(year=2020, month=1, day=1, hour=0, minute=0, second=0):
    """Convert calendar fields to a Unix timestamp expressed in milliseconds.

    The fields are assembled into a naive ``datetime`` and measured against the
    naive epoch ``datetime(1970, 1, 1)``; no time-zone adjustment is applied.

    Returns:
        int: milliseconds elapsed between the epoch and the given moment.
    """
    epoch = datetime(1970, 1, 1)
    moment = datetime(year, month, day, hour, minute, second)
    elapsed_seconds = (moment - epoch).total_seconds()
    return int(elapsed_seconds * 1000)

In [4]:
# Maps a human-readable aggregation name to the code string that is sent as
# the `aggregation` query parameter of the consumption request.
AGGREGATION_CODES = {
    "five_min": "1",
    "hourly": "2",
    "daily": "3",
    "monthly": "4",
    "quarterly": "5",
    "ten_min": "6",
    "fifteen_min": "7",
    "twenty_min": "8",
    # Named consistently with the other minute-based keys.
    "thirty_min": "9",
    # Legacy spelling, kept so existing callers keep working.
    "thirty": "9",
}

In [5]:
import json
import os
from datetime import datetime

import pandas as pd
import requests
from dateutil import tz

# Time zone of the machine that is running this notebook.
HERE_TZ = tz.tzlocal()
# Zone used for the human-readable `date` column.
# NOTE(review): dateutil appears to read 'UTC+2' as two hours *ahead* of UTC
# (the saved output renders 00:00 UTC as 02:00), whereas POSIX reads the sign
# the other way round — confirm before reusing this string with another library.
ZIMBABWE_TZ = tz.gettz('UTC+2')
# Column order of the exported CSV files; `date` is derived from `timestamp`.
COLUMNS = [
    'date',
    'timestamp',
    'consumption',
    'solar',
    'alwaysOn',
    'gridImport',
    'gridExport',
    'selfConsumption',
    'selfSufficiency',
    'active',
    'reactive',
    'voltages',
    'phaseVoltages',
    'currentHarmonics',
    'voltageHarmonics',
]

def collect_data(aggregation_type="hourly", service_location_id="47740", start_year=2020, start_month=1, start_day=1, end_year=2021, end_month=2, end_day=12, access_token=None):
    """Download consumption data for one service location and save it as CSV.

    The requested window runs from midnight of the start date to midnight of
    the end date, both converted with ``from_datetime_to_unix``. The result is
    written to ``../data/data_{aggregation_type}_id_{service_location_id}.csv``
    and also returned.

    Args:
        aggregation_type: key of ``AGGREGATION_CODES`` selecting the resolution.
        service_location_id: identifier inserted into the request URL.
        start_year, start_month, start_day: first day of the requested window.
        end_year, end_month, end_day: last day of the requested window.
        access_token: bearer token for the API. When omitted, the
            ``SMAPPEE_ACCESS_TOKEN`` environment variable is used, so the
            credential never has to be stored in the notebook.

    Returns:
        pandas.DataFrame: one row per consumption record with the columns
        listed in ``COLUMNS``; empty (headers only) when the API returns no
        records for the window.

    Raises:
        KeyError: if ``aggregation_type`` is not a key of ``AGGREGATION_CODES``.
        ValueError: if no access token is supplied or configured.
        requests.HTTPError: if the API answers with an error status.
    """
    # Resolve the credential first so a missing token fails before any network I/O.
    token = access_token or os.environ.get("SMAPPEE_ACCESS_TOKEN")
    if not token:
        raise ValueError(
            "No API token available: pass access_token=... or set the "
            "SMAPPEE_ACCESS_TOKEN environment variable."
        )

    # Setting up variables
    aggregation_code = AGGREGATION_CODES[aggregation_type]
    from_ = from_datetime_to_unix(year=start_year, month=start_month, day=start_day)
    to_ = from_datetime_to_unix(year=end_year, month=end_month, day=end_day)

    # Making request; `params` lets requests build and encode the query string.
    url = "https://app1pub.smappee.net/dev/v3/servicelocation/{}/consumption".format(service_location_id)
    response = requests.get(
        url,
        params={"aggregation": aggregation_code, "from": from_, "to": to_},
        headers={"Authorization": f"Bearer {token}"},
        timeout=60,  # never hang the notebook on a stalled connection
    )
    # Surface HTTP errors (expired token, bad id, ...) instead of failing later
    # with an unrelated JSON/KeyError.
    response.raise_for_status()
    data = response.json()

    # Converting to pandas and saving .csv file
    data_df = pd.DataFrame(data['consumptions'])
    if data_df.empty:
        # No records for this window: keep the schema so downstream code can
        # still read the file.
        data_df = pd.DataFrame(columns=COLUMNS)
    else:
        # Convert the epoch milliseconds straight into the target zone, then
        # drop the tzinfo to keep naive wall-clock values in the CSV.
        data_df['date'] = data_df['timestamp'].apply(
            lambda ms: datetime.fromtimestamp(ms / 1000, tz=ZIMBABWE_TZ).replace(tzinfo=None)
        )
        data_df = data_df[COLUMNS]
    data_df.to_csv(f'../data/data_{aggregation_type}_id_{service_location_id}.csv', index=False)
    return data_df

In [16]:
# Resolutions to export for every site.
aggregation_types = ['five_min', 'hourly', 'daily', 'monthly']
# Sites to export, identified by their service location id.
service_location_ids = [
    '47740',  # Puma Rhodesville
    '47803',  # Puma HQ
]

# Every (site, resolution) pair, site-major so each site's files are
# produced together.
download_jobs = [
    (location, aggregation)
    for location in service_location_ids
    for aggregation in aggregation_types
]
for service_location_id, aggregation_type in download_jobs:
    collect_data(aggregation_type=aggregation_type, service_location_id=service_location_id)

In [7]:
# JMcQ # 2021/03/04
# Get latest few weeks of data to use for ML testing
collect_data(
    aggregation_type="hourly",
    service_location_id="47740",
    start_year=2021,
    start_month=2,
    start_day=1,
    end_year=2021,
    end_month=3,
    end_day=3,
)

Unnamed: 0,date,timestamp,consumption,solar,alwaysOn,gridImport,gridExport,selfConsumption,selfSufficiency,active,reactive,voltages,phaseVoltages,currentHarmonics,voltageHarmonics
0,2021-02-01 02:00:00,1612137600000,1924.152,670.403,593.951,1253.648,0.0,100.0,34.85,"[None, None, 476.6, 172.8, 0.0, None, None, No...","[None, None, 459.1, -37.3, 0.0, None, None, No...","[229.6, None, None]","[229.6, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
1,2021-02-01 03:00:00,1612141200000,1944.303,662.915,593.951,1281.388,0.0,100.0,34.10,"[None, None, 504.4, 172.8, 0.0, None, None, No...","[None, None, 486.1, -37.6, 0.0, None, None, No...","[228.9, None, None]","[228.9, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
2,2021-02-01 04:00:00,1612144800000,1844.709,661.204,593.951,1183.404,0.0,100.0,35.85,"[None, None, 406.8, 172.8, 0.0, None, None, No...","[None, None, 369.9, -37.5, 0.0, None, None, No...","[230.2, None, None]","[230.2, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
3,2021-02-01 05:00:00,1612148400000,1616.359,657.117,593.951,959.241,0.0,100.0,40.65,"[None, None, 458.6, 184.9, 0.0, None, None, No...","[None, None, 439.9, -39.7, 0.0, None, None, No...","[229.0, None, None]","[229.0, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
4,2021-02-01 06:00:00,1612152000000,1486.643,473.826,593.951,1012.717,0.0,100.0,31.88,"[None, None, 649.3, 250.7, 0.0, None, None, No...","[None, None, 563.1, -46.3, 0.0, None, None, No...","[225.9, None, None]","[225.9, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
636,2021-03-02 21:00:00,1614711600000,1804.970,621.108,203.984,1184.061,0.0,100.0,34.40,"[None, None, 346.0, 173.0, 0.0, None, None, No...","[None, None, 325.9, -35.3, 0.0, None, None, No...","[222.0, None, None]","[222.0, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
637,2021-03-02 22:00:00,1614715200000,1701.736,454.022,203.984,1247.713,0.0,100.0,26.68,"[None, None, 406.7, 173.7, 0.0, None, None, No...","[None, None, 408.5, -36.5, 0.0, None, None, No...","[225.8, None, None]","[225.8, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
638,2021-03-02 23:00:00,1614718800000,1727.693,564.456,203.984,1163.136,0.0,100.0,32.68,"[None, None, 321.5, 174.0, 0.0, None, None, No...","[None, None, 313.3, -36.8, 0.0, None, None, No...","[226.4, None, None]","[226.4, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."
639,2021-03-03 00:00:00,1614722400000,1862.484,644.251,203.984,1218.333,0.0,100.0,34.59,"[None, None, 374.1, 174.0, 0.0, None, None, No...","[None, None, 381.5, -37.1, 0.0, None, None, No...","[227.7, None, None]","[227.7, None, None]","[[], [], [], [], [], [], [], [], [], [], [], [...","[[], [], [], [], [], [], [], [], [], [], [], [..."


**notes**: 
* Some days are missing from the API response: for example, when requesting Jan 01 to Jan 07 we get only 5 consumption records, not 7.

```text
{serviceLocationId: int
consumptions: list[
    dict {
        "timestamp": int
        "consumption": float
        "solar": float
        "alwaysOn": float
        "gridImport": float 
        "gridExport": float 
        "selfConsumption": float
        "selfSufficiency": float 
        "active": list[],
        ...
    }
    ]
}
```