# Fetch weather data

Dark Sky provides a nice API: the first 1,000 calls are free, and each request after that costs 0.0001 cent.

In [2]:
# import ConfigImports Notebook to import and configure libs
%run ./ConfigImports.ipynb

# import API key
%run ./Secrets.ipynb

TF -> Using GPU ->  PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


#### Collect data from DarkSky API

In [2]:
def make_ts(year, month, day, hour, minute=0, second=0):
    """Return the whole-second Unix timestamp for the given date and time.

    The components build a naive ``datetime``; Python's ``timestamp()``
    interprets naive values in the machine's local time zone.
    """
    moment = datetime(year, month, day, hour, minute, second)
    return int(moment.timestamp())

#### Define API parameters

In [3]:
# Endpoint and credentials for the Dark Sky forecast API
BASE_URL = 'https://api.darksky.net/forecast'
API_KEY = DARK_SKY_API_KEY  # comes from the config file
headers = {'Accept-Encoding': 'gzip'}  # ask for gzip-compressed responses

# my house coordinates (latitude, longitude)
lat, long = 51.802931199999996, -8.302591999999999

# sample request time: 2020-01-04 15:10
timestamp = make_ts(2020, 1, 4, 15, 10)

#### Define keys to pull from the API data

In [4]:

# Fields kept from the "currently" section of each API response.
cur_keys = [
    field.strip()
    for field in """summary, precipIntensity, precipProbability, precipType, temperature, apparentTemperature, humidity,
windSpeed, windGust, windBearing, cloudCover, uvIndex, visibility""".split(',')
]

# Fields kept from the first entry of the "daily" section of each response.
daily_keys = [
    field.strip()
    for field in """summary, sunriseTime, sunsetTime, temperatureHigh, temperatureLow""".split(',')
]

#### Define date range for data pull

Pull data hourly (always at 30 minutes past the hour)

In [12]:
# One timestamp per hour at HH:30, from 2019-12-27 00:30 through 2020-03-02 23:30.
# The step is passed as a Timedelta rather than the '1H' alias, which pandas
# deprecated in 2.2, so the cell does not depend on alias spelling.
idx = pd.date_range(
    start='2019-12-27 00:30:00',
    end='2020-03-02 23:59:00',
    freq=pd.Timedelta(hours=1),
)
df = pd.DataFrame({'dt': idx})

#### Pull data

Collect data for each time frame and store the dataset as a CSV file

In [13]:
def make_url(ts):
    """Build the Dark Sky request URL for the configured location at time `ts`.

    `ts` is placed after the latitude and longitude in the URL path; the query
    string excludes the hourly, flags and minutely sections and sets units=ca.
    """
    location = f'{lat},{long},{ts}'
    query = 'exclude=hourly,flags,minutely&units=ca'
    return f'{BASE_URL}/{API_KEY}/{location}?{query}'

# Seconds to wait on a single request before giving up instead of hanging the loop.
REQUEST_TIMEOUT = 30

dark_sky = []
# Last value seen for each key; reused when a response omits that key.
prev_cur_obj = {}
prev_daily_obj = {}

# One session for the whole pull so the connection is reused between requests.
with requests.Session() as session:
    session.headers.update(headers)

    # Iterate the column itself so every item is a scalar Timestamp with plain
    # integer fields. Going through iterrows() and reading `d.dt` is ambiguous,
    # because a Series also exposes a `.dt` accessor under that attribute name.
    for dt in tqdm(df['dt'], total=df.shape[0]):

        # convert the date-time to the Unix timestamp used in the request URL
        ts = make_ts(dt.year, dt.month, dt.day, dt.hour, dt.minute)

        # make a call to Dark Sky API; fail loudly on timeouts and HTTP errors
        # rather than tripping over a missing key in an error payload
        response = session.get(make_url(ts), timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        sky_data = response.json()

        # a missing section is treated like a section with every key missing
        currently = sky_data.get('currently') or {}
        daily_points = (sky_data.get('daily') or {}).get('data') or [{}]
        today = daily_points[0]

        # init object to save
        cur_obj = {'ts': ts, 'dt': dt}

        # keep only selected data elements; fall back to the previous value,
        # or None when the key has not been seen in any response yet
        for k in cur_keys:
            if k in currently:
                prev_cur_obj[k] = currently[k]
            cur_obj[f'cur__{k}'] = prev_cur_obj.get(k)

        for k in daily_keys:
            if k in today:
                prev_daily_obj[k] = today[k]
            cur_obj[f'daily__{k}'] = prev_daily_obj.get(k)

        dark_sky.append(cur_obj)

# 'dt' already holds the timestamps, so no column overwrite is needed afterwards
dark_sky_df = pd.DataFrame(dark_sky)
dark_sky_df.to_csv('../Datasets/dark_sky_data_2019-12-27_2020-03-02.csv', index=False)

100%|██████████| 1608/1608 [39:24<00:00,  1.47s/it] 


In [7]:
# Load the saved object-detection records.
# NOTE: this rebinds `df`, which earlier held the hourly date range.
detections_path = '../Datasets/AllObjectDetections_2019-09-09_2020-03-02.parquet.gzip'
df = pd.read_parquet(detections_path)

In [9]:
df.head()

Unnamed: 0,index,img_idx,label,confidence,x1,y1,x2,y2,date,time,filename,img_n_boxes,time_ms,date_time,week_day,is_weekend,month,hour,min
0,0,72846,car,0.523175,298,7,426,71,2019-09-09,07.02.40,07.02.40.270_34c99836_car-car-car.jpg,1,270,2019-09-09 07:02:40.270,Monday,False,9,7,2
1,1,72847,person,0.759682,489,31,518,106,2019-09-09,12.02.42,12.02.42.921_ea6c9143_person-bicycle.jpg,2,921,2019-09-09 12:02:42.921,Monday,False,9,12,2
2,2,72847,bicycle,0.532076,444,54,484,100,2019-09-09,12.02.42,12.02.42.921_ea6c9143_person-bicycle.jpg,2,921,2019-09-09 12:02:42.921,Monday,False,9,12,2
3,3,72848,person,0.864749,463,55,537,263,2019-09-09,07.30.02,07.30.02.409_c5662b14_person-car-car.jpg,1,409,2019-09-09 07:30:02.409,Monday,False,9,7,30
4,4,72849,car,0.859297,302,23,410,73,2019-09-09,20.26.56,20.26.56.841_4ba2f42d_car.jpg,1,841,2019-09-09 20:26:56.841,Monday,False,9,20,26


In [11]:
# Write the first 300000 rows to a scratch CSV (the index is written too).
df.iloc[:300000].to_csv('testme.csv')

In [37]:
# 5x3 array of floating-point ones
np_data = np.ones(shape=(5, 3), dtype=float)

In [38]:
# Wrap the array in a DataFrame with default integer row and column labels.
pd_df = pd.DataFrame(data=np_data)

In [39]:
pd_df

Unnamed: 0,0,1,2
0,1.0,1.0,1.0
1,1.0,1.0,1.0
2,1.0,1.0,1.0
3,1.0,1.0,1.0
4,1.0,1.0,1.0


In [40]:
# Add a constant 'A' column and store it with the categorical dtype.
const_col = pd.Series('A', index=pd_df.index)
pd_df['new'] = const_col.astype('category')

In [41]:
pd_df

Unnamed: 0,0,1,2,new
0,1.0,1.0,1.0,A
1,1.0,1.0,1.0,A
2,1.0,1.0,1.0,A
3,1.0,1.0,1.0,A
4,1.0,1.0,1.0,A


In [43]:
pd_df.iloc[0, 3]

'A'

In [53]:
# Register any labels the categorical column does not know yet. Checking
# membership first keeps the cell re-runnable without hiding unrelated
# errors behind `except ValueError: pass`.
missing = [label for label in ('B', 'D') if label not in pd_df['new'].cat.categories]
if missing:
    pd_df['new'] = pd_df['new'].cat.add_categories(missing)

# Column position 3 is 'new'; both labels are valid categories at this point.
pd_df.iloc[0, 3] = 'B'
pd_df.iloc[1, 3] = 'D'

In [54]:
pd_df

Unnamed: 0,0,1,2,new
0,1.0,1.0,1.0,B
1,1.0,1.0,1.0,D
2,1.0,1.0,1.0,A
3,1.0,1.0,1.0,A
4,1.0,1.0,1.0,A


In [55]:
pd_df.memory_usage(deep=True).sum()

531