## Get areas from .npz files and put into pandas dataframe

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime

In [24]:
# Open the four yearly area archives (2017-2020), one .npz file per year.
# np.load is applied to the paths in the order listed and each result is bound
# to the matching per-year name.
areas_2017, areas_2018, areas_2019, areas_2020 = (
    np.load(npz_path)
    for npz_path in (
        '/home/esther/poppy-finder/data/areas_2017.npz',
        '/home/esther/poppy-finder/data/areas_2018.npz',
        '/home/esther/poppy-finder/data/areas_2019.npz',
        '/home/esther/poppy-finder/data/areas_2020.npz',
    )
)

In [29]:
# Copy each yearly archive into a plain dict: one entry per array name listed
# in the archive's .files, mapping that name to the stored array.
areas_2017_dict = dict((name, areas_2017[name]) for name in areas_2017.files)
areas_2018_dict = dict((name, areas_2018[name]) for name in areas_2018.files)
areas_2019_dict = dict((name, areas_2019[name]) for name in areas_2019.files)
areas_2020_dict = dict((name, areas_2020[name]) for name in areas_2020.files)

# Fold the yearly dicts into a single dict, earliest year first. If two years
# ever shared a key, the later year's entry would replace the earlier one.
areas_dict = {}
areas_dict.update(areas_2017_dict)
areas_dict.update(areas_2018_dict)
areas_dict.update(areas_2019_dict)
areas_dict.update(areas_2020_dict)

In [111]:
# Build one list per dataframe column in a single pass over areas_dict.
# All lists follow the dict's key order, so position i in every list refers to
# the same entry.

location_list = []
date_list = []
background_list = []
n_patches_list = []
patches_list = []
max_patch_list = []

for entry_name, entry_areas in areas_dict.items():

    # keys are split on '_': first piece is the date, second piece the location
    name_parts = entry_name.split('_')
    date_list.append(datetime.strptime(name_parts[0], '%Y-%m-%d'))
    location_list.append(name_parts[1])

    # element 0 is the # of pixels in the background area
    background_list.append(entry_areas[0])

    # every element after the background counts as one patch
    n_patches_list.append(len(entry_areas) - 1)
    patch_areas = entry_areas[1:]

    # total # of pixels over all (non-background) patches
    patch_total = sum(patch_areas)
    patches_list.append(patch_total)

    # pixels in the biggest non-background patch; a zero total (which includes
    # the no-patches case, where max() would fail) is recorded as 0
    max_patch_list.append(0 if patch_total == 0 else max(patch_areas))

In [143]:
# Assemble the summary table: one row per areas_dict entry, indexed by its date.
column_data = {
    'location': location_list,
    'background': background_list,
    'patches': patches_list,
    'n_patches': n_patches_list,
    'max_patch': max_patch_list,
}
df = pd.DataFrame(column_data, index=date_list)
df

Unnamed: 0,location,background,patches,n_patches,max_patch
2017-03-08,grassmtn,640338,0,0,0
2017-03-15,antelope,569449,0,0,0
2017-03-25,antelope,569449,0,0,0
2017-03-28,grassmtn,640338,0,0,0
2017-04-04,antelope,557560,11889,1,11889
...,...,...,...,...,...
2020-04-23,antelope,393647,175802,16,107001
2020-04-23,elsinore,823250,0,0,0
2020-04-26,grassmtn,640338,0,0,0
2020-04-28,antelope,473324,96125,19,35983


In [144]:
# Split the summary table into one frame per location by matching the
# 'location' column, then display the grassmtn subset.
df_antelope = df.loc[df['location'] == 'antelope']
df_elsinore = df.loc[df['location'] == 'elsinore']
df_grassmtn = df.loc[df['location'] == 'grassmtn']
df_grassmtn

Unnamed: 0,location,background,patches,n_patches,max_patch
2017-03-08,grassmtn,640338,0,0,0
2017-03-28,grassmtn,640338,0,0,0
2017-04-27,grassmtn,640338,0,0,0
2018-03-08,grassmtn,640338,0,0,0
2018-03-18,grassmtn,640338,0,0,0
2018-03-23,grassmtn,640338,0,0,0
2018-03-28,grassmtn,640338,0,0,0
2018-04-02,grassmtn,640338,0,0,0
2018-04-12,grassmtn,640338,0,0,0
2018-04-17,grassmtn,640338,0,0,0


In [149]:
# Display the rows of df whose location is 'antelope'.
df_antelope

Unnamed: 0,location,background,patches,n_patches,max_patch
2017-03-15,antelope,569449,0,0,0
2017-03-25,antelope,569449,0,0,0
2017-04-04,antelope,557560,11889,1,11889
2017-04-14,antelope,569449,0,0,0
2017-04-24,antelope,569449,0,0,0
2018-03-05,antelope,569449,0,0,0
2018-03-15,antelope,569449,0,0,0
2018-03-25,antelope,569449,0,0,0
2018-03-30,antelope,569449,0,0,0
2018-04-04,antelope,569449,0,0,0


In [150]:
# Display the rows of df whose location is 'elsinore'.
df_elsinore

Unnamed: 0,location,background,patches,n_patches,max_patch
2018-03-05,elsinore,823250,0,0,0
2018-03-15,elsinore,823250,0,0,0
2018-03-25,elsinore,823250,0,0,0
2018-03-30,elsinore,823250,0,0,0
2018-04-09,elsinore,823250,0,0,0
2018-04-14,elsinore,823250,0,0,0
2018-04-24,elsinore,823250,0,0,0
2018-04-29,elsinore,823250,0,0,0
2019-03-05,elsinore,823250,0,0,0
2019-03-15,elsinore,678471,144779,19,98150


In [39]:
# Read the observation CSVs for the antelope and elsinore sites, in that order.
observed_antelope, observed_elsinore = map(
    pd.read_csv,
    (
        '/home/esther/poppy-finder/data/observations-antelope.csv',
        '/home/esther/poppy-finder/data/observations-elsinore.csv',
    ),
)

In [33]:
# Display the table read from observations-antelope.csv.
observed_antelope

Unnamed: 0,observed_on,latitude,longitude
0,2017-03-11,34.749275,-118.395100
1,2017-03-21,34.739843,-118.385018
2,2017-03-21,34.737230,-118.376000
3,2017-03-12,34.724912,-118.396906
4,2017-03-24,34.751371,-118.252298
...,...,...,...
191,2020-03-29,34.729131,-118.381763
192,2020-04-24,34.724444,-118.402500
193,2020-04-25,34.747997,-118.318550
194,2019-03-31,34.703987,-118.331603


In [40]:
# Parse every 'observed_on' string (format '%Y-%m-%d') into a datetime and
# store the parsed values, in row order, as a new 'date' column.
obs_antelope_list = []
for observed_on in observed_antelope['observed_on'].tolist():
    obs_antelope_list.append(datetime.strptime(observed_on, '%Y-%m-%d'))

observed_antelope['date'] = obs_antelope_list
observed_antelope

Unnamed: 0,observed_on,latitude,longitude,date
0,2017-03-11,34.749275,-118.395100,2017-03-11
1,2017-03-21,34.739843,-118.385018,2017-03-21
2,2017-03-21,34.737230,-118.376000,2017-03-21
3,2017-03-12,34.724912,-118.396906,2017-03-12
4,2017-03-24,34.751371,-118.252298,2017-03-24
...,...,...,...,...
191,2020-03-29,34.729131,-118.381763,2020-03-29
192,2020-04-24,34.724444,-118.402500,2020-04-24
193,2020-04-25,34.747997,-118.318550,2020-04-25
194,2019-03-31,34.703987,-118.331603,2019-03-31


In [41]:
# Index the observations by their parsed 'date' column so that time-based
# operations such as resample() can be applied to the frame.
#
# set_index() returns a new frame, which is rebound to the same name rather
# than mutating with inplace=True. The guard keeps this cell re-runnable: once
# 'date' has been moved into the index it is no longer a column, and calling
# set_index('date') a second time would raise KeyError.
if 'date' in observed_antelope.columns:
    observed_antelope = observed_antelope.set_index('date')
observed_antelope

Unnamed: 0_level_0,observed_on,latitude,longitude
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-03-11,2017-03-11,34.749275,-118.395100
2017-03-21,2017-03-21,34.739843,-118.385018
2017-03-21,2017-03-21,34.737230,-118.376000
2017-03-12,2017-03-12,34.724912,-118.396906
2017-03-24,2017-03-24,34.751371,-118.252298
...,...,...,...
2020-03-29,2020-03-29,34.729131,-118.381763
2020-04-24,2020-04-24,34.724444,-118.402500
2020-04-25,2020-04-25,34.747997,-118.318550
2019-03-31,2019-03-31,34.703987,-118.331603


In [42]:
# Attach a constant column 'n' holding 1 on every row, so that summing 'n'
# over any group of rows gives the number of rows in that group.
observed_antelope = observed_antelope.assign(n=1)
observed_antelope

Unnamed: 0_level_0,observed_on,latitude,longitude,n
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-03-11,2017-03-11,34.749275,-118.395100,1
2017-03-21,2017-03-21,34.739843,-118.385018,1
2017-03-21,2017-03-21,34.737230,-118.376000,1
2017-03-12,2017-03-12,34.724912,-118.396906,1
2017-03-24,2017-03-24,34.751371,-118.252298,1
...,...,...,...,...
2020-03-29,2020-03-29,34.729131,-118.381763,1
2020-04-24,2020-04-24,34.724444,-118.402500,1
2020-04-25,2020-04-25,34.747997,-118.318550,1
2019-03-31,2019-03-31,34.703987,-118.331603,1


In [43]:
# Aggregate the observations into two-week bins on the datetime index.
# 'n' is 1 on every row, so its per-bin sum is the number of observations.
#
# Only numeric columns are summed. 'observed_on' holds date strings: older
# pandas silently dropped such columns from sum(), but pandas 2.x aggregates
# them too, so the numeric columns are selected explicitly to give the same
# table (latitude, longitude, n) on every version.
# NOTE: the summed latitude/longitude values are not meaningful quantities;
# 'n' is the column of interest here.
observed_antelope.select_dtypes(include='number').resample('2W').sum()

Unnamed: 0_level_0,latitude,longitude,n
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-03-12,69.474187,-236.792006,2
2017-03-26,277.893653,-946.925263,8
2017-04-09,486.221292,-1657.465374,14
2017-04-23,347.326829,-1183.943707,10
2017-05-07,34.736032,-118.392306,1
...,...,...,...
2020-03-22,173.672283,-591.957578,5
2020-04-05,173.643651,-591.939378,5
2020-04-19,243.083780,-828.378170,7
2020-05-03,902.835988,-3077.663879,26
