In [14]:
import json
from datetime import datetime, timedelta
from urllib.parse import urlencode

import pandas as pd
import requests
from tqdm import tqdm

In [3]:
REQUEST_TIMEOUT_SECONDS = 30  # fail instead of hanging the notebook on a stalled connection


def make_the_call(path, endpoint, **kwargs):
    """GET a JSON document from the USA-NPN portal and return it decoded.

    Args:
        path: Portal service group placed in the URL, e.g. ``'species'``.
        endpoint: Method name within that group, e.g. ``'getSpeciesById'``.
        **kwargs: Query-string parameters. Names may contain brackets
            (``'species_id[0]'``); non-string values are converted with ``str``.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within REQUEST_TIMEOUT_SECONDS.
    """
    url = f'http://www.usanpn.org/npn_portal/{path}/{endpoint}.json'
    if kwargs:
        # Percent-encode names and values so characters such as '&', '=' or
        # spaces cannot corrupt the query string. Brackets are kept literal so
        # array-style names like species_id[0] are sent exactly as before.
        query = urlencode(kwargs, safe='[]')
        url = f'{url}?{query}'
    response = requests.request(
        url=url,
        method='GET',
        timeout=REQUEST_TIMEOUT_SECONDS
    )
    # Surface HTTP errors here rather than as a confusing JSON decode failure.
    response.raise_for_status()
    return response.json()

# Smoke test: look up a single species record by its NPN species id.
make_the_call(path='species', endpoint='getSpeciesById', species_id=1500)

{'common_name': 'pink trumpet-tree',
 'genus': 'Tabebuia',
 'species': 'rosea',
 'kingdom': 'Plantae',
 'itis_taxonomic_sn': 182287}

In [4]:
# Species returned by the portal for the state code 'MA', one row per species.
ma = pd.DataFrame(
    make_the_call(path='species', endpoint='getSpeciesByState', state='MA')
)
ma

Unnamed: 0,species_id,common_name,genus,kingdom,species,itis_taxonomic_sn
0,1436,absinthium,Artemisia,Plantae,absinthium,35445.0
1,1227,Acadian flycatcher,Empidonax,Animalia,virescens,178339.0
2,174,alfalfa,Medicago,Plantae,sativa,183623.0
3,1174,Allegheny serviceberry,Amelanchier,Plantae,laevis,182046.0
4,1446,alternateleaf dogwood,Cornus,Plantae,alternifolia,27813.0
...,...,...,...,...,...,...
672,154,yellow star-thistle,Centaurea,Plantae,solstitialis,36972.0
673,175,yellow sweetclover,Melilotus,Plantae,officinalis,26150.0
674,1675,yellow thistle,Cirsium,Plantae,horridulum,36379.0
675,358,yellow warbler,Setophaga,Animalia,petechia,950039.0


In [40]:
# Rows of the MA species table whose genus is exactly 'Juglans'.
ma.loc[ma['genus'].eq('Juglans')]

Unnamed: 0,species_id,common_name,genus,kingdom,species,itis_taxonomic_sn
78,80,black walnut,Juglans,Plantae,nigra,19254.0


In [154]:
def get_pheno_df(species_id, date='2020-01-01'):
    """Return the phenophases the portal reports for one species as a DataFrame.

    Args:
        species_id: NPN species id, sent as the ``species_id[0]`` parameter.
        date: Reference date (``YYYY-MM-DD``) sent as the ``date`` parameter.
            Defaults to ``'2020-01-01'``, the value that used to be hard-coded.

    Returns:
        A DataFrame built from the ``'phenophases'`` list of the first element
        of the response.

    Raises:
        IndexError / KeyError: if the response is empty or lacks a
            ``'phenophases'`` entry.
    """
    response = make_the_call(
        'phenophases',
        'getPhenophasesForSpecies',
        **{'species_id[0]': species_id, 'date': date, 'return_all': True}
    )
    # Only one species is requested, so only the first element is read.
    return pd.DataFrame(response[0]['phenophases'])
# Phenophase definitions for species 80 (black walnut in the MA table above).
phenos = get_pheno_df(species_id=80)
phenos

http://www.usanpn.org/npn_portal/phenophases/getPhenophasesForSpecies.json?species_id[0]=80&date=2020-01-01&return_all=True


Unnamed: 0,phenophase_id,phenophase_name,phenophase_category,phenophase_definition,phenophase_additional_definition,seq_num,color,pheno_class_id,pheno_class_name,pheno_class_sequence,abundance_category,raw_abundance
0,371,Breaking leaf buds,Leaves,One or more breaking leaf buds are visible on ...,,10,Green1,1,Initial shoot or leaf growth,10,39,False
1,483,Leaves,Leaves,"One or more live, unfolded leaves are visible ...",,30,Green1,3,Leaves or needles,30,73,False
2,467,Increasing leaf size,Leaves,A majority of leaves on the plant have not yet...,,40,Green1,2,Young leaves or needles,20,41,False
3,498,Colored leaves,Leaves,One or more leaves show some of their typical ...,,70,Green1,4,Colored leaves or needles,40,74,False
4,471,Falling leaves,Leaves,One or more leaves are falling or have recentl...,,120,Green1,5,Falling leaves or needles,50,-1,False
5,500,Flowers or flower buds,Flowers,One or more fresh open or unopened flowers or ...,"For Juglans nigra, the male inflorescence is a...",170,Green2,6,Flowers or pollen cones,60,48,False
6,501,Open flowers,Flowers,"One or more open, fresh flowers are visible on...","For Juglans nigra, the male flowers will open ...",190,Green2,7,Open flowers or pollen cones,70,50,False
7,502,Pollen release,Flowers,One or more flowers on the plant release visib...,,240,Green2,8,Pollen release,80,51,False
8,516,Fruits,Fruits,One or more fruits are visible on the plant.,"For Juglans nigra, the fruit is a nut covered ...",280,Green3,10,Fruits or seed cones,100,56,False
9,390,Ripe fruits,Fruits,One or more ripe fruits are visible on the pla...,"For Juglans nigra, a fruit is considered ripe ...",300,Green3,12,Ripe fruits or seed cones,120,58,False


In [160]:
def _first_of_month(start, offset):
    """Return midnight on the first day of the month `offset` months after `start`'s month."""
    months_from_january = start.month - 1 + offset
    return datetime(
        start.year + months_from_january // 12,
        months_from_january % 12 + 1,
        1
    )


def get_obs_df(start, months, species_id):
    """Collect per-station, per-phenophase monthly "yes" fractions for a species.

    One request is made per calendar month, from the first of that month to
    the first of the following month.

    Args:
        start: ``datetime`` whose year and month select the first month
            (its day is ignored).
        months: Number of consecutive months to fetch.
        species_id: NPN species id.

    Returns:
        A DataFrame with columns ``year``, ``month``, ``station_id``,
        ``phenophase_id`` and ``percent_yes``; empty (but with those columns)
        when nothing was observed. ``phenophase_id`` is kept exactly as keyed
        in the response.
    """
    columns = ['year', 'month', 'station_id', 'phenophase_id', 'percent_yes']
    form = '%Y-%m-%d'
    rows = []
    for offset in range(months):
        start_date = _first_of_month(start, offset)
        # NOTE(review): if the API treats end_date as inclusive, observations
        # made on the 1st are counted in two adjacent months -- confirm.
        end_date = _first_of_month(start, offset + 1)
        observations = make_the_call(
            'observations',
            'getAllObservationsForSpecies',
            **{
                'species_id[0]': species_id,
                'start_date': start_date.strftime(form),
                'end_date': end_date.strftime(form),
            }
        )
        for station in observations['station_list']:
            for pheno_id, counts in station['species'][str(species_id)].items():
                # 'y' / 'n' / 'q' are presumably yes / no / uncertain tallies.
                yes, no, q = counts.get('y', 0), counts.get('n', 0), counts.get('q', 0)
                total = yes + no + q
                if total == 0:
                    # No observations recorded: a fraction is undefined, and
                    # dividing would abort the whole species.
                    continue
                rows.append({
                    'year': start_date.year,
                    'month': start_date.month,
                    'station_id': station['station_id'],
                    'phenophase_id': pheno_id,
                    'percent_yes': yes / total,
                })
    # Passing the columns keeps the schema stable even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

# Twelve months of observations for species 80, starting January 2019.
df = get_obs_df(start=datetime(2019, 1, 1), months=12, species_id=80)

http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-01-01&end_date=2019-02-01
http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-02-01&end_date=2019-03-01
http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-03-01&end_date=2019-04-01
http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-04-01&end_date=2019-05-01
http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-05-01&end_date=2019-06-01
http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-06-01&end_date=2019-07-01
http://www.usanpn.org/npn_portal/observations/getAllObservationsForSpecies.json?species_id[0]=80&start_date=2019-07-01&end_date=2019-08-01
http://www.usanpn.org/npn_p

In [151]:
def build_pheno_summary(obs_df, pheno_df):
    """Find the month in which each phenophase peaks.

    For every (month, phenophase) pair the median ``percent_yes`` across rows
    is taken; each phenophase is then reduced to the earliest month in which
    that median reaches its maximum. Phenophases whose peak is 0, or that are
    absent from ``pheno_df``, are dropped.

    Args:
        obs_df: DataFrame with at least ``month``, ``phenophase_id`` and
            ``percent_yes`` columns. ``phenophase_id`` must be castable to int.
        pheno_df: DataFrame with integer ``phenophase_id`` and
            ``phenophase_name`` columns.

    Returns:
        DataFrame with columns ``phenophase_id``, ``percent_yes``, ``month``
        and ``phenophase_name``, sorted by ``month``.
    """
    # Select the column before aggregating: only percent_yes is needed, and
    # aggregating every column fails on non-numeric ones in recent pandas.
    monthly = (
        obs_df
        .groupby(['month', 'phenophase_id'])['percent_yes']
        .median()
        .reset_index()
    )
    # Per-row maximum of the phenophase the row belongs to (replaces a self-merge).
    peak_value = monthly.groupby('phenophase_id')['percent_yes'].transform('max')
    peaks = (
        monthly[monthly['percent_yes'] == peak_value]
        .groupby(['phenophase_id', 'percent_yes'])['month']
        .min()  # ties between months resolve to the earliest one
        .reset_index()
    )
    peaks['phenophase_id'] = peaks['phenophase_id'].astype(int)
    peaks = peaks[peaks['percent_yes'] > 0]
    return peaks.merge(
        pheno_df[['phenophase_id', 'phenophase_name']],
        on=['phenophase_id']
    ).sort_values('month')

# Peak month per phenophase for the single-species observations fetched above.
build_pheno_summary(obs_df=df, pheno_df=phenos)

Unnamed: 0,phenophase_id,percent_yes,month,phenophase_name
0,371,0.366667,4,Breaking leaf buds
2,467,0.833333,5,Increasing leaf size
4,483,1.0,5,Leaves
7,501,0.166667,5,Open flowers
6,500,0.2,6,Flowers or flower buds
9,516,1.0,7,Fruits
1,390,0.15,9,Ripe fruits
5,498,1.0,9,Colored leaves
3,471,1.0,10,Falling leaves
8,504,0.333333,10,Recent fruit or seed drop


In [162]:
# Restrict the MA species table to rows whose kingdom is 'Plantae'.
ma.loc[ma['kingdom'].eq('Plantae')]

Unnamed: 0,species_id,common_name,genus,kingdom,species,itis_taxonomic_sn
0,1436,absinthium,Artemisia,Plantae,absinthium,35445.0
2,174,alfalfa,Medicago,Plantae,sativa,183623.0
3,1174,Allegheny serviceberry,Amelanchier,Plantae,laevis,182046.0
4,1446,alternateleaf dogwood,Cornus,Plantae,alternifolia,27813.0
5,93,American basswood,Tilia,Plantae,americana,21536.0
...,...,...,...,...,...,...
669,6,yellow marsh marigold,Caltha,Plantae,palustris,18454.0
671,939,yellow pond-lily,Nuphar,Plantae,lutea,503968.0
672,154,yellow star-thistle,Centaurea,Plantae,solstitialis,36972.0
673,175,yellow sweetclover,Melilotus,Plantae,officinalis,26150.0


In [172]:
# Build the 2019 phenophase summary for every plant species in the MA table.
dfs = []
failed_species = {}  # species_id -> exception, so skipped species can be inspected afterwards
plant_ids = ma[ma['kingdom'] == 'Plantae']['species_id'].unique()
for species_id in tqdm(plant_ids):
    species_id = int(species_id)
    try:
        summary = build_pheno_summary(
            get_obs_df(datetime(2019, 1, 1), 12, species_id),
            get_pheno_df(species_id)
        )
    except Exception as exc:
        # Best effort: one failing species must not abort the long-running
        # loop. Catching Exception (not a bare except) keeps Ctrl-C / kernel
        # interrupt working, and the error is recorded instead of discarded.
        failed_species[species_id] = exc
        continue
    summary['species_id'] = species_id
    dfs.append(summary)
print(f'{len(failed_species)} of {len(plant_ids)} species skipped; see failed_species')
df = pd.concat(dfs)

100%|██████████| 461/461 [44:59<00:00,  5.85s/it]


In [174]:
# Cache the combined summary on disk (row index omitted) so the slow fetch need not be repeated.
df.to_csv(path_or_buf='phenomes.csv', index=False)

In [20]:
# Reload the cached summary, discard a stray 'Unnamed: 0' column if the file
# has one, and attach the species metadata from `ma`.
df = (
    pd.read_csv('phenomes.csv')
    .drop(columns='Unnamed: 0', errors='ignore')
    .merge(ma, on='species_id')
)
df

Unnamed: 0,phenophase_id,percent_yes,month,phenophase_name,species_id,common_name,genus,kingdom,species,itis_taxonomic_sn
0,482,1.000000,1,Initial growth,1436,absinthium,Artemisia,Plantae,absinthium,35445.0
1,488,0.983333,7,Leaves,1436,absinthium,Artemisia,Plantae,absinthium,35445.0
2,501,0.518262,8,Open flowers,1436,absinthium,Artemisia,Plantae,absinthium,35445.0
3,482,0.500000,4,Initial growth,174,alfalfa,Medicago,Plantae,sativa,183623.0
4,488,1.000000,6,Leaves,174,alfalfa,Medicago,Plantae,sativa,183623.0
...,...,...,...,...,...,...,...,...,...,...
2262,500,1.000000,6,Flowers or flower buds,175,yellow sweetclover,Melilotus,Plantae,officinalis,26150.0
2263,501,1.000000,6,Open flowers,175,yellow sweetclover,Melilotus,Plantae,officinalis,26150.0
2264,390,0.500000,7,Ripe fruits,175,yellow sweetclover,Melilotus,Plantae,officinalis,26150.0
2265,504,0.333333,7,Recent fruit or seed drop,175,yellow sweetclover,Melilotus,Plantae,officinalis,26150.0


In [21]:
# Load the genus collection used to filter the summary table.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open('genuses.json', 'r', encoding='utf-8') as fh:
    genuses = json.load(fh)

In [25]:
# Lower-case the genus names before matching against `genuses` (presumably
# stored in lower case -- verify against genuses.json). The vectorized string
# method replaces a row-by-row apply and leaves missing values as NaN instead
# of raising on them.
df['genus'] = df['genus'].str.lower()
fdf = df[df['genus'].isin(genuses)]
fdf

Unnamed: 0,phenophase_id,percent_yes,month,phenophase_name,species_id,common_name,genus,kingdom,species,itis_taxonomic_sn
9,371,0.633333,4,Breaking leaf buds,1174,Allegheny serviceberry,amelanchier,Plantae,laevis,182046.0
10,500,0.300000,4,Flowers or flower buds,1174,Allegheny serviceberry,amelanchier,Plantae,laevis,182046.0
11,467,0.750000,5,Increasing leaf size,1174,Allegheny serviceberry,amelanchier,Plantae,laevis,182046.0
12,483,1.000000,5,Leaves,1174,Allegheny serviceberry,amelanchier,Plantae,laevis,182046.0
13,516,0.083333,6,Fruits,1174,Allegheny serviceberry,amelanchier,Plantae,laevis,182046.0
...,...,...,...,...,...,...,...,...,...,...
2239,498,0.981481,9,Colored leaves,97,yellow birch,betula,Plantae,alleghaniensis,19481.0
2240,390,0.586905,10,Ripe fruits,97,yellow birch,betula,Plantae,alleghaniensis,19481.0
2241,516,0.642857,10,Fruits,97,yellow birch,betula,Plantae,alleghaniensis,19481.0
2242,471,0.555556,12,Falling leaves,97,yellow birch,betula,Plantae,alleghaniensis,19481.0


In [28]:
# Phenophases of the selected genera whose peak month is 3.
fdf.loc[fdf['month'].eq(3)]

Unnamed: 0,phenophase_id,percent_yes,month,phenophase_name,species_id,common_name,genus,kingdom,species,itis_taxonomic_sn
53,500,0.805556,3,Flowers or flower buds,1048,American elm,ulmus,Plantae,americana,19049.0
331,490,1.0,3,Pollen cones,48,black spruce,picea,Plantae,mariana,183302.0
1320,371,0.444444,3,Breaking leaf buds,1177,mockernut hickory,carya,Plantae,tomentosa,19247.0
1545,371,0.34,3,Breaking leaf buds,1755,post oak,quercus,Plantae,stellata,19422.0
1574,500,1.0,3,Flowers or flower buds,1008,pussy willow,salix,Plantae,discolor,22524.0
1630,371,1.0,3,Breaking leaf buds,325,red elderberry,sambucus,Plantae,racemosa,35326.0
1647,393,0.5,3,Ripe seed cones,968,red pine,pinus,Plantae,resinosa,183375.0
1805,502,0.05,3,Pollen release,1215,Siberian elm,ulmus,Plantae,pumila,19057.0
1852,371,1.0,3,Breaking leaf buds,1753,sour cherry,prunus,Plantae,cerasus,24773.0
1853,467,1.0,3,Increasing leaf size,1753,sour cherry,prunus,Plantae,cerasus,24773.0


In [34]:
# Export one JSON object per summary row, with 'percent_yes' renamed to 'score'.
event_columns = [
    'percent_yes', 'month', 'phenophase_name', 'common_name', 'genus', 'species'
]
# orient='records' yields one dict per row directly. Going through a
# transposed, index-keyed dict would silently drop rows if index labels repeat.
rows = (
    fdf[event_columns]
    .rename(columns={'percent_yes': 'score'})
    .to_dict(orient='records')
)
with open('pheno_events.json', 'w') as fh:
    json.dump(rows, fh)