In [1]:
import json
import re
import os
import calendar

import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output
from jupyter_dash import JupyterDash
import plotly.express as px
import plotly.graph_objects as go


# Allow up to 500 rows to be shown when a DataFrame is displayed
pd.set_option('display.max_rows', 500)
# Abbreviated month names in calendar order; index 0 of month_abbr is skipped.
# Used below as Excel sheet names and as the category order for sorting months.
months = calendar.month_abbr[1:]

In [2]:
# Workbook holding the 2013 monthly counts. The path is assembled with
# os.path.join so the relative location resolves on any OS, not only on Windows
# (the original raw string used backslash separators).
file_path = os.path.join(
    '..',
    'data',
    'Monthly Count Records (excel versions)',
    'SEMBC_XLS_Yearly',
    'embc2013.xlsx',
)
# Read the May sheet without a header row so the free-text rows above the
# species table are kept as ordinary data rows.
df = pd.read_excel(file_path, sheet_name='May', header=None)

In [3]:
# Replace missing values in the first column with empty strings so the string
# concatenation and .str matching in later cells do not encounter NaN
df[0] = df[0].fillna('')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,"Date: May 12, 2013 Start: 07:30 Finish: ...",,,,,,,,,,...,,,,,,,,,,
1,Tide State: high Tide Movement: falling ...,,,,,,,,,,...,,,,,,,,,,
2,"Observers: Alden Roberge, Chris Dale, Vanessa...",,,,,,,,,,...,,,,,,,,,,
3,"Wally Fletcher, Patrick MacNamara, Judith Vetsch",,,,,,,,,,...,,,,,,,,,,
4,Species: 71,Total,OE,WD,SR,TD1,TD2,TD3,EF1,EF2,...,EC,EM,BP,NF1,NF2,SA,IM,MC1,MC2,DW
5,Common Loon,2,2,,,,,,,,...,,,,,,,,,,
6,Pelagic Cormorant,4,,,,,,,,,...,,,4,,,,,,,
7,Great Blue Heron,9,,,,,,,7,,...,,,,,,,,2,,
8,Turkey Vulture,2,1,,,,,,,,...,,,,,,1,,,,
9,Canada Goose,202,37,,13,,,,90,,...,15,,6,,,,,25,8,


In [4]:
# The first four cells of column 0 hold the free-text survey header lines
info_1, info_2, info_3, info_4 = (df.iloc[row, 0] for row in range(4))
# Combine them into one string, two spaces between lines and two trailing spaces
info = '  '.join((info_1, info_2, info_3, info_4)) + '  '


In [5]:
# Extract the observer list (capture group 2) from the combined header text.
# The original character class `[\w -,]` was an accidental range from space to
# comma; it excluded the hyphen, so a hyphenated name made the search return
# None. The class below lists the allowed punctuation explicitly.
observers_match = re.search(r"(Observers: +)([\w ,.'&()-]+\w+)(   *)", info)
observers = observers_match.group(2)
observers

'Alden Roberge, Chris Dale, Vanessa Lambertus, Amanda Newman, Hilary Dymond, Kira Sufalka,  Wally Fletcher, Patrick MacNamara, Judith Vetsch'

In [6]:
# Equipment entry from the combined header text (second capture group)
equipment_match = re.compile(r'(Equipment: +)([\w -]+\w+)(   *)').search(info)
equipment = equipment_match[2]
equipment

'scope'

In [7]:
# Tide movement entry from the combined header text (second capture group)
tide_movement_match = re.compile(r'(Tide Movement: +)([\w -]+\w+)(   *)').search(info)
tide_movement = tide_movement_match[2]
tide_movement

'falling'

In [8]:
# Tide state entry from the combined header text (second capture group)
tide_state_match = re.compile(r'(Tide State: +)([\w -]+\w+)(   *)').search(info)
tide_state = tide_state_match[2]
tide_state

'high'

In [9]:
# Sky entry from the combined header text (second capture group)
sky_match = re.compile(r'(Sky: +)([\w -]+\w+)(   *)').search(info)
sky = sky_match[2]
sky

'overcast'

In [10]:
# Sea state entry from the combined header text (second capture group)
sea_state_match = re.compile(r'(Sea State: +)([\w -]+\w+)(   *)').search(info)
sea_state = sea_state_match[2]
sea_state

'wavy'

In [11]:
# Extract the survey date (capture group 2) from the combined header text.
# The original class `[\w -,]` was an accidental range from space to comma;
# the allowed characters (word chars, space, comma, hyphen) are now explicit.
date_match = re.search(r'(Date:\s+)([\w ,-]+\w+)(   *)', info)
date = date_match.group(2)
date

'May 12, 2013'

In [12]:
# Start time from the first header line (second capture group)
start_match = re.compile(r'(Start:\s+)(\w+:\w+)(   *)').search(info_1)
start = start_match[2]
start

'07:30'

In [13]:
# Finish time from the first header line (second capture group)
finish_match = re.compile(r'(Finish:\s+)(\w+:\w+)(   *)').search(info_1)
finish = finish_match[2]
finish

'12:15'

In [14]:
# Precipitation entry from the combined header text (second capture group)
precip_match = re.compile(r'(Precip: +)([\w -]+\w+)(   *)').search(info)
precip = precip_match[2]
precip

'rain'

In [15]:
# Locate the header row: the row whose first cell looks like "Species: <n>"
is_header_row = df[0].str.contains(r'Species: +\w+', regex=True, na=False)
idx = df.index[is_header_row]
idx[0]

4

In [31]:
# Re-read the May sheet, this time using the row found above (idx[0]) as the header
df1 = pd.read_excel(file_path,  sheet_name='May', header=idx[0])
# Keep the column labels; later cells use columns[0] to refer to the species column
columns = df1.columns
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 78 entries, 0 to 77
Data columns (total 22 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Species: 71  75 non-null     object
 1   Total        78 non-null     object
 2   OE           10 non-null     object
 3   WD           8 non-null      object
 4   SR           7 non-null      object
 5   TD1          6 non-null      object
 6   TD2          14 non-null     object
 7   TD3          10 non-null     object
 8   EF1          29 non-null     object
 9   EF2          18 non-null     object
 10  CC           17 non-null     object
 11  CS           32 non-null     object
 12  EC           12 non-null     object
 13  EM           11 non-null     object
 14  BP           24 non-null     object
 15  NF1          18 non-null     object
 16  NF2          18 non-null     object
 17  SA           26 non-null     object
 18  IM           16 non-null     object
 19  MC1          35 non-null     ob

In [18]:
df1

Unnamed: 0,Species: 71,Total,OE,WD,SR,TD1,TD2,TD3,EF1,EF2,...,EC,EM,BP,NF1,NF2,SA,IM,MC1,MC2,DW
0,Common Loon,2,2,,,,,,,,...,,,,,,,,,,
1,Pelagic Cormorant,4,,,,,,,,,...,,,4,,,,,,,
2,Great Blue Heron,9,,,,,,,7,,...,,,,,,,,2,,
3,Turkey Vulture,2,1,,,,,,,,...,,,,,,1,,,,
4,Canada Goose,202,37,,13,,,,90,,...,15,,6,,,,,25,8,
5,Goose sp.,26,,,,,,,,,...,,,26,,,,,,,
6,Mallard,56,,,,,,,7,,...,6,25,,,,,1,4,1,3
7,Bufflehead,1,,,,,,,,,...,,,,,,,,1,,
8,Common Goldeneye,20,,,,,,,,,...,,,,,,,,,,
9,Northern Shoveler,50,,,,,,,40,,...,,,,,,,,,,


In [32]:
# Keep only the rows that have a value in the species column
df2 = df1[df1[columns[0]].notna()]

In [33]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 75 entries, 0 to 77
Data columns (total 22 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Species: 71  75 non-null     object
 1   Total        75 non-null     object
 2   OE           7 non-null      object
 3   WD           5 non-null      object
 4   SR           4 non-null      object
 5   TD1          3 non-null      object
 6   TD2          11 non-null     object
 7   TD3          7 non-null      object
 8   EF1          26 non-null     object
 9   EF2          15 non-null     object
 10  CC           14 non-null     object
 11  CS           29 non-null     object
 12  EC           9 non-null      object
 13  EM           8 non-null      object
 14  BP           21 non-null     object
 15  NF1          15 non-null     object
 16  NF2          15 non-null     object
 17  SA           23 non-null     object
 18  IM           13 non-null     object
 19  MC1          32 non-null     object


In [34]:
# The 'Total' column is removed here; totals are recomputed with pandas later
df2 = df2.drop('Total', axis=1)

In [35]:
# Missing counts become 0, then rows are ordered alphabetically by species
df2 = df2.fillna(0).sort_values(by=[columns[0]])

In [36]:
df2

Unnamed: 0,Species: 71,OE,WD,SR,TD1,TD2,TD3,EF1,EF2,CC,...,EC,EM,BP,NF1,NF2,SA,IM,MC1,MC2,DW
16,American Coot,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
73,American Goldfinch,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
49,American Robin,0,0,0,0,2,9,9,6,3,...,3,0,5,2,0,6,2,22,14,22
12,Bald Eagle,4,0,0,0,0,0,1,1,1,...,0,0,1,0,1,1,0,0,0,0
42,Barn Swallow,0,0,0,0,2,0,3,0,0,...,0,0,0,0,0,0,0,0,0,0
26,Belted Kingfisher,0,0,0,0,0,0,0,0,0,...,0,0,2,0,0,0,0,3,0,0
44,Black-capped Chickadee,0,0,0,0,0,0,0,0,0,...,0,0,5,0,6,8,1,3,1,6
66,Black-headed Grosbeak,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
56,Black-throated Gray Warbler,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
68,Brown-headed Cowbird,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,3


In [37]:
# Use the species column as the index and clear the index name
df2 = df2.set_index(columns[0]).rename_axis(None)

In [38]:
df2

Unnamed: 0,OE,WD,SR,TD1,TD2,TD3,EF1,EF2,CC,CS,EC,EM,BP,NF1,NF2,SA,IM,MC1,MC2,DW
American Coot,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
American Goldfinch,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,1
American Robin,0,0,0,0,2,9,9,6,3,20,3,0,5,2,0,6,2,22,14,22
Bald Eagle,4,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,0,0
Barn Swallow,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0
Belted Kingfisher,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,3,0,0
Black-capped Chickadee,0,0,0,0,0,0,0,0,0,6,0,0,5,0,6,8,1,3,1,6
Black-headed Grosbeak,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,2
Black-throated Gray Warbler,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
Brown-headed Cowbird,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,2,0,0,0,3


In [39]:
# Drop the 'Totals' summary row from the species index
df2 = df2.drop(index='Totals')

In [63]:
# transpose to long form: one row per (area id, species)
# Frames are collected in a list and concatenated once, instead of growing a
# DataFrame with concat inside the loop (quadratic copying). iterrows() yields
# each row as a Series, so a species label that appears twice in the index is
# handled row by row rather than returning a DataFrame from .loc.
# NOTE(review): the date is hard-coded to '2022-06' although the sheet loaded
# above is May of embc2013.xlsx — confirm the intended value.
species_frames = [
    pd.DataFrame({'id': species_count.index, 'count': species_count.values, 'date': '2022-06', 'species': species})
    for species, species_count in df2.iterrows()
]
df4 = pd.concat(species_frames) if species_frames else pd.DataFrame()
df4

Unnamed: 0,id,count,date,species
0,OE,0,2022-06,American Coot
1,WD,0,2022-06,American Coot
2,SR,0,2022-06,American Coot
3,TD1,0,2022-06,American Coot
4,TD2,0,2022-06,American Coot
...,...,...,...,...
15,SA,4,2022-06,Yellow-rumped Warbler
16,IM,0,2022-06,Yellow-rumped Warbler
17,MC1,4,2022-06,Yellow-rumped Warbler
18,MC2,4,2022-06,Yellow-rumped Warbler


In [61]:
# Number of species with a non-zero count, computed per area (column)
total_count = (df2 > 0).sum()
df3 = pd.DataFrame(
    {
        'id': total_count.index,
        'counts': total_count.values,
        'date': '2022-06',
        'species': 'Total Species',
    }
)
df3

Unnamed: 0,id,counts,date,species
0,OE,6,2022-06,Total Species
1,WD,4,2022-06,Total Species
2,SR,3,2022-06,Total Species
3,TD1,2,2022-06,Total Species
4,TD2,10,2022-06,Total Species
5,TD3,6,2022-06,Total Species
6,EF1,25,2022-06,Total Species
7,EF2,14,2022-06,Total Species
8,CC,13,2022-06,Total Species
9,CS,28,2022-06,Total Species


# Load Data from excel sheets

In [64]:
def read_excel_file(year):
    """Load one year's workbook and return all monthly sheets in long form.

    The workbook ``embc{year}.xlsx`` is read once per entry in the
    module-level ``months`` list, using the month abbreviation as the sheet
    name. For every sheet the header row is located by looking for 'OE' in
    the third column, repeated header rows, the 'Total' column, the
    'Totals'/'Overall...' rows and species starting with an asterisk are
    removed, and the remaining table is reshaped to one row per
    (area id, species).

    Parameters
    ----------
    year : int
        Year used to build the file name and to fill the ``year`` and
        ``date`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``count`` (int64), ``date`` ('Mon-YYYY'), ``year``,
        ``month`` and ``species``. The index restarts at 0 for every species.
    """
    file_path = os.path.join(r'..\data\Monthly Count Records (excel versions)\SEMBC_XLS_Yearly', f'embc{year}.xlsx')
    output_columns = ['id', 'count', 'date', 'year', 'month', 'species']

    # Collect the per-species frames and concatenate once at the end; growing a
    # DataFrame with concat inside the loop copies all earlier rows each time.
    species_frames = []

    for month in months:
        df = pd.read_excel(file_path, sheet_name=month, header=None)
        # find header row and reload file from that row
        header_idx = df[df[2].str.contains(r'OE', regex=True, na=False)].index
        df1 = pd.read_excel(file_path, sheet_name=month, header=header_idx[0])
        columns = df1.columns

        # find extra header rows and remove; the column is cast to str so that
        # .str.contains works on cells that were read as numbers
        df1[columns[2]] = df1[columns[2]].astype('str')
        extra_header_idx = df1[df1[columns[2]].str.contains(r'OE', regex=True, na=False)].index
        df1 = df1.drop(extra_header_idx)

        df2 = df1.dropna(subset=[columns[0]])
        df2 = df2.drop(columns='Total')
        df2 = df2.fillna(0)
        df2 = df2.sort_values([columns[0]])
        total_idx = df2[df2[columns[0]].str.contains(r'Totals|Overall.*', regex=True, na=False)].index
        df2 = df2.drop(total_idx)

        # drop species that start with asterisk
        asterisk_idx = df2[df2[columns[0]].str.contains(r'^\*', regex=True, na=False)].index
        df2 = df2.drop(asterisk_idx)

        df2 = df2.set_index(columns[0])
        df2.index.name = None

        # individual species count; iterrows() yields one Series per row, so a
        # species name that occurs twice in a sheet is processed row by row
        # (df2.loc[name] would return a DataFrame and break pd.to_numeric)
        for species, species_count in df2.iterrows():
            # non-numeric cells are coerced to NaN and then counted as 0
            species_count = pd.to_numeric(species_count, errors='coerce').fillna(0)
            species_frames.append(
                pd.DataFrame({'id': species_count.index, 'count': species_count.values, 'date':f'{month}-{year}', 'year': year, 'month': month,'species': species})
            )

    if not species_frames:
        # nothing was read: return an empty frame that still has the expected columns
        return pd.DataFrame(columns=output_columns)

    excel_df = pd.concat(species_frames)
    excel_df['count'] = excel_df['count'].astype('int64')
    return excel_df

In [65]:
# Load every yearly workbook (2010 through 2022) and stack the results with a
# single concat instead of re-copying the accumulated frame on each iteration.
excel_df = pd.concat([read_excel_file(year) for year in range(2010, 2023)])


In [66]:
excel_df

Unnamed: 0,id,count,date,year,month,species
0,OE,0,Jan-2010,2010,Jan,American Robin
1,WD,0,Jan-2010,2010,Jan,American Robin
2,SR,0,Jan-2010,2010,Jan,American Robin
3,TD1,0,Jan-2010,2010,Jan,American Robin
4,TD2,0,Jan-2010,2010,Jan,American Robin
...,...,...,...,...,...,...
15,SA,0,Dec-2022,2022,Dec,Western Grebe
16,IM,0,Dec-2022,2022,Dec,Western Grebe
17,MC1,0,Dec-2022,2022,Dec,Western Grebe
18,MC2,0,Dec-2022,2022,Dec,Western Grebe


In [67]:
# Replacements dictionary used to correct species names read from the Excel sheets.
# Keys are the names as they appear in the workbooks (misspellings, curly
# apostrophes, stray spaces, alternate names, hybrids); values are the name to
# use instead. It is passed to DataFrame.replace in the cells that follow.
replacements = {
    'Accipter sp.': 'Accipiter sp.',
    'American Crow x Northwestern Crow': 'American Crow',
    'American Robbin': 'American Robin',
    'American Wigeion': 'American Wigeon',
    'Anna’s Humming Bird': "Anna's Hummingbird",
    'Anna’s Hummingbird': "Anna's Hummingbird",
    'Back Swift': 'Black Swift',
    'Barrows Goldeneye': "Barrow's Goldeneye",
    "Barrrow's Goldeneye": "Barrow's Goldeneye",
    'Black-throated Grey Warbler': 'Black-throated Gray Warbler',
    'Blue Grouse (Sooty)': 'Sooty Grouse',
    'Bustit': 'Bushtit',
    'California (Western) Scrub-Jay': 'California Scrub-Jay',
    'Cedar Wagwing': 'Cedar Waxwing',
    'Chestnut-back Chickadee': 'Chestnut-backed Chickadee',
    ' Dark-eyed Junco': 'Dark-eyed Junco',
    'Doubel-crested Cormorant': 'Double-crested Cormorant',
    'Doule-crested Cormorant': 'Double-crested Cormorant',
    'Eurasian Collar-Dove': 'Eurasian Collared Dove',
    'Eurasian Collared-Dove': 'Eurasian Collared Dove',
    'Flycatcher, sp.': 'Flycatcher sp.',
    'Glaucous-winged x Herring Gull': 'Glaucous-winged Gull',
    'Gul sp.': 'Gull sp.',
    'Gull Hybrid': 'Gull, Hybrid',
    'Gull, Glaucous-winged x Western': 'Glaucous-winged Gull',
    'Gull, Western x Glaucous-wing': 'Glaucous-winged Gull',
    'Gull, Western x Glaucous-winged': 'Glaucous-winged Gull',
    'Gull,Glaucous-winged x Western': 'Glaucous-winged Gull',
    'Hawk Species': 'Hawk sp.',
    'Hawk, sp.': 'Hawk sp.',
    "Lincoln's Sparow": "Lincoln's Sparrow",
    'Lincoln’s Sparrow': "Lincoln's Sparrow",
    'Marsh Wren ': 'Marsh Wren',
    'Norther Harrier': 'Northern Harrier',
    'Northern Pygmy-owl': 'Northern Pygmy Owl',
    'Northern Pygmy-Owl': 'Northern Pygmy Owl',
    'Northwestern Crow': 'American Crow',
    'Nthn Rough-winged Swallow': 'Northern Rough-winged Swallow',
    'Nthn. Rough-winged Swallow': 'Northern Rough-winged Swallow',
    'Nthn. Rough-winges Swallow': 'Northern Rough-winged Swallow',
    'Olive-sided Flcatcher': 'Olive-sided Flycatcher',
    'Pacific-Slope Flycatcher': 'Pacific-slope Flycatcher',
    'Passerine sp': 'Passerine sp.',
    'Pelgagic Cormorant': 'Pelagic Cormorant',
    'Peregrin Falcon': 'Peregrine Falcon',
    'Peregrine': 'Peregrine Falcon',
    'Peregrine Falacon': 'Peregrine Falcon',
    'Pied -billed Grebe': 'Pied-billed Grebe',
    'Pied Bil Grebe': 'Pied-billed Grebe',
    'Pileated Wodpecker': 'Pileated Woodpecker',
    'Red Throated Loon': 'Red-throated Loon',
    'Red-tailed hawk': 'Red-tailed Hawk',
    'Ring-biled Gull': 'Ring-billed Gull',
    'Rock Dove': 'Pigeon',
    'Rock Dove (Pigeon)': 'Pigeon',
    'Rock Dove (Rock Pigeon)': 'Pigeon',
    'Rock Pigeon': 'Pigeon',
    'Ruby-crowned Kinglety': 'Ruby-crowned Kinglet',
    'Savannah': 'Savannah Sparrow',
    'Scaup Sp.': 'Scaup sp.',
    'Scaup sp': 'Scaup sp.',
    'Semi-palmated Sandpiper':  'Semipalmated Sandpiper',
    'Sharp-Shinned Hawk': 'Sharp-shinned Hawk', 
    'Sharp-shinned Hawk*': 'Sharp-shinned Hawk',
    'Short-blled Gull': 'Short-billed Gull',
    'Sooty (Blue) Grouse': 'Sooty Grouse',
    'Spotted Sandiper': 'Spotted Sandpiper',
    "Stellar's Jay": "Steller's Jay",
    'Steller’s Jay': "Steller's Jay",
    'Stellar’s Jay': "Steller's Jay",
    'Swainson’s Thrush': "Swainson's Thrush",
    'Townsend’s Warbler': "Townsend's Warbler",
    "Vaux' Swift": "Vaux's Swift",
    'Vaux’s Swift': "Vaux's Swift",
    'White-Winged Crossbill': 'White-winged Crossbill',
    'Winter (Pacific) Wren': 'Pacific Wren',
    'Winter Wren': 'Pacific Wren',
    'Winter Wren (Pacific Wren)': 'Pacific Wren',
    'Winter Wren (Pacific)': 'Pacific Wren',
    "Wison's Warbler": "Wilson's Warbler",
    'Woodpecker sp': 'Woodpecker sp.',
    'chickadee sp.': 'Chickadee sp.',
    'finch sp.': 'Finch sp.',
    'gull sp.': 'Gull sp.',
    'hummingbird sp.': 'Hummingbird sp.',
    'sparrow sp.': 'Sparrow sp.',
    'swallow sp.': 'Swallow sp.',
}

In [68]:
# Apply the spelling corrections to the species column only. A flat dict passed
# to DataFrame.replace is matched against every column; the nested form limits
# the lookup to 'species', which is the only column the mapping is meant for.
df = excel_df.replace({'species': replacements})

In [70]:
# Run the species-name corrections on `df`, restricted to the species column
# (a flat dict passed to DataFrame.replace would be matched against every column).
spell_checked_df = df.replace({'species': replacements})

In [72]:
df[(df['species']=='Dark-eyed Junco') & (df['year']==2013)]

Unnamed: 0,id,count,date,year,month,species
0,OE,0,Jan-2013,2013,Jan,Dark-eyed Junco
1,WD,0,Jan-2013,2013,Jan,Dark-eyed Junco
2,SR,0,Jan-2013,2013,Jan,Dark-eyed Junco
3,TD1,0,Jan-2013,2013,Jan,Dark-eyed Junco
4,TD2,0,Jan-2013,2013,Jan,Dark-eyed Junco
5,TD3,0,Jan-2013,2013,Jan,Dark-eyed Junco
6,EF1,0,Jan-2013,2013,Jan,Dark-eyed Junco
7,EF2,0,Jan-2013,2013,Jan,Dark-eyed Junco
8,CC,10,Jan-2013,2013,Jan,Dark-eyed Junco
9,CS,8,Jan-2013,2013,Jan,Dark-eyed Junco


In [73]:
# Calculate total individual bird count by area and concat.
# The count column is selected explicitly before summing: the original
# `.sum('count')` passed 'count' as the `numeric_only` argument and only worked
# because a non-empty string is truthy.
areas_total_bird_count_df = (
    spell_checked_df
    .groupby(['id', 'year', 'month', 'date'])['count']
    .sum()
    .reset_index()
)
areas_total_bird_count_df['species'] = 'Total Bird Count'
areas_total_added_df = pd.concat([spell_checked_df, areas_total_bird_count_df])

In [74]:
areas_total_bird_count_df

Unnamed: 0,id,year,month,date,count,species
0,BP,2010,Apr,Apr-2010,212,Total Bird Count
1,BP,2010,Aug,Aug-2010,137,Total Bird Count
2,BP,2010,Dec,Dec-2010,125,Total Bird Count
3,BP,2010,Feb,Feb-2010,170,Total Bird Count
4,BP,2010,Jan,Jan-2010,188,Total Bird Count
...,...,...,...,...,...,...
3119,WD,2022,Mar,Mar-2022,40,Total Bird Count
3120,WD,2022,May,May-2022,15,Total Bird Count
3121,WD,2022,Nov,Nov-2022,7,Total Bird Count
3122,WD,2022,Oct,Oct-2022,8,Total Bird Count


In [75]:
# Calculate total birds across all areas.
# The count column is selected explicitly before summing: the original
# `.sum('count')` passed 'count' as the `numeric_only` argument and only worked
# because a non-empty string is truthy.
all_area_total_bird = (
    areas_total_added_df
    .groupby(['year', 'month', 'date', 'species'])['count']
    .sum()
    .reset_index()
)
# Rows summed over every area are labelled with the pseudo area id 'ALL'
all_area_total_bird['id'] = 'ALL'
all_area_total_added_df = pd.concat([areas_total_added_df, all_area_total_bird])

In [76]:
all_area_total_bird

Unnamed: 0,year,month,date,species,count,id
0,2010,Apr,Apr-2010,American Coot,1,ALL
1,2010,Apr,Apr-2010,American Crow,50,ALL
2,2010,Apr,Apr-2010,American Dipper,1,ALL
3,2010,Apr,Apr-2010,American Goldfinch,1,ALL
4,2010,Apr,Apr-2010,American Robin,316,ALL
...,...,...,...,...,...,...
9710,2022,Sep,Sep-2022,Violet-green Swallow,1,ALL
9711,2022,Sep,Sep-2022,Virginia Rail,1,ALL
9712,2022,Sep,Sep-2022,White-crowned Sparrow,55,ALL
9713,2022,Sep,Sep-2022,Willow Flycatcher,1,ALL


In [77]:
all_area_total_added_df[(all_area_total_added_df['species']=='Dark-eyed Junco') & (all_area_total_added_df['year']==2013)]

Unnamed: 0,id,count,date,year,month,species
0,OE,0,Jan-2013,2013,Jan,Dark-eyed Junco
1,WD,0,Jan-2013,2013,Jan,Dark-eyed Junco
2,SR,0,Jan-2013,2013,Jan,Dark-eyed Junco
3,TD1,0,Jan-2013,2013,Jan,Dark-eyed Junco
4,TD2,0,Jan-2013,2013,Jan,Dark-eyed Junco
5,TD3,0,Jan-2013,2013,Jan,Dark-eyed Junco
6,EF1,0,Jan-2013,2013,Jan,Dark-eyed Junco
7,EF2,0,Jan-2013,2013,Jan,Dark-eyed Junco
8,CC,10,Jan-2013,2013,Jan,Dark-eyed Junco
9,CS,8,Jan-2013,2013,Jan,Dark-eyed Junco


In [97]:
# Calculate the number of species in all areas
# First remove "Total Bird Count" from totals
all_area_species = all_area_total_added_df[all_area_total_added_df['species']!='Total Bird Count']
# Count, per (area, month), how many species have a count above zero. Only the
# 'count' column is aggregated, which avoids running groupby.apply over the
# whole frame (including the grouping columns).
all_area_total_species = (
    all_area_species
    .groupby(['id', 'date', 'year', 'month'])['count']
    .agg(lambda counts: counts.gt(0).sum())
    .reset_index()
)
all_area_total_species['species'] = 'Total Species Count'
areas_and_species_totals_added_df = pd.concat([all_area_total_added_df, all_area_total_species])


In [80]:
areas_and_species_totals_added_df[(areas_and_species_totals_added_df['species']=='Total Species Count') & (areas_and_species_totals_added_df['year']==2013)]

Unnamed: 0,id,count,date,year,month,species
3,ALL,74,Apr-2013,2013,Apr,Total Species Count
16,ALL,56,Aug-2013,2013,Aug,Total Species Count
29,ALL,41,Dec-2013,2013,Dec,Total Species Count
42,ALL,57,Feb-2013,2013,Feb,Total Species Count
55,ALL,63,Jan-2013,2013,Jan,Total Species Count
68,ALL,57,Jul-2013,2013,Jul,Total Species Count
81,ALL,71,Jun-2013,2013,Jun,Total Species Count
94,ALL,60,Mar-2013,2013,Mar,Total Species Count
107,ALL,74,May-2013,2013,May,Total Species Count
120,ALL,56,Nov-2013,2013,Nov,Total Species Count


In [81]:
# test_final3[test_final3['month'].isnull()]
len(areas_and_species_totals_added_df.species.unique())

250

In [82]:
# sort data with months in the proper order
# assign() returns a new frame, so areas_and_species_totals_added_df is left
# untouched (the original aliased it and overwrote its 'month' column in place).
df_loaded = (
    areas_and_species_totals_added_df
    .assign(month=pd.Categorical(areas_and_species_totals_added_df['month'], months))
    .sort_values(['year', 'month', 'species'])
    .reset_index(drop=True)
)


In [83]:
df_loaded[(df_loaded['year']==2013) & (df_loaded['id']=='ALL') & (df_loaded['species']=='Dark-eyed Junco')]


Unnamed: 0,id,count,date,year,month,species
48583,ALL,168,Jan-2013,2013,Jan,Dark-eyed Junco
49885,ALL,88,Feb-2013,2013,Feb,Dark-eyed Junco
51103,ALL,115,Mar-2013,2013,Mar,Dark-eyed Junco
52447,ALL,95,Apr-2013,2013,Apr,Dark-eyed Junco
55764,ALL,2,Jun-2013,2013,Jun,Dark-eyed Junco
60993,ALL,33,Oct-2013,2013,Oct,Dark-eyed Junco
62315,ALL,53,Nov-2013,2013,Nov,Dark-eyed Junco
63428,ALL,74,Dec-2013,2013,Dec,Dark-eyed Junco


In [85]:
# Fill in data counts of 0 for birds that were not seen in a particular month
key_columns = ['id', 'date', 'year', 'month', 'species']
zero_fill_frames = []
for year in range(2010, 2023):
    year_all_total = df_loaded[(df_loaded['year']==year) & (df_loaded['id']=='ALL')]
    species_seen = year_all_total['species'].unique()
    if len(species_seen) == 0:
        # no 'ALL' rows for this year, so there is nothing to fill
        continue

    # Template with one row per (species seen this year, month). It has no
    # count column; it is only used to find the combinations that are missing.
    merge_template = pd.DataFrame(
        [(specie, month) for specie in species_seen for month in months],
        columns=['species', 'month'],
    )
    merge_template['id'] = 'ALL'
    merge_template['year'] = year
    merge_template['date'] = merge_template['month'] + f'-{year}'

    # indicator=True marks rows that exist only in the template ('left_only').
    # Selecting on that flag, rather than on count == 0 after fillna, avoids
    # re-adding rows that were already present with a genuine count of 0.
    merge_df = merge_template.merge(year_all_total, how='left', on=key_columns, indicator=True)
    zero_counts = merge_df.loc[merge_df['_merge'] == 'left_only', key_columns].assign(count=0)
    zero_fill_frames.append(zero_counts)

# A single concat of the existing data and all newly created zero rows
zero_counts_added = pd.concat([df_loaded, *zero_fill_frames])


In [87]:
zero_counts_added[(zero_counts_added['species']=='Dark-eyed Junco') & (zero_counts_added['year']==2013) & (zero_counts_added['id']=='ALL')]

Unnamed: 0,id,count,date,year,month,species
48583,ALL,168.0,Jan-2013,2013,Jan,Dark-eyed Junco
49885,ALL,88.0,Feb-2013,2013,Feb,Dark-eyed Junco
51103,ALL,115.0,Mar-2013,2013,Mar,Dark-eyed Junco
52447,ALL,95.0,Apr-2013,2013,Apr,Dark-eyed Junco
55764,ALL,2.0,Jun-2013,2013,Jun,Dark-eyed Junco
60993,ALL,33.0,Oct-2013,2013,Oct,Dark-eyed Junco
62315,ALL,53.0,Nov-2013,2013,Nov,Dark-eyed Junco
63428,ALL,74.0,Dec-2013,2013,Dec,Dark-eyed Junco
256,ALL,0.0,May-2013,2013,May,Dark-eyed Junco
258,ALL,0.0,Jul-2013,2013,Jul,Dark-eyed Junco


In [88]:
# Sort data
# Built as one chain that returns a new frame, so zero_counts_added itself is
# not modified (the original aliased it and overwrote its 'month' column).
df = (
    zero_counts_added
    .assign(month=pd.Categorical(zero_counts_added['month'], months))
    .sort_values(['year', 'month', 'species'])
    .reset_index(drop=True)
    .astype({'count': 'int64'})
)

In [89]:
len(df.species.unique())

250

In [90]:
# Stats for data
def get_stats_df(df):
    """Return per-month, per-species summary statistics of the 'ALL' counts.

    Only rows with ``id == 'ALL'`` are used. They are grouped by ``month`` and
    ``species`` and the ``count`` column is summarised.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id``, ``month``, ``species`` and ``count``.

    Returns
    -------
    pandas.DataFrame
        Columns ``month`` (categorical, ordered by the module-level
        ``months`` list), ``species``, ``mean``, ``median``, ``std``, ``min``
        and ``max``, sorted by month and then species.
    """
    # The aggregations are given by name: passing NumPy callables such as
    # np.std is deprecated in pandas, and the names pin pandas' own
    # implementations (a sample standard deviation for 'std').
    stats_df = (
        df[df['id'] == 'ALL']
        .groupby(['month', 'species'])
        .agg(
            mean=('count', 'mean'),
            median=('count', 'median'),
            std=('count', 'std'),
            min=('count', 'min'),
            max=('count', 'max'),
        )
        .reset_index()
    )

    # Order months by calendar position rather than alphabetically
    stats_df['month'] = pd.Categorical(stats_df['month'], months)
    return stats_df.sort_values(['month', 'species']).reset_index(drop=True)

In [91]:
get_stats_df(df[(df['species']== "Dark-eyed Junco") & (df['id']=='ALL')])

Unnamed: 0,month,species,mean,median,std,min,max
0,Jan,Dark-eyed Junco,135.923077,134.0,56.301956,52,217
1,Feb,Dark-eyed Junco,84.461538,71.0,32.255789,47,156
2,Mar,Dark-eyed Junco,98.615385,102.0,51.640647,16,227
3,Apr,Dark-eyed Junco,106.538462,95.0,97.978412,4,329
4,May,Dark-eyed Junco,10.384615,1.0,21.696922,0,70
5,Jun,Dark-eyed Junco,0.923077,1.0,1.38212,0,5
6,Jul,Dark-eyed Junco,0.384615,0.0,0.767948,0,2
7,Aug,Dark-eyed Junco,0.615385,0.0,1.192928,0,4
8,Sep,Dark-eyed Junco,1.307692,1.0,1.493576,0,4
9,Oct,Dark-eyed Junco,52.538462,51.0,26.244413,14,115


# Save data

In [92]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222262 entries, 0 to 222261
Data columns (total 6 columns):
 #   Column   Non-Null Count   Dtype   
---  ------   --------------   -----   
 0   id       222262 non-null  object  
 1   count    222262 non-null  int64   
 2   date     222262 non-null  object  
 3   year     222262 non-null  int64   
 4   month    222262 non-null  category
 5   species  222262 non-null  object  
dtypes: category(1), int64(2), object(3)
memory usage: 8.7+ MB


In [93]:
# Write the long-form counts to count_data.csv in the current working directory;
# the DataFrame index is written as the first column.
# NOTE(review): the App section reads count_data.csv from a different, absolute
# path — confirm the file ends up where that cell expects it.
df.to_csv('count_data.csv')

# App

Read the area boundaries (GeoJSON) and the Mapbox access token.

In [3]:
# Area polygons passed to the choropleth map as GeoJSON.
# NOTE(review): both paths are absolute and machine-specific; consider making
# them relative to the project directory.
with open(
    r"C:\Users\kylek\OneDrive\Documents\Code\shared_with_VM\bird_count\data\areas.json"
) as areas_file:
    areas = json.load(areas_file)
with open(
    r"C:\Users\kylek\OneDrive\Documents\Code\shared_with_VM\bird_count\.mapbox_token"
) as token_file:
    # strip() removes a trailing newline or stray spaces, which would otherwise
    # be sent as part of the access token
    token = token_file.read().strip()



Load the count data from the CSV file.

In [4]:
# Load the saved counts; index_col=0 uses the first CSV column as the index.
# NOTE(review): absolute, machine-specific path — the Save data section writes
# 'count_data.csv' to the working directory, so confirm this is the same file.
df = pd.read_csv(r'C:\Users\kylek\OneDrive\Documents\Code\shared_with_VM\bird_count\bird_count\data\count_data.csv', index_col=0)

In [7]:
# The graph/map components and the sidebar controls are created by callbacks,
# so they are not part of the initial layout. suppress_callback_exceptions=True
# tells Dash not to report callbacks whose Input/Output ids are missing from
# the initial layout.
app = JupyterDash(__name__, suppress_callback_exceptions=True)

app.layout = html.Div(
    id="app-container",
    children=[
        # Sidebar: title, tab selector, species selector and tab-specific controls
        html.Div(
            id="sidebar-container",
            children=[
                html.H1("Squamish Monthly Bird Count"),
                dcc.Tabs(
                    id="tabs",
                    value="tab-graph",
                    children=[
                        dcc.Tab(label="Graph", value="tab-graph"),
                        dcc.Tab(label="Map", value="tab-map"),
                    ],
                ),
                html.P("Select Species:"),
                dcc.Dropdown(
                    id="species-dropdown",
                    options=sorted(df["species"].unique()),
                    value="Total Species Count",
                    clearable=False,
                ),
                # Filled by render_sidebar_content depending on the active tab
                html.Div(
                    id='sidebar-content',
                ),
            ],
        ),
        # Filled by render_content with either the graph or the map
        html.Div(
            id="content-container",
        ),
    ],
)

@app.callback(
    Output('sidebar-content', 'children'),
    Input('tabs', 'value')
)
def render_sidebar_content(tab):
    """Return the sidebar controls for the selected tab.

    The map tab has no extra controls (empty list). The graph tab gets a
    line-shape selector and a checklist for the average / standard deviation
    overlays. Any other value yields None.
    """
    if tab == 'tab-map':
        return []
    if tab != 'tab-graph':
        return None

    line_shape_selector = dcc.RadioItems(
        ['spline', 'linear',],
        'spline',
        id='line-shape-radio',
        inline=True
    )
    overlay_selector = dcc.Checklist(
        ['Average', 'Standard Deviation'],
        [],
        id='average-checklist'
    )
    return [line_shape_selector, overlay_selector]


@app.callback(
    Output('content-container', 'children'),
    Input('tabs', 'value')
)
def render_content(tab):
    """Return the empty Graph component that belongs to the selected tab.

    'tab-map' maps to the component with id 'count-map', 'tab-graph' to
    'count-graph'. Any other value yields None.
    """
    graph_id_by_tab = {'tab-map': "count-map", 'tab-graph': "count-graph"}
    if tab not in graph_id_by_tab:
        return None
    return dcc.Graph(
        id=graph_id_by_tab[tab],
        config=dict(responsive=True),
    )

@app.callback(
    Output("count-map", "figure"), 
    Input("species-dropdown", "value")
)
def update_map(species):
    """Build the animated choropleth map for the selected species.

    Parameters
    ----------
    species : str
        Value of the species dropdown; rows of the module-level ``df`` with
        this value in the ``species`` column are plotted.

    Returns
    -------
    plotly.graph_objects.Figure
        Choropleth over the ``areas`` GeoJSON, coloured by ``count`` with one
        animation frame per ``date``.
    """
    # Rows with id 'ALL' are totals over every area. They are excluded so that
    # their much larger counts do not stretch the colour range and wash out
    # the per-area values.
    # NOTE(review): assumes areas.json has no feature with id 'ALL' — confirm.
    dff = df[(df["species"] == species) & (df["id"] != "ALL")]
    # An empty selection would give NaN as the maximum; fall back to 0
    max_count = dff["count"].max() if not dff.empty else 0

    fig = px.choropleth_mapbox(
        dff,
        geojson=areas,
        locations="id",
        featureidkey="properties.id",
        color="count",
        color_continuous_scale="Purples",
        range_color=(0, max_count),
        zoom=12.5,
        center={"lat": 49.7, "lon": -123.15},
        opacity=0.5,
        labels={"count": "Count", "id": "Area"},
        animation_frame="date",
        template="plotly_dark",
    )
    fig.update_layout(
        margin={"r": 20, "t": 20, "l": 20, "b": 20},
        mapbox_accesstoken=token,
        mapbox_style="satellite-streets",
    )
    # The play/pause menu and the slider exist only when the figure has
    # animation controls; adjust their padding in that case
    if fig["layout"]["updatemenus"]:
        fig["layout"]["updatemenus"][0]["pad"] = dict(r=20, t=25)
        fig["layout"]["sliders"][0]["pad"] = dict(r=0, t=0, b=20)

    return fig

@app.callback(
    Output("count-graph", "figure"), 
    Input("species-dropdown", "value"),
    Input("line-shape-radio", "value"),
    Input('average-checklist', 'value'),
)
def update_graph(species, line_shape, average_checklist):
    """Build the month-by-month line chart for the selected species.

    Parameters
    ----------
    species : str
        Value of the species dropdown.
    line_shape : str
        Value of the line-shape radio items ('spline' or 'linear'); passed to
        Plotly as ``line_shape``.
    average_checklist : list of str
        Selected overlays; may contain 'Average' and/or 'Standard Deviation'.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per year of the 'ALL' counts, plus the optional overlays.
    """
    # Treat a missing checklist value as "nothing selected"
    selected_overlays = average_checklist or []

    dff = df[(df['species']== species) & (df['id']=='ALL')]
    stats_df = get_stats_df(dff)

    fig = px.line(
        dff,
        x='month',
        y='count',
        category_orders= {'month': months},
        color="year",
        color_discrete_sequence=px.colors.qualitative.Light24,
        template="plotly_dark",
        markers=True,
        line_shape=line_shape,
    )

    if 'Average' in selected_overlays:
        # Average
        fig.add_trace(
            go.Scatter(
                x=stats_df['month'],
                y=stats_df['mean'], 
                mode="lines",
                line_shape=line_shape,
                name='Average',
                line={'width':4, 'color':'white'},
                showlegend=False
            )
        )
    
    if 'Standard Deviation' in selected_overlays:
        # Standard deviation band, drawn as a closed polygon: the upper edge
        # left to right, then the lower edge right to left.
        # A group with a single observation has a NaN std; use 0 so the band
        # collapses onto the mean instead of breaking the polygon.
        std = stats_df['std'].fillna(0)
        # The x values come from stats_df so they always line up with the y
        # values, even when stats_df does not contain all twelve months.
        band_months = list(stats_df['month'])
        average_plus_std = list(stats_df['mean'] + std)
        average_minus_std = list(stats_df['mean'] - std)
        # counts cannot be negative, so the lower edge is clipped at 0
        rev_average_minus_std = [x if x > 0 else 0 for x in average_minus_std[::-1]]

        fig.add_trace(
            go.Scatter(
                x=band_months+band_months[::-1],
                y=average_plus_std+rev_average_minus_std,
                fill='toself',
                fillcolor='rgba(255,255,255,0.3)',
                line_color='rgba(255,255,255,0)',
                mode="lines",
                line_shape=line_shape,
                name='Standard Deviation',
                line={'width':4, 'color':'white'},
                showlegend=False
            )
        )

    fig.update_layout(
        margin={"r": 20, "t": 20, "l": 20, "b": 20},
    )

    # Reverse the trace order so the traces added last come first in fig.data
    fig.data = fig.data[::-1]
    return fig



# Start the Dash server with debug mode enabled when this code runs as the main module
if __name__ == "__main__":
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash app running on http://127.0.0.1:8050/
