In [1]:
import json
import re

import pandas as pd
from dash import Dash, dcc, html, Input, Output
from jupyter_dash import JupyterDash
import plotly.express as px


In [49]:
# Read the 'May' worksheet with no header row, so every spreadsheet row
# (including the free-text rows above the table) is kept as data.
file_path = r'..\data\Monthly Count Records (excel versions)\SEMBC_XLS_Yearly\embc2022.xlsx'
df = pd.read_excel(
    io=file_path,
    sheet_name='May',
    header=None,
)

In [50]:
# Missing cells in the first column become empty strings, so that column can
# be handled as text throughout.
df[0] = df[0].fillna(value='')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,"Date: May 8, 2022 Start: 07:15 Finish: 1...",,,,,,,,,,...,,,,,,,,,,
1,Tide State: low Tide Movement: falling ...,,,,,,,,,,...,,,,,,,,,,
2,"Observers: Judith Vetsch, Patrick MacNamara, ...",,,,,,,,,,...,,,,,,,,,,
3,"Brian Storey, Christopher Di Corrado, Amenda N...",,,,,,,,,,...,,,,,,,,,,
4,Species: 92,Total,OE,WD,SR,TD1,TD2,TD3,EF1,EF2,...,EM,BP,NF1,NF2,SA,IM,MC1,MC2,DW,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,House Finch,6,,,,,,,,,...,,3,,,,,1,,2,
97,Pine Siskin,43,,,,,,1,1,1,...,,35,,,5,,,,,
98,American Goldfinch,12,,,,,,,1,1,...,7,,,,,,,,1,
99,,Total,OE,WD,SR,TD1,TD2,TD3,EF1,EF2,...,EM,BP,NF1,NF2,SA,IM,MC1,MC2,DW,


In [6]:
# The first four rows of column 0 hold the free-text count details.
info_1, info_2, info_3, info_4 = (df.iloc[row, 0] for row in range(4))

# Join them with a two-space gap and leave a trailing two-space gap, so every
# field value is followed by at least two spaces.
info = '  '.join([info_1, info_2, info_3, info_4]) + '  '


In [7]:
# Extract the observer list that follows "Observers:" in the header text.
# Capture group 2 is the value; it must end in a word character and be
# followed by at least two spaces.
#
# Inside a character class ' -,' is a *range* (space through comma), not the
# three characters space, hyphen and comma. The range is kept so everything
# that matched before still matches, and an escaped '\-' is added so that
# hyphenated names are no longer cut short.
observers_match = re.search(r'(Observers: +)([\w -,\-]+\w+)(   *)', info)
if observers_match is None:
    # Fail with a clear message instead of an AttributeError on None.group.
    raise ValueError("No 'Observers:' entry found in the sheet header text")
observers = observers_match.group(2)
observers

'Dave Lassmann, Judith Vetsch, Patrick MacNamara, Chris Murrell, Grant Danielson, Sophie Vielfaure, Karl Ricker, Tiffany Brunke, Kyle Kulas, Chris Dale'

In [8]:
# Equipment field: capture group 2 is the text after "Equipment:", which must
# be followed by at least two spaces.
equipment_match = re.compile(r'(Equipment: +)([\w -]+\w+)(   *)').search(info)
equipment = equipment_match[2]
equipment

'scope - binoculars only'

In [9]:
# Tide movement field: the value is the second capture group of the pattern.
tide_movement_match = re.search(
    pattern=r'(Tide Movement: +)([\w -]+\w+)(   *)',
    string=info,
)
tide_movement = tide_movement_match.groups()[1]
tide_movement

'falling'

In [10]:
# Tide state field: text after "Tide State:" that is followed by two or more
# spaces (capture group 2).
tide_state_match = re.compile(r'(Tide State: +)([\w -]+\w+)(   *)').search(info)
tide_state = tide_state_match[2]
tide_state

'high'

In [11]:
# Sky field: capture group 2 holds the value that follows "Sky:".
sky_match = re.search(
    pattern=r'(Sky: +)([\w -]+\w+)(   *)',
    string=info,
)
sky = sky_match.groups()[1]
sky

'clear'

In [12]:
# Sea state field: the value may contain spaces and hyphens; it is the second
# capture group of the pattern.
sea_state_match = re.compile(r'(Sea State: +)([\w -]+\w+)(   *)').search(info)
sea_state = sea_state_match[2]
sea_state

'calm - ripple - chop'

In [13]:
# Extract the count date that follows "Date:" in the header text.
# Capture group 2 is the value; it must end in a word character and be
# followed by at least two spaces.
#
# Inside a character class ' -,' is a *range* (space through comma), not the
# three characters space, hyphen and comma. The range is kept for backward
# compatibility and an escaped '\-' is added so hyphenated dates also match.
date_match = re.search(r'(Date:\s+)([\w -,\-]+\w+)(   *)', info)
if date_match is None:
    # Fail with a clear message instead of an AttributeError on None.group.
    raise ValueError("No 'Date:' entry found in the sheet header text")
date = date_match.group(2)
date

'January 9, 2022'

In [14]:
# Start time: a "<word chars>:<word chars>" token after "Start:", searched for
# in the first header row only.
start_match = re.compile(r'(Start:\s+)(\w+:\w+)(   *)').search(info_1)
start = start_match[2]
start

'08:30'

In [15]:
# Finish time: same shape as the start time, taken from the first header row.
finish_match = re.search(
    pattern=r'(Finish:\s+)(\w+:\w+)(   *)',
    string=info_1,
)
finish = finish_match.groups()[1]
finish

'2:00'

In [16]:
# Precipitation field: capture group 2 is the text after "Precip:" that is
# followed by at least two spaces.
precip_match = re.compile(r'(Precip: +)([\w -]+\w+)(   *)').search(info)
precip = precip_match[2]
precip

'none'

In [2]:
# Worksheet names, one per monthly count (note 'Sept', not 'Sep').
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
file_path = r'..\data\Monthly Count Records (excel versions)\SEMBC_XLS_Yearly\embc2022.xlsx'
year = 2022  # year used to label every month read from the workbook above

# Long-form pieces are collected here and concatenated once after the loop;
# calling pd.concat inside the loop re-copies everything gathered so far.
frames = []

# Open the workbook once and parse each sheet from it, instead of re-opening
# the file twice per month.
with pd.ExcelFile(file_path) as workbook:
    for month in months:
        # First pass without a header: locate the row that holds the area
        # codes, identified by 'OE' in the third column.
        df = workbook.parse(sheet_name=month, header=None)
        # idx = df[df[0].str.contains(r'Species: +\w+', regex=True, na=False)].index
        idx = df[df[2].str.contains(r'OE', regex=True, na=False)].index
        if len(idx) == 0:
            raise ValueError(f"No header row containing 'OE' found in sheet {month!r}")

        # Second pass: use that row as the column header.
        df1 = workbook.parse(sheet_name=month, header=idx[0])
        columns = df1.columns
        df2 = df1.dropna(subset=[columns[0]])   # keep rows that have a name in the first column
        df2 = df2.drop(columns='Total')         # drop the per-row total column
        df2 = df2.fillna(0)                     # blank cell -> 0
        df2 = df2.sort_values([columns[0]])
        # Remove summary rows ('Totals', 'Overall...') so only species remain.
        total_idx = df2[df2[columns[0]].str.contains(r'Totals|Overall.*', regex=True, na=False)].index
        df2 = df2.drop(total_idx)
        df2 = df2.set_index(columns[0])
        df2.index.name = None

        # A stray non-numeric cell would break both the `> 0` comparison and
        # the final integer cast. Coerce such cells to 0 and report how many
        # were affected so the data-entry problem stays visible.
        numeric = df2.apply(pd.to_numeric, errors='coerce')
        n_bad = int(numeric.isna().sum().sum())
        if n_bad:
            print(f'{month}: treating {n_bad} non-numeric cell(s) as 0')
        df2 = numeric.fillna(0)

        # total species count: number of rows with a positive count, per area
        total_count = df2.gt(0).sum(axis=0)
        df3 = pd.DataFrame({'id': total_count.index, 'count': total_count.values, 'date': f'{month}-{year}', 'species': 'Total Species Count'})
        frames.append(df3)

        # individual species count: one small frame per species row
        for species in df2.index:
            species_count = df2.loc[species].T
            species_df = pd.DataFrame({'id': species_count.index, 'count': species_count.values, 'date': f'{month}-{year}', 'species': species})
            frames.append(species_df)

# Single concatenation and a single integer cast for the whole year.
final = pd.concat(frames)
final['count'] = final['count'].astype('int64')

In [65]:
# Show the value `month` currently holds in the kernel.
month

'Jul'

In [3]:
# Display the long-form table `final` (columns: id, count, date, species).
final

Unnamed: 0,id,count,date,species
0,OE,6,Jan-2022,Total Species Count
1,WD,4,Jan-2022,Total Species Count
2,SR,7,Jan-2022,Total Species Count
3,TD1,5,Jan-2022,Total Species Count
4,TD2,8,Jan-2022,Total Species Count
...,...,...,...,...
15,SA,0,Dec-2022,Western Grebe
16,IM,0,Dec-2022,Western Grebe
17,MC1,0,Dec-2022,Western Grebe
18,MC2,0,Dec-2022,Western Grebe


In [51]:
# Find the header row: rows whose first column looks like "Species: <n>".
# `idx` keeps every matching row label; the first one is displayed.
is_header_row = df[0].str.contains(r'Species: +\w+', regex=True, na=False)
idx = df.loc[is_header_row].index
idx[0]

4

In [73]:
# Re-read the 'Jul' sheet, using row idx[0] as the column header.
df1 = pd.read_excel(
    io=file_path,
    sheet_name='Jul',
    header=idx[0],
)
columns = df1.columns
# NOTE(review): this summarises `df`, not the freshly loaded `df1` — confirm
# that is intended.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 22 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       76 non-null     object
 1   1       75 non-null     object
 2   2       6 non-null      object
 3   3       10 non-null     object
 4   4       17 non-null     object
 5   5       11 non-null     object
 6   6       19 non-null     object
 7   7       14 non-null     object
 8   8       29 non-null     object
 9   9       24 non-null     object
 10  10      29 non-null     object
 11  11      24 non-null     object
 12  12      8 non-null      object
 13  13      29 non-null     object
 14  14      40 non-null     object
 15  15      17 non-null     object
 16  16      19 non-null     object
 17  17      20 non-null     object
 18  18      15 non-null     object
 19  19      34 non-null     object
 20  20      16 non-null     object
 21  21      22 non-null     object
dtypes: object(22)
memory usage: 

In [74]:

# Build the species-by-area table: keep rows that have a name in the first
# column, drop the 'Total' column, treat blanks as 0, then sort by and index on
# the first column.
name_col = columns[0]
df2 = (
    df1
    .dropna(subset=[name_col])
    .drop(columns='Total')
    .fillna(0)
    .sort_values([name_col])
    .set_index(name_col)
)
df2.index.name = None
# Remove the summary row so only species remain.
df2 = df2.drop('Totals')


In [79]:
# Print every value of the 'EF1' column, one per line, to eyeball the raw cells.
for value in df2.loc[:, 'EF1']:
    print(value)

0
0
1
15
0
0
0
5
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
`
10
0
0
0
1
0
0
0
0
0
3
0
4
2
0
0
0
0
4
0
0
0
0
63
0
0
1
2
3
1
0
2
49
0
2
4
0
0
0
0
2
1
0
2
0


In [46]:
# transpose to long form: one row per (species, area) pair
# Per-species frames are collected in a list and concatenated once; calling
# pd.concat inside the loop re-copies all rows gathered so far on every pass.
species_frames = []
for species in df2.index:
    species_count = df2.loc[species].T
    species_df = pd.DataFrame({'id': species_count.index, 'count': species_count.values, 'date': '2022-01', 'species': species})
    species_frames.append(species_df)
# pd.concat rejects an empty list, so fall back to an empty frame when df2 has
# no rows.
df4 = pd.concat(species_frames) if species_frames else pd.DataFrame()
df4

Unnamed: 0,id,count,date,species
0,OE,0,2022-01,Accipiter sp.
1,WD,0,2022-01,Accipiter sp.
2,SR,0,2022-01,Accipiter sp.
3,TD1,0,2022-01,Accipiter sp.
4,TD2,0,2022-01,Accipiter sp.
...,...,...,...,...
15,SA,1,2022-01,Winter Wren (Pacific)
16,IM,0,2022-01,Winter Wren (Pacific)
17,MC1,0,2022-01,Winter Wren (Pacific)
18,MC2,0,2022-01,Winter Wren (Pacific)


In [41]:
# Species richness per area: for each area column, count the rows whose
# value is greater than zero.
total_count = (df2 > 0).sum()
df3 = pd.DataFrame(
    {
        'id': total_count.index,
        'counts': total_count.values,
        'date': '2022-01',
        'species': 'Total Species',
    }
)
df3

Unnamed: 0,id,counts,date,species
0,OE,6,2022-01,Total Species
1,WD,4,2022-01,Total Species
2,SR,7,2022-01,Total Species
3,TD1,5,2022-01,Total Species
4,TD2,8,2022-01,Total Species
5,TD3,3,2022-01,Total Species
6,EF1,11,2022-01,Total Species
7,EF2,5,2022-01,Total Species
8,CC,8,2022-01,Total Species
9,CS,12,2022-01,Total Species


In [20]:
# Load the GeoJSON polygons used to draw the count areas.
# NOTE(review): absolute, machine-specific path — consider a path relative to
# the notebook.
with open(r"C:\Users\kylek\OneDrive\Documents\Code\shared_with_VM\bird_count\data\areas.json") as file:
    areas = json.load(file)

# `final` stores its values in a 'count' column and holds one row per
# (area, month, species), so restrict the map to the per-area species totals.
# Bracket access is required: `final.count` would resolve to the DataFrame
# method, not the column.
totals = final[final['species'] == 'Total Species Count']

# Animated choropleth: one frame per value of 'date', areas coloured by count.
fig = px.choropleth_mapbox(totals, geojson=areas, locations='id', color='count',
                           color_continuous_scale="Purp",
                           range_color=(0, totals['count'].max()),
                           mapbox_style="carto-positron",
                           zoom=12.5, center = {"lat": 49.7, "lon": -123.15},
                           opacity=0.5,
                           labels={'count':'Total Species', 'id': "Area"},
                           animation_frame="date"
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [18]:

# Render swatches of plotly's built-in sequential continuous colour scales.
import plotly.express as px
  
fig = px.colors.sequential.swatches_continuous()
fig.show()

In [20]:
# GeoJSON polygons for the count areas and the Mapbox access token.
# NOTE(review): both paths are absolute and machine-specific — consider paths
# relative to the notebook.
with open(r"C:\Users\kylek\OneDrive\Documents\Code\shared_with_VM\bird_count\data\areas2.json") as file:
    areas = json.load(file)
with open(r'C:\Users\kylek\OneDrive\Documents\Code\shared_with_VM\bird_count\.mapbox_token') as file:
    # Strip surrounding whitespace: a trailing newline in the token file would
    # otherwise become part of the access token.
    token = file.read().strip()

app = JupyterDash(__name__)

# The app reads the long-form table through the name `df`.
df = final

# Two-column flex layout: a fixed-width panel with the title and species
# picker on the left, and the map filling the remaining width.
# Style keys are camelCased, as Dash expects for the `style` property.
app.layout = html.Div(
    [
        html.Div(
            [
                html.H1('Squamish Monthly Bird Count'),
                dcc.Dropdown(
                    options=sorted(df['species'].unique()),
                    value='Total Species Count',
                    id='species-dropdown',
                    # The graph callback needs a selected species, so do not
                    # offer the "clear selection" control.
                    clearable=False,
                ),
            ],
            style={'width': '250px'},
        ),
        html.Div(
            dcc.Graph(
                id='count-graph',
                style={'height': '100%'},
                config=dict(responsive=True),
            ),
            style={'flex': 3, 'backgroundColor': '#f1f1f1'},
        ),
    ],
    style={
        'display': 'flex',
        'flexDirection': 'row',
        'height': 'calc(100vh - 16px)',
        'backgroundColor': 'DodgerBlue',
        'alignItems': 'stretch',
    },
)


@app.callback(
    Output('count-graph', 'figure'),
    Input('species-dropdown', 'value'))
def update_graph(species):
    """Redraw the choropleth map for the species chosen in the dropdown.

    Parameters
    ----------
    species : str or None
        Current value of the 'species-dropdown' component; None when nothing
        is selected.

    Returns
    -------
    plotly.graph_objects.Figure
        Mapbox choropleth of 'count' per area 'id', animated over 'date'.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When no rows match the selection, so the current figure is kept.
    """
    # Function-scope import keeps this cell self-contained.
    from dash.exceptions import PreventUpdate

    dff = df[df['species'] == species]
    if species is None or dff.empty:
        # With no rows there are no animation frames, so the
        # updatemenus/sliders lookups below would raise IndexError and
        # range_color would receive NaN. Leave the graph as it is instead.
        raise PreventUpdate

    fig = px.choropleth_mapbox(dff, geojson=areas, locations='id', featureidkey="properties.id", color='count',
                            color_continuous_scale="Purp",
                            range_color=(0, dff['count'].max()),
                            zoom=12.5, center = {"lat": 49.7, "lon": -123.15},
                            opacity=0.5,
                            labels={'count': 'Count', 'id': "Area"},
                            animation_frame="date",
                            )
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                      mapbox_accesstoken=token,
                      mapbox_style="satellite-streets")
    # Adjust the padding around the animation play button and the date slider.
    fig['layout']['updatemenus'][0]['pad']=dict(r= 20, t= 25)
    fig['layout']['sliders'][0]['pad']=dict(r= 0, t= 0, b=20)

    return fig


# Start the Dash server (with debug mode on) only when this code runs as the
# main module.
if __name__ == '__main__':
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash app running on http://127.0.0.1:8050/


In [9]:
# List the column labels of `final`.
final.columns

Index(['id', 'count', 'date', 'species'], dtype='object')

In [10]:
# Distinct values in the 'id' column of `final`.
final['id'].unique()

array(['OE', 'WD', 'SR', 'TD1', 'TD2', 'TD3', 'EF1', 'EF2', 'CC', 'CS',
       'EC', 'EM', 'BP', 'NF1', 'NF2', 'SA', 'IM', 'MC1', 'MC2', 'DW'],
      dtype=object)