# Data and Map visualisation

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

First, the necessary data is read into dataframes.

In [None]:
# Nest location tables: one file covers 2019-2020, the other 2021.
dataframe_location_2019_2020 = pd.read_csv("../../resources/original_data/FinlandNestDatafile.csv")
dataframe_location_2021 = pd.read_csv("../../resources/original_data/Finland_nestdata2021_mod.csv")
# Mobbing experiment tables, split across files the same way.
dataframe_mobbing_2019_2020 = pd.read_csv("../../resources/original_data/FinlandMobbingDatafile.csv")
dataframe_mobbing_2021 = pd.read_csv("../../resources/original_data/Finland_ExperimentData2021_mod.csv")

# Stack the mobbing tables (2021 rows first) under a fresh 0..n-1 index.
dataframe_mobbing = pd.concat(
    [dataframe_mobbing_2021, dataframe_mobbing_2019_2020],
    ignore_index=True,
)

# The 2021 location table gets an explicit Year column before it is stacked
# below the 2019-2020 locations.
dataframe_location_2021['Year'] = np.full(len(dataframe_location_2021), 2021)
dataframe_location = pd.concat(
    [dataframe_location_2019_2020, dataframe_location_2021],
    ignore_index=True,
)

# Remove these columns from the mobbing table before merging, so the merged
# frame takes them from the location table only.
dataframe_mobbing = dataframe_mobbing.drop(
    columns=[
        'Site',
        'Year',
        'lat',
        'long',
        'Cuckoo_perch',
        'New_rebuild',
        'Rebuild_original',
    ]
)

# Left join on NestID: every location row is kept, with or without a
# matching mobbing row.
data = dataframe_location.merge(dataframe_mobbing, on='NestID', how='left')

#### Site Data
First, the new columns *Mobbing* and *Years* are added to the dataframe so that plots can be colored by propensity value and by year.

In [None]:
# Categorical label derived from Propensity, used to colour the maps:
# 0 -> 'Shy', 1 -> 'Aggressive', anything else (including missing values)
# -> 'No info'. Vectorised so the whole column is assigned in one step and
# the column also exists when `data` has no rows.
data['Mobbing'] = np.select(
    [data['Propensity'] == 0, data['Propensity'] == 1],
    ['Shy', 'Aggressive'],
    default='No info',
)

# String version of Year so plots treat the year as a discrete category.
# As before, every value other than 2019 or 2020 is labelled '2021'.
data['Years'] = np.select(
    [data['Year'] == 2019, data['Year'] == 2020],
    ['2019', '2020'],
    default='2021',
)

The following code blocks generate lists of the sites. For each year, only the sites that appear in that year's data are listed.

In [None]:
# Rows per site in the 2021 data, with surrounding whitespace stripped from
# the site names before counting.
data.loc[data['Year'] == 2021, 'Site'].str.strip().value_counts()

In [None]:
# Rows per site in the 2020 data, with surrounding whitespace stripped from
# the site names before counting.
data.loc[data['Year'] == 2020, 'Site'].str.strip().value_counts()

In [None]:
# Rows per site in the 2019 data. The result is printed explicitly because a
# notebook only auto-displays the last expression of a cell, and this cell
# continues with the row totals below.
print(data[data.Year == 2019].Site.str.strip().value_counts())
# Total number of rows in the merged data for each year.
print(len(data[data.Year == 2019]))
print(len(data[data.Year == 2020]))
print(len(data[data.Year == 2021]))

#### Stacked Bar Charts for Sites
The following code blocks generate a stacked bar chart for each year. Only the sites relevant for the year are shown in the charts.

In [None]:
def create_stacked_bar_chart(data, year):
    """Draw a horizontal stacked bar chart of nest counts per site for one year.

    For every site present in ``data`` for ``year`` the chart shows three
    stacked segments: rows without a Propensity value, rows with
    Propensity 0 and rows with Propensity 1. The counts are obtained from
    ``plot_one_year``.

    Args:
        data: DataFrame with at least ``Year``, ``Site`` and ``Propensity``
            columns.
        year: Year whose sites are charted.
    """
    # The site list is built from whitespace-stripped names, so the counting
    # must see stripped names as well; otherwise a site stored as e.g.
    # 'Name ' is listed but matches no rows and is charted with zero counts.
    stripped = data.assign(Site=data.Site.str.strip())
    # A missing site name can never match any row, so it is left out.
    sites = stripped.loc[stripped.Year == year, 'Site'].dropna().unique()

    rows = [[site] + list(plot_one_year(stripped, year, site)) for site in sites]
    if not rows:
        # DataFrame.plot raises on a frame without numeric data.
        print(f'No sites found for year {year}')
        return

    counts = pd.DataFrame(rows, columns=['Site', 'No_test', 'N_shy', 'N_aggressive'])

    counts.plot(
        x='Site',
        kind='barh',
        stacked=True,
        title='Stacked Bar Graph',
        mark_right=True)

def plot_one_year(data, year, site, plot = False):
    """Count, and optionally map, the nests of one site in one year.

    Args:
        data: DataFrame with ``Year``, ``Site`` and ``Propensity`` columns.
            When ``plot`` is true it must also contain ``lat``, ``long``,
            ``NestID`` and ``Mobbing``.
        year: Year to select.
        site: Site name to select. Surrounding whitespace is ignored both
            here and in the ``Site`` column.
        plot: When true, show a map of the selected nests coloured by the
            ``Mobbing`` column.

    Returns:
        ``[n_no_test, n_shy, n_aggressive]``: the number of selected rows
        with a missing Propensity, with Propensity 0 and with Propensity 1.
    """
    # Compare whitespace-stripped names: the site lists shown to the user are
    # stripped, so an exact match against the raw column would silently miss
    # rows whose site name carries stray spaces.
    wanted_site = site.strip() if isinstance(site, str) else site
    tmp = data[(data.Year == year) & (data.Site.str.strip() == wanted_site)]

    if plot:
        fig = px.scatter_mapbox(tmp, lat="lat", lon="long", hover_name="NestID", hover_data=["Site", "Propensity"],
                            height = 300, color = "Mobbing",
                            color_discrete_map={ 'Shy': 'blue', 'Aggressive': 'red', 'No info': 'grey'},
                            width = 500, zoom = 13, title = site + ', Year: ' + str(year),
                            category_orders={"Mobbing":["Shy","Aggressive","No info"]})
        fig.update_layout(
            mapbox_style="open-street-map",
            margin={"r":0,"t":50,"l":0,"b":0},
        )
        fig.show()

    propensity = tmp.Propensity
    n_no_test = int(propensity.isna().sum())
    n_shy = int((propensity == 0).sum())
    n_aggressive = int((propensity == 1).sum())

    return [n_no_test, n_shy, n_aggressive]

In [None]:
# Stacked bar chart of the 2019 sites.
create_stacked_bar_chart(data, year=2019)

In [None]:
# Stacked bar chart of the 2020 sites.
create_stacked_bar_chart(data, year=2020)

In [None]:
# Stacked bar chart of the 2021 sites.
create_stacked_bar_chart(data, year=2021)

#### Plot Mobbing Data by Site
The following code block contains the helper functions for plotting a site on a map, both for each year separately and for all years combined.

In [None]:
def plot_all_years(data, site):
    """Show a map of all nests of one site, coloured by year.

    Args:
        data: DataFrame with ``Site``, ``lat``, ``long``, ``NestID``,
            ``Propensity`` and ``Years`` columns.
        site: Site name to select. Surrounding whitespace is ignored both
            here and in the ``Site`` column.
    """
    # Compare whitespace-stripped names so that a name picked from the
    # stripped site lists also selects rows stored with stray spaces.
    wanted_site = site.strip() if isinstance(site, str) else site
    tmp = data[data.Site.str.strip() == wanted_site]

    fig = px.scatter_mapbox(tmp, lat="lat", lon="long", hover_name="NestID", hover_data=["Site", "Propensity"],
                        height = 300, color = 'Years',
                        color_discrete_map={'2019': '#a4054f', '2020': '#4c339f', '2021': '#fab82f'},
                        width = 500, zoom = 13, title = site)
    fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r":0,"t":50,"l":0,"b":0},
    )
    fig.show()


Assign the desired site to the variable `site`. The possible choices can be checked from the site lists generated above. The example below uses 'Smedjeviken'.

In [None]:
# Site to inspect.
site = 'Smedjeviken'

# One map per year, coloured by mobbing category.
plot_one_year(data, 2019, site, plot=True)
plot_one_year(data, 2020, site, plot=True)
plot_one_year(data, 2021, site, plot=True)

# One map with all years together, coloured by year.
plot_all_years(data, site=site)

All nests are plotted for a given year. Assign the desired year (2019, 2020 or 2021) to the variable `year`. The default is 2021.

In [None]:

# Year whose nests are drawn on the map.
year = 2021
# One marker per row of the selected year, coloured by site.
fig = px.scatter_mapbox(
    data.loc[data['Year'] == year],
    lat="lat",
    lon="long",
    hover_name="NestID",
    hover_data=["Site"],
    color="Site",
    zoom=13,
    height=600,
)
# OpenStreetMap tiles with no margin around the map.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()

#### Stacked Bar Charts from Sites and Propensity Data
The code snippet below creates a stacked bar chart for each year. Each chart presents the number of nests without data, aggressive nests (Propensity 1) and shy nests (Propensity 0).   
Each chart contains all the sites seen in the data.
                                                                                         

In [None]:
from utils.site_differences import create_site_diffs_data

def create_stacked_bar_chart_all_sites(data, year):
    """Draw a horizontal stacked bar chart with one bar per row of ``data``.

    The index of ``data`` supplies the site labels; the segments come from
    its ``Shy{year}``, ``Aggressive{year}`` and ``No_data{year}`` columns.

    Args:
        data: DataFrame indexed by site, holding the three per-year columns.
        year: Year used to pick the columns and shown in the chart title.
    """
    chart_columns = {
        'Site': data.index.values,
        'Shy': data[f'Shy{year}'],
        'Aggressive': data[f'Aggressive{year}'],
        'No data': data[f'No_data{year}'],
    }
    chart_data = pd.DataFrame(chart_columns)

    plot_options = dict(
        x='Site',
        kind='barh',
        stacked=True,
        title=f'Site data {year}',
        xlim=(0, 25),
        mark_right=True,
        figsize=(10, 12),
        fontsize=14,
    )
    chart_data.plot(**plot_options)
    # Legend entries follow the column order of the chart data.
    plt.legend(['Shy', 'Aggressive', 'No data'], prop={'size': 16})
# Table produced by the project helper; one chart is drawn from it per year.
df = create_site_diffs_data()
create_stacked_bar_chart_all_sites(df, year=2019)
create_stacked_bar_chart_all_sites(df, year=2020)
create_stacked_bar_chart_all_sites(df, year=2021)