# Cluster Plots and Interactive Cluster Maps
In this Jupyter notebook we first plot figures for the different clusterings. Clusters are created based on the distances between nests; the code can be found in the `clustering.py` script. The possible distances were set to 15, 30, 50, 100, 200 and 300 meters. 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from utils.data_utils import fetch_data

Below are the functions that are used to collect propensity data in the clusters and the functions for plotting the data.

In [None]:

def generate_cluster_info(cluster: int, year: int, general_data: pd.DataFrame,
    nests: pd.DataFrame, cluster_size_data: pd.DataFrame, dist: int) -> pd.DataFrame:
    """Return ``cluster_size_data`` extended with one summary row for a cluster.

    Args:
        cluster: Cluster id; stored in the row as the label ``'ClusterID <id>'``.
        year: Year the nests belong to; stored as-is in the ``Year`` column.
        general_data: Frame indexed by nest id with a ``Propensity`` column.
        nests: Frame whose index holds the ids of the nests in this cluster.
        cluster_size_data: Frame the new row is appended to (not modified
            in place).
        dist: Clustering distance; stored as-is in the ``Dist`` column.

    Returns:
        A new DataFrame: ``cluster_size_data`` followed by the new row, with
        the index renumbered.
    """
    # Only the third frame returned by fetch_data (nest locations) is needed here.
    _, _, df_location_data = fetch_data()
    nest_ids = nests.index.values
    all_nests = len(nest_ids)
    nests_with_data = len([nest_id for nest_id in nest_ids if nest_id in general_data.index])
    # Distinct site names (whitespace-stripped) covered by the cluster.
    sites = list({df_location_data.loc[nest_id]['Site'].strip() for nest_id in nest_ids})
    # Nests missing from general_data contribute a fixed 0.2 to the total.
    total_propensity = sum(
        general_data.loc[nest_id]['Propensity'] if nest_id in general_data.index else 0.2
        for nest_id in nest_ids)
    row = pd.DataFrame({'ClusterID': [f'ClusterID {cluster}'], 'Year': [year], 'Dist': [dist],
        'Site': [sites], 'Nests_with_data': [nests_with_data],
        'All_nests': [all_nests], 'Total_propensity': [total_propensity]})
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([cluster_size_data, row], ignore_index=True)

# Create cluster size vs propensity data
def create_cluster_propensity(years: list, dist: int) -> pd.DataFrame:
    """Build the cluster-size / propensity table for one clustering distance.

    For every year in ``years`` and every cluster id found in that year's rows
    of the ``ClusterID_<dist>`` column, one summary row is produced by
    ``generate_cluster_info``.

    Args:
        years: Years to include; each is matched against the ``Year`` column
            of the cluster frame returned by ``fetch_data``.
        dist: Clustering distance; selects the ``ClusterID_<dist>`` column.

    Returns:
        DataFrame with the columns ``ClusterID``, ``Year``, ``Dist``, ``Site``,
        ``Nests_with_data``, ``All_nests`` and ``Total_propensity``. It is
        empty, but still has these columns, when nothing matches.
    """
    # The last column must be named like the one generate_cluster_info fills in
    # and the plotting functions read ('Total_propensity').
    cluster_size_data = pd.DataFrame(columns=['ClusterID', 'Year', 'Dist', 'Site',
        'Nests_with_data', 'All_nests', 'Total_propensity'])
    cluster_size_data = cluster_size_data.astype({'ClusterID': str})
    df_general_data, df_clusters, _ = fetch_data()
    cluster_column = f'ClusterID_{dist}'
    for year in years:
        nests_current_year = df_clusters[df_clusters['Year'] == year]
        # groupby yields the cluster ids present in this year in ascending
        # order, so ids without nests are never visited.
        for cluster_id, nests in nests_current_year.groupby(cluster_column):
            if nests.empty:
                continue
            cluster_size_data = generate_cluster_info(
                cluster_id, year, df_general_data, nests, cluster_size_data, dist)
    return cluster_size_data

def create_plots_cluster_propensity(dist):
    """Show one horizontal bar chart per year for the given cluster distance.

    Each chart overlays two bar series per cluster on the same axes: the
    number of nests (``All_nests``) and the summed propensity
    (``Total_propensity``). The x-axis is fixed to the range 0-25.

    Args:
        dist: Clustering distance passed on to ``create_cluster_propensity``
            and used to filter its result.
    """
    _, df_clusters, _ = fetch_data()
    years = df_clusters['Year'].unique()
    cluster_size_data = create_cluster_propensity(years, dist)

    # (seaborn colour-code palette, column plotted on x, legend label)
    layers = (
        ("pastel", 'All_nests', 'Nest size'),
        ("muted", 'Total_propensity', 'Propensity'),
    )

    for year in years:
        in_year = (cluster_size_data.Dist == dist) & (cluster_size_data.Year == year)
        yearly = cluster_size_data[in_year]

        sns.set_theme(style="whitegrid")
        _, ax = plt.subplots(figsize=(12, 10))
        plt.title(f'Propensity vs Cluster Size {year}', fontsize=20)

        # Both series use colour code 'b'; the active palette decides the shade.
        for palette, column, label in layers:
            sns.set_color_codes(palette)
            sns.barplot(x=column, y='ClusterID', data=yearly, label=label, color='b')

        # Legend, axis limits and axis labels.
        ax.legend(ncol=2, loc="lower right", frameon=True)
        ax.set(xlim=(0, 25), ylabel='ClusterID', xlabel='')
        # Print the value next to every bar.
        for container in ax.containers:
            ax.bar_label(container)
        sns.despine(left=True, bottom=True)
        plt.show()

def plot_mobbing_vs_cluster_size(dist):
    """Show a strip plot of total propensity against cluster size.

    All years present in the cluster data are combined into one plot.

    Args:
        dist: Clustering distance passed on to ``create_cluster_propensity``
            and used to filter its result.
    """
    _, df_clusters, _ = fetch_data()
    years = df_clusters['Year'].unique()
    cluster_size_data = create_cluster_propensity(years, dist)
    # Filter once instead of repeating the same mask for x and y.
    selected = cluster_size_data[cluster_size_data.Dist == dist]
    sns.set_theme(style="whitegrid")
    plt.figure(figsize=(10, 15))
    # The plot is drawn in the first cell of a 2x1 grid; the second stays empty.
    plt.subplot(2, 1, 1)
    plt.title(f'Propensity Compared to Cluster Size (distance {dist} m)', fontsize=20)
    sns.stripplot(x=selected['All_nests'],
        y=selected['Total_propensity'],
        jitter=0.3)
    # Label fixed: the original had an unbalanced closing parenthesis.
    plt.ylabel('Number of Attacks (no data nests count as 0.2)', fontsize=15)
    plt.show()


First we plot the propensity data for all cluster distances. Nests without data are assumed to be aggressive with 20 percent probability. Only a few clusters contain a high number of nests, so it cannot be said for certain whether the cluster size affects the number of attacks. Moreover, the larger clustering distances produce the clusters with higher numbers of nests. Reed warblers can learn the behaviour of a neighbouring nest when the nests are approximately within 15 metres of each other. Thus the total propensity of the bigger clusters may not be reliable enough to say anything about the behaviour. In these plots all years are included in each cluster distance plot.

The last set of plots shows the number of nests and the propensity in each cluster, separately for every year. The distance between the nests can be changed by passing it as a parameter to the function. The allowed distances are 15, 30, 50, 100, 200 and 300.

In [None]:
# One strip plot of propensity vs cluster size per clustering distance.
for dist in (15, 30, 50, 100, 200, 300):
    plot_mobbing_vs_cluster_size(dist)

# Here you can choose the cluster distance.
dist = 50
create_plots_cluster_propensity(dist)

The following function creates an interactive map of all nests present in the chosen year. When the map is zoomed in, the clusters on the map grow smaller and their number increases. More information about a nest is obtained by clicking on it. The distance between nests, and therefore the number of existing clusters, can be given by the user. The cluster ID is shown in the nest data.

In [None]:
def plot_clustermap(year, dist):
    """Build an interactive folium map of the nests recorded in ``year``.

    Every nest becomes a marker inside a single ``MarkerCluster`` layer. The
    marker popup lists the cluster id, nest id, site and propensity
    (NaN when the nest has no entry in the general data).

    Args:
        year: Year matched against the ``Year`` column of the cluster frame.
        dist: Clustering distance; selects the ``ClusterID_<dist>`` column.

    Returns:
        The ``folium.Map`` containing the marker cluster.
    """
    df_general_data, df_clusters, df_location_data = fetch_data()
    # Basic cluster map with folium, centred on the midpoint of the
    # latitude/longitude extremes of all known nest locations.
    lat_coord = (max(df_location_data['lat']) + min(df_location_data['lat'])) / 2
    long_coord = (max(df_location_data['long']) + min(df_location_data['long'])) / 2
    # folium.Map's keyword is ``zoom_start``; the previous ``default_zoom_start``
    # is not a Map parameter, so the intended zoom level was never applied.
    cluster_map = folium.Map(location=[lat_coord, long_coord], zoom_start=15, control_scale=True)
    cluster = MarkerCluster(name=f'Nests {year}')
    nests_current_year = df_clusters[df_clusters['Year'] == year]
    cluster_column = f'ClusterID_{dist}'
    for i in range(min(df_clusters[cluster_column]), max(df_clusters[cluster_column]) + 1):
        nests = nests_current_year[nests_current_year[cluster_column] == i]
        if len(nests) == 0:
            continue
        # Work on a copy so the added columns never touch df_location_data.
        df_nests = df_location_data.loc[nests.index].copy()
        df_nests['NestID'] = df_nests.index.values
        df_nests['Propensity'] = [
            df_general_data.loc[nestID]['Propensity'] if nestID in df_general_data.index else np.nan
            for nestID in nests.index.values]
        # Plain loop: the markers are created purely for their side effect.
        for _, row in df_nests.iterrows():
            folium.Marker(
                location=[row['lat'], row['long']],
                popup=[i, row['NestID'], row['Site'], row['Propensity']],
                tooltip='<h5>Click for more info</h5>',
            ).add_to(cluster)
    # Attach the marker layer once, after all markers have been collected.
    cluster.add_to(cluster_map)
    return cluster_map

In [None]:
# Interactive nest map for 2019 using the ClusterID_50 clustering.
map2019 = plot_clustermap(2019, 50)
# A bare expression on the last line of a notebook cell is displayed as the cell output.
map2019

In [None]:
# Interactive nest map for 2020 using the ClusterID_50 clustering.
map2020 = plot_clustermap(2020, 50)
# A bare expression on the last line of a notebook cell is displayed as the cell output.
map2020

In [None]:
# Interactive nest map for 2021 using the ClusterID_50 clustering.
map2021 = plot_clustermap(2021, 50)
# A bare expression on the last line of a notebook cell is displayed as the cell output.
map2021