This notebook transforms maps of sites and nests into images with plain backgrounds and clusters those images into 3 classes to explore patterns the clusters of nests might share.

Load libraries needed for this analysis:

In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
import matplotlib.colors as mcolors

# functions for K-means clustering
import code_for_image_clustering_with_Kmeans as ic

Load datasets and combine them into a dataframe:

In [None]:
# Read the raw tables: nest locations and mobbing experiments,
# each stored separately for 2019-2020 and for 2021.
dataframe_location_2019_2020 = pd.read_csv("../../resources/original_data/FinlandNestDatafile.csv")
dataframe_mobbing_2019_2020 = pd.read_csv("../../resources/original_data/FinlandMobbingDatafile.csv")
dataframe_location_2021 = pd.read_csv("../../resources/original_data/Finland_nestdata2021_mod.csv")
dataframe_mobbing_2021 = pd.read_csv("../../resources/original_data/Finland_ExperimentData2021_mod.csv")

# Locations: tag every 2021 row with its year, then stack 2019-2020 above 2021.
dataframe_location_2021 = dataframe_location_2021.assign(
    Year=np.repeat(2021, len(dataframe_location_2021))
)
dataframe_location = pd.concat(
    [dataframe_location_2019_2020, dataframe_location_2021],
    axis=0,
    ignore_index=True,
)

# Mobbing: stack 2021 above 2019-2020 and drop these columns before merging
# (Site, Year, lat and long are taken from the location table instead).
dataframe_mobbing = pd.concat(
    [dataframe_mobbing_2021, dataframe_mobbing_2019_2020],
    axis=0,
    ignore_index=True,
).drop(
    columns=['Site', 'Year', 'lat', 'long', 'Cuckoo_perch', 'New_rebuild', 'Rebuild_original']
)

# Left join on NestID: every location row is kept, with or without mobbing data.
data = dataframe_location.merge(dataframe_mobbing, on='NestID', how='left')

Create a unique ID ('clusterId') for each site and year by combining the columns 'Site' and 'Year':

In [None]:
# Build clusterId as "<Site>_<Year>".
# Site names are stripped of surrounding whitespace first, so the same site
# written with stray spaces does not end up with a different ID.
site_names = data['Site'].str.strip()
year_labels = data['Year'].astype(str)

data['Site'] = site_names
data['clusterId'] = site_names + '_' + year_labels

Transform maps of sites and nests into images with plain backgrounds:

The output images are saved into the 'maps' folder.

In [None]:
# set color palette:
# (shy = green, aggressive = red, no-data nest = black)
# Propensity bins: [-1, 0.5) -> green, [0.5, 1.5) -> red, [1.5, 3) -> black
# NOTE(review): presumably Propensity is coded 0 = shy, 1 = aggressive -- confirm against the data.
cmap, norm = mcolors.from_levels_and_colors([-1, 0.5, 1.5, 3], ['green', 'red', 'black'])

# placeholder for nests without a Propensity value; it falls into the 'black' bin
NO_DATA_PROPENSITY = 2

# savefig does not create missing folders, so make sure the output folder exists
os.makedirs("maps", exist_ok=True)

# transform sites into images and save them into 'maps' folder
# (rows with a missing clusterId are skipped: NaN cannot be turned into a file name)
for c_id in data.clusterId.dropna().unique():
    site_nests = data.loc[data['clusterId'] == c_id, ['lat', 'long', 'Propensity']]
    # Replace NaN only in 'Propensity'. Nests without coordinates are dropped
    # instead of being drawn at a made-up position.
    site_nests = (
        site_nests
        .dropna(subset=['lat', 'long'])
        .fillna({'Propensity': NO_DATA_PROPENSITY})
    )

    # Use a fresh figure per site: drawing with plt.scatter in a loop keeps
    # adding to the same axes, so each image would also contain the nests of
    # every site plotted before it.
    fig, ax = plt.subplots()
    ax.scatter(
        site_nests['long'],
        site_nests['lat'],
        c=site_nests['Propensity'],
        cmap=cmap,
        norm=norm,
    )
    ax.axis('off')
    fig.savefig("maps/" + c_id + ".png", bbox_inches='tight', facecolor='white')
    # release the figure so memory does not grow with the number of sites
    plt.close(fig)

Cluster the images saved in 'maps' into 3 classes using the K-means algorithm:

The following parameters can be changed:
- number_of_clusters: number of classes the images are grouped into
- data_path: path of the folder that contains the images used in the clustering

In [None]:
# Tunable settings for the clustering step
data_path = "maps"        # folder holding the images to cluster
number_of_clusters = 3    # how many classes the images are grouped into

# Build the clustering helper, read the images, then run the clustering
image_clusterer = ic.image_clustering(data_path, number_of_clusters)
image_clusterer.load_images()
image_clusterer.clustering()

The output classes of the clustering can be found in the 'KMeans_output' folder.