In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from shapely.geometry import Polygon
import scipy.stats

import sys
sys.path.append('../')
from helper_functions.gridding import h3_grid
from helper_functions.inference_models import ActualDistribution
from helper_functions.metrics import generate_richness_frame, generate_shannon_frame, generate_simpson_frame
from web_interface.func_import import load_data, add_geometry, hex_to_geojson

In [2]:
# Load the observations and keep only rows that have coordinates.
# Written as a single non-mutating chain so that re-running this cell always
# starts from a fresh load_data() result instead of editing it in place.
df = (
    load_data()
    .dropna(subset=['Latitude', 'Longitude'])
    # Date strings start with the four-digit year (a range such as
    # '2000-01-01 - 2000-12-31' yields its start year). The vectorized .str
    # accessor passes a missing Date through as NaN instead of raising.
    .assign(Year=lambda frame: frame['Date'].str[:4])
)
df

Unnamed: 0,ScientificName,TaxonID,Date,Latitude,Longitude,Municipality,Year
0,Strix aluco,http://tun.fi/MX.28998,2000-01-01 - 2000-12-31,60.340900,25.242500,Sipoo,2000
1,Fringilla coelebs,http://tun.fi/MX.36237,2001-07-24,60.177000,24.202000,Siuntio,2001
2,Certhia familiaris,http://tun.fi/MX.34616,2000-05-14,60.442100,24.519000,Vihti,2000
3,Cyanistes caeruleus,http://tun.fi/MX.34574,2000-04-25,61.593000,27.318800,Mikkeli,2000
4,Corvus monedula,http://tun.fi/MX.37142,2001-04-22,60.177000,24.202000,Siuntio,2001
...,...,...,...,...,...,...,...
987433,Picoides tridactylus,http://tun.fi/MX.30453,2022-06-07,68.925050,26.778422,Inari,2022
987434,Picoides tridactylus,http://tun.fi/MX.30453,2022-06-21,69.018273,26.737819,Inari,2022
987435,Motacilla flava,http://tun.fi/MX.32180,2022-05-19,61.226294,28.811891,Imatra,2022
987436,Delichon urbicum,http://tun.fi/MX.32163,2022-05-19,61.221876,28.797762,Imatra,2022


In [3]:
# Create the project's h3_grid helper and fit it on the observation frame.
# NOTE(review): a later cell passes 'h3_cell' to ActualDistribution and df is
# not reassigned in between, so fit() presumably adds an 'h3_cell' column to df
# in place — confirm in helper_functions.gridding.
grid_object = h3_grid()
grid_object.fit(df)

In [13]:
# Fit the ActualDistribution model on df, configured with the 'h3_cell' and
# 'ScientificName' column names.
# NOTE(review): presumably this yields per-cell species counts — confirm in
# helper_functions.inference_models.
actual_model = ActualDistribution('h3_cell', 'ScientificName')
actual_dist = actual_model.fit(df)

# Derive the richness frame from the fitted distribution.
richness = generate_richness_frame(actual_dist)

# Histogram of the 'richness' column.
fig = px.histogram(
    data_frame=richness,
    x="richness",
    title="Distribution of species richness measurements",
)
fig.show()

In [14]:
# Derive the Shannon entropy frame from the fitted distribution.
shannon_entropies = generate_shannon_frame(actual_dist)

# Histogram of the 'shannon_entropy' column.
fig = px.histogram(
    data_frame=shannon_entropies,
    x="shannon_entropy",
    title="Distribution of shannon entropy measurements",
)
fig.show()

In [15]:
# Derive the Simpson index frame from the fitted distribution.
simpson_indices = generate_simpson_frame(actual_dist)

# Histogram of the 'simpson_index' column.
fig = px.histogram(
    data_frame=simpson_indices,
    x="simpson_index",
    title="Distribution of simpson index measurements",
)
fig.show()

In [7]:
# Combine the three metric frames into a single table, one row per h3_cell.
# The join key is spelled out so the merge cannot silently switch to a
# different key set if the metric frames ever gain another shared column.
metrics = pd.merge(richness, shannon_entropies, on="h3_cell")
metrics = pd.merge(metrics, simpson_indices, on="h3_cell")
metrics

Unnamed: 0,h3_cell,richness,shannon_entropy,simpson_index
0,85012603fffffff,61,3.744717,0.869208
1,85012613fffffff,71,4.411372,0.920463
2,85012617fffffff,25,4.388155,0.944527
3,8501261bfffffff,104,5.709731,0.971990
4,85012643fffffff,80,5.773537,0.976973
...,...,...,...,...
2063,85112ed3fffffff,127,5.731707,0.964359
2064,85112ed7fffffff,107,5.510233,0.954679
2065,85112edbfffffff,139,6.061747,0.973637
2066,851135a7fffffff,1,-0.000000,0.000000


In [16]:
# Pull each metric column out as a NumPy array. These names are kept because
# the Spearman cell reads them as well.
richness_np = metrics['richness'].to_numpy()
shannon_np = metrics['shannon_entropy'].to_numpy()
simpson_np = metrics['simpson_index'].to_numpy()

# np.corrcoef treats every stacked row as one variable, so this is a 3x3 matrix.
metric_labels = ["richness", "shannon", "simpson"]
pearson_corr = np.corrcoef([richness_np, shannon_np, simpson_np])

fig = px.imshow(
    pearson_corr,
    x=metric_labels,
    y=metric_labels,
    zmin=-1,  # Sets the lower bound of the color domain
    zmax=1,  # Upper bound; the symmetric range puts white at zero correlation
    text_auto=True,
    color_continuous_scale=["blue", "white", "red"],
    title="Pearson correlation between metrics",
)
fig.show()

In [17]:
# Rank-based (Spearman) correlation between the three metrics. With axis=1
# each row of the stacked input is one variable, so corr_matrix is 3x3.
# p_matrix holds the matching p-values; it is not plotted here.
corr_matrix, p_matrix = scipy.stats.spearmanr([richness_np, shannon_np, simpson_np], axis=1)
fig = px.imshow(corr_matrix, 
                x = ["richness", "shannon", "simpson"], 
                y = ["richness", "shannon", "simpson"],
                zmin = - 1, # Sets the lower bound of the color domain
                zmax = 1,
                text_auto=True, color_continuous_scale=["blue", "white", "red"],
                # Title previously read "Spearsman"; the coefficient is Spearman's.
                title = "Spearman correlation between metrics")
fig.show()

Both correlation analyses show that, while richness is positively correlated with Shannon entropy and the Simpson index, the correlation is not particularly strong. This is because Shannon entropy and the Simpson index both take into account the evenness of the species distribution, rather than just species richness.